From 54215c46fcb50922cfda570ea1ebea17a4aebbaa Mon Sep 17 00:00:00 2001 From: ZhouChen Date: Tue, 21 Jan 2025 13:23:41 +0000 Subject: [PATCH] !2921 add option aicore_num Merge pull request !2921 from ZhouChen/ge_dev --- tf_adapter/interface_spec/api_npu_config.pyh | 2 +- tf_adapter/interface_spec/api_npu_plugin.pyh | 2 +- .../npu_bridge/estimator/npu/npu_config.py | 5 ++- .../npu_bridge/estimator/npu/npu_estimator.py | 11 ++++++ .../npu_bridge/estimator/npu/npu_plugin.py | 5 ++- .../tests/st/util/testcase/ge_plugin_test.cc | 8 ++++ .../tests/st/util/testcase/npu_attrs_test.cc | 38 +++++++++++++++++++ .../tests/ut/util/testcase/ge_plugin_test.cc | 8 ++++ .../tests/ut/util/testcase/npu_attrs_test.cc | 37 ++++++++++++++++++ tf_adapter/util/ge_plugin.cc | 5 +++ tf_adapter/util/npu_attrs.cc | 17 +++++++++ .../npu_device/core/npu_wrapper.cpp | 1 + .../python/npu_device/configs/npu_config.py | 1 + .../tests/stub/include/stub/defines.h | 2 + 14 files changed, 138 insertions(+), 4 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index cafdb3f24..892230504 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -22,7 +22,7 @@ class NPURunConfig(run_config_lib.RunConfig): event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0, frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, - execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1): + execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh index 03d2987b2..59d0ea0f8 100644 --- 
a/tf_adapter/interface_spec/api_npu_plugin.pyh +++ b/tf_adapter/interface_spec/api_npu_plugin.pyh @@ -6,6 +6,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False enable_exception_dump=2, aoe_mode=None, work_path=None, op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, distribute_config=None, aoe_config_file=None, - precision_mode_v2=None, export_compile_stat=1): + precision_mode_v2=None, export_compile_stat=1, aicore_num=None): def npu_resource_shutdown(): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index a1789fe25..aa678d02a 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -117,7 +117,8 @@ class NPURunConfig(run_config_lib.RunConfig): compile_dynamic_mode=None, execute_times=-1, graph_max_parallel_model_num=1, - export_compile_stat=1 + export_compile_stat=1, + aicore_num=None ): """ Constructs a NPUConfig. @@ -186,6 +187,7 @@ class NPURunConfig(run_config_lib.RunConfig): input_fusion_size: Merge input memory less than input_fusion_size, defualt 25600B, max size: 32M, min size: 0M precision_mode_v2: default is: ''. export_compile_stat: configure statistics of the graph compiler, 0: Not Generate; 1: Generated when the program exits (default); 2: Generated when graph compilation complete. + aicore_num: default is: ''. 
""" @@ -286,6 +288,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._graph_max_parallel_model_num = graph_max_parallel_model_num self.execute_times = execute_times self._export_compile_stat = export_compile_stat + self._aicore_num = aicore_num super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 7b372f621..738eba1ea 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -720,6 +720,15 @@ class NPUEstimator(estimator_lib.Estimator): if config._export_compile_stat is not None: custom_op.parameter_map["export_compile_stat"].i = config._export_compile_stat + def __load_aicore_num(self, config, custom_op): + """Load aicore_num config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. 
+ """ + if config._aicore_num is not None: + custom_op.parameter_map["aicore_num"].s = tf.compat.as_bytes(config._aicore_num) + def __load_graph_optimizers(self, config): """ Change the session config and load the graph optimizers: @@ -860,6 +869,8 @@ class NPUEstimator(estimator_lib.Estimator): self.__load_export_compile_stat(config, custom_op) + self.__load_aicore_num(config, custom_op) + return config def __load_job_info(self, job_start_file): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index ffbfd7df0..fa4e69f0d 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -73,7 +73,8 @@ def npu_resource_init(graph_run_mode=1, distribute_config=None, aoe_config_file=None, precision_mode_v2=None, - export_compile_stat=1): + export_compile_stat=1, + aicore_num=None): """Initialize NPU resource""" util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") check_graph_run_mode(graph_run_mode) @@ -121,6 +122,8 @@ def npu_resource_init(graph_run_mode=1, init["ge.aoe_config_file"] = str(aoe_config_file) if export_compile_stat is not None: init["ge.exportCompileStat"] = str(export_compile_stat) + if aicore_num is not None: + init["ge.aicoreNum"] = str(aicore_num) init_options = tf_adapter.map_string_string(init) tf_adapter.PluginInit(init_options) diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index 3e2a4594b..10f0f4085 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -205,5 +205,13 @@ TEST_F(GePluginTest, PluginInitTest_export_compile_stat) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_aicore_num) { + std::map init_options; + init_options["ge.aicoreNum"] = "2|2"; + 
PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index 4882fb300..dea78f7eb 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -371,5 +371,43 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_export_compile_stat) { const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); EXPECT_NE(all_options.find("export_compile_stat"), all_options.cend()); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_aicore_num) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + (*custom_config->mutable_parameter_map())["aicore_num"] = aicore_num; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_aicore_num) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + attr_map["_aicore_num"] = aicore_num; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("aicore_num"), all_options.cend()); +} + } } // end tensorflow diff --git 
a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index 3a130f1d3..f39fe3afc 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -195,5 +195,13 @@ TEST_F(GePluginTest, PluginInitTest_export_compile_stat) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_aicore_num) { + std::map init_options; + init_options["ge.aicoreNum"] = "2|2"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index 0a132fd17..ede01ab83 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -511,5 +511,42 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_export_compile_stat) { const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); EXPECT_NE(all_options.find("export_compile_stat"), all_options.cend()); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_aicore_num) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + (*custom_config->mutable_parameter_map())["aicore_num"] = aicore_num; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + 
+TEST_F(NpuAttrTest, GetAllAttrOptions_aicore_num) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + attr_map["_aicore_num"] = aicore_num; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("aicore_num"), all_options.cend()); +} + } } // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 443743f51..29dca4467 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -294,6 +294,11 @@ void GePlugin::Init(std::map &init_options, const bool if (init_options.find("ge.exportCompileStat") != init_options.end()) { ADP_LOG(INFO) << "[GePlugin] export_compile_stat : " << init_options["ge.exportCompileStat"]; } + + if (init_options.find("ge.aicoreNum") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] aicoreNum : " << init_options["ge.aicoreNum"]; + } + bool tdt_uninit_env = false; (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env); if (!kIsHeterogeneous && !tdt_uninit_env) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index bdffb42a2..b3ad2442c 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -689,6 +689,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string es_cluster_config; std::string execute_times = "-1"; std::string export_compile_stat; + std::string aicore_num; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -731,6 +732,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_es_cluster_config", &es_cluster_config); (void) ctx->GetAttr("_execute_times", &execute_times); (void) ctx->GetAttr("_export_compile_stat", &export_compile_stat); + 
(void) ctx->GetAttr("_aicore_num", &aicore_num); } std::lock_guard lock(mutex_); @@ -789,6 +791,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["export_compile_stat"] = export_compile_stat; init_options_["ge.exportCompileStat"] = export_compile_stat; } + init_options_["aicore_num"] = aicore_num; + init_options_["ge.aicoreNum"] = aicore_num; return init_options_; } @@ -1213,6 +1217,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string compile_dynamic_mode; std::string execute_times = "-1"; std::string export_compile_stat; + std::string aicore_num; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1310,6 +1315,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto compile_dynamic_mode_value = attrs.Find("_compile_dynamic_mode"); auto execute_times_value = attrs.Find("_execute_times"); auto export_compile_stat_value = attrs.Find("_export_compile_stat"); + auto aicore_num_value = attrs.Find("_aicore_num"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1622,6 +1628,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (export_compile_stat_value != nullptr) { export_compile_stat = export_compile_stat_value->s(); } + if (aicore_num_value != nullptr) { + aicore_num = aicore_num_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1738,6 +1747,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["export_compile_stat"] = export_compile_stat; all_options["ge.exportCompileStat"] = export_compile_stat; } + all_options["aicore_num"] = aicore_num; + all_options["ge.aicoreNum"] = aicore_num; return all_options; } @@ -1862,6 +1873,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string accelerate_train_mode; int32_t execute_times = -1; int32_t export_compile_stat = 1; + std::string 
aicore_num; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { @@ -2418,6 +2430,11 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["export_compile_stat"] = std::to_string(export_compile_stat); init_options_["ge.exportCompileStat"] = std::to_string(export_compile_stat); } + if ((params.count("aicore_num") > 0)) { + aicore_num = params.at("aicore_num").s(); + init_options_["aicore_num"] = aicore_num; + init_options_["ge.aicoreNum"] = aicore_num; + } if (params.count("jit_compile") > 0) { const static std::vector kJitCompileList = {"true", "false", diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 73dec5535..c6dd9bb34 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -94,6 +94,7 @@ const std::map kGlobalConfigOptions = { {"event_sync_timeout", "event_sync_timeout"}, {"execute_times", "execute_times"}, {"export_compile_stat", "ge.exportCompileStat"}, + {"aicore_num", "ge.aicoreNum"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 21abadeca..cab34f04a 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -80,5 +80,6 @@ class NpuConfig(NpuBaseConfig): ['fp16', 'origin', 'cube_fp16in_fp32out', 'mixed_float16', 'mixed_bfloat16', 'cube_hif8', 'mixed_hif8']) self.export_compile_stat = OptionValue(1, [0, 1, 2]) + self.aicore_num = OptionValue(None, None) super(NpuConfig, self).__init__() diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h 
b/tf_adapter_2.x/tests/stub/include/stub/defines.h index 2cbb5507f..fe6676d70 100644 --- a/tf_adapter_2.x/tests/stub/include/stub/defines.h +++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h @@ -310,6 +310,8 @@ const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; const char *const OPTION_EXPORT_COMPILE_STAT = "ge.exportCompileStat"; +const char *const OPTION_AICORE_NUM = "ge.aicoreNum"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; // Topo sorting mode -- Gitee