From 7678b6d4118fa1df0480833288453231d5c15fa7 Mon Sep 17 00:00:00 2001
From: qiyuxuan11 <qiyuxuan1@huawei.com>
Date: Wed, 6 Mar 2024 14:38:21 +0800
Subject: [PATCH] Revert "add option quant_dumpable"

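This reverts commit 82500e1a62997df38e5979b1221e91b0f27975fb.

The quant_dumpable option is removed again from every place touched
by this patch:
  * the quant_dumpable argument of NPURunConfig (tf_adapter npu_config.py
    and the api_npu_config.pyh interface spec);
  * NPUEstimator.__load_quant_dumpable_config and its call site;
  * the ge::QUANT_DUMPABLE entries in ge_plugin.cc, npu_attrs.cc,
    session_manager.cc and tf_adapter_2.x npu_wrapper.cpp;
  * the quant_dumpable option of the tf_adapter_2.x NpuConfig;
  * the _quant_dumpable attr in the st/ut om_test_build_geop.pbtxt
    fixtures and the QUANT_DUMPABLE constant in the test stub defines.h.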
---
 tf_adapter/interface_spec/api_npu_config.pyh       |  2 +-
 .../python/npu_bridge/estimator/npu/npu_config.py  |  5 +----
 .../npu_bridge/estimator/npu/npu_estimator.py      | 11 -----------
 .../st/optimizers/pbtxt/om_test_build_geop.pbtxt   |  6 ------
 .../ut/optimizers/pbtxt/om_test_build_geop.pbtxt   |  6 ------
 tf_adapter/util/ge_plugin.cc                       |  1 -
 tf_adapter/util/npu_attrs.cc                       | 14 --------------
 tf_adapter/util/session_manager.cc                 |  2 --
 tf_adapter_2.x/npu_device/core/npu_wrapper.cpp     |  1 -
 .../python/npu_device/configs/npu_config.py        |  1 -
 tf_adapter_2.x/tests/stub/include/stub/defines.h   |  4 ----
 11 files changed, 2 insertions(+), 51 deletions(-)

diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index beb26c1cf..eabc8defe 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -21,7 +21,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                 topo_sorting_mode=None, aoe_config_file=None, insert_op_file=None, stream_sync_timeout=-1,
                 event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0,
                 frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None,
-                ac_parallel_enable=None, quant_dumpable=None):
+                ac_parallel_enable=None):
 
 class ProfilingConfig():
     def __init__(self, enable_profiling=False, profiling_options=None):
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index 743421502..f672d6f33 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -111,8 +111,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                  variable_placement="Device",
                  jit_compile="auto",
                  precision_mode_v2=None,
-                 ac_parallel_enable=None,
-                 quant_dumpable=None
+                 ac_parallel_enable=None
                  ):
         """
         Constructs a NPUConfig.
@@ -147,7 +146,6 @@ class NPURunConfig(run_config_lib.RunConfig):
         stream_max_parallel_num: Specify the degree of parallelism of the AICPU / AICORE engine
                                  to achieve parallel execution between AICPU / AICORE operators.
         ac_parallel_enable: Enable engines such as Aicpu to parallel with other engines in dynamic shape graphs.
-        quant_dumpable: Ensure that the input and output of quant nodes can be dumped.
         op_select_implmode: Selecting whether the operator is implemented with high_precision
                             or high_performance or high_precision_for_all or high_performance_for_all.
         optypelist_for_implmode: Operator list.
@@ -216,7 +214,6 @@ class NPURunConfig(run_config_lib.RunConfig):
         self._dump_config = self._get_dump_config(dump_config)
         self._stream_max_parallel_num = stream_max_parallel_num
         self._ac_parallel_enable = ac_parallel_enable
-        self._quant_dumpable = quant_dumpable
 
         self.horovod_mode = self._get_horovod_mode(horovod_mode)
         util.check_nonnegative_integer(graph_run_mode, "graph_run_mode")
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index b959ec481..c32e72bfa 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -640,15 +640,6 @@ class NPUEstimator(estimator_lib.Estimator):
         if config._ac_parallel_enable is not None:
             custom_op.parameter_map["ac_parallel_enable"].s = tf.compat.as_bytes(config._ac_parallel_enable)
 
-    def __load_quant_dumpable_config(self, config, custom_op):
-        """Load quant_dumpable config, and add to custom_optimizers
-        Args:
-            config: NPURunConfig.
-            custom_op: Customer optimizers.
-        """
-        if config._quant_dumpable is not None:
-            custom_op.parameter_map["quant_dumpable"].s = tf.compat.as_bytes(config._quant_dumpable)
-
     def __load_ps_mode_config(self, custom_op):
         """Load stream_max_parallel_num config ,and add to custom_optimizers
         Args:
@@ -829,8 +820,6 @@ class NPUEstimator(estimator_lib.Estimator):
 
         self.__load_ac_parallel_enable_config(config, custom_op)
 
-        self.__load_quant_dumpable_config(config, custom_op)
-
         self.__load_ps_mode_config(custom_op)
 
         self._load_op_performance_config(config, custom_op)
diff --git a/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt b/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt
index 9c859bada..3af7fa793 100644
--- a/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt
+++ b/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt
@@ -134,12 +134,6 @@ node {
       s: "0"
     }
   }
-  attr {
-    key: "_quant_dumpable"
-    value {
-      s: "0"
-    }
-  }
   attr {
     key: "_is_tailing_optimization"
     value {
diff --git a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt
index 9c859bada..3af7fa793 100644
--- a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt
+++ b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt
@@ -134,12 +134,6 @@ node {
       s: "0"
     }
   }
-  attr {
-    key: "_quant_dumpable"
-    value {
-      s: "0"
-    }
-  }
   attr {
     key: "_is_tailing_optimization"
     value {
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index 3cde8932b..3699ea49b 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -75,7 +75,6 @@ void SetOptionNameMap(json &option_name_map) {
   option_name_map.emplace(ge::OP_COMPILER_CACHE_DIR, "op_compiler_cache_dir");
   option_name_map.emplace(ge::STREAM_MAX_PARALLEL_NUM, "stream_max_parallel_num");
   option_name_map.emplace(ge::AC_PARALLEL_ENABLE, "ac_parallel_enable");
-  option_name_map.emplace(ge::QUANT_DUMPABLE, "quant_dumpable");
   option_name_map.emplace(ge::HCOM_PARALLEL, "hcom_parallel");
   option_name_map.emplace(ge::HCOM_MULTI_MODE, "hcom_multi_mode");
   option_name_map.emplace(ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, "is_tailing_optimization");
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 2b8439470..099abf40c 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -457,7 +457,6 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   std::string dump_layer;
   std::string stream_max_parallel_num;
   std::string ac_parallel_enable;
-  std::string quant_dumpable;
   std::string npuOptimizer;
   std::string is_tailing_optimization = "0";
   std::string op_select_implmode;
@@ -524,7 +523,6 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
     }
     (void) ctx->GetAttr("_stream_max_parallel_num", &stream_max_parallel_num);
     (void) ctx->GetAttr("_ac_parallel_enable", &ac_parallel_enable);
-    (void) ctx->GetAttr("_quant_dumpable", &quant_dumpable);
     (void) ctx->GetAttr("_is_tailing_optimization", &is_tailing_optimization);
     (void) ctx->GetAttr("_op_select_implmode", &op_select_implmode);
     (void) ctx->GetAttr("_optypelist_for_implmode", &optypelist_for_implmode);
@@ -567,7 +565,6 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   sess_options[ge::HCOM_PARALLEL] = hcom_parallel;
   sess_options[ge::STREAM_MAX_PARALLEL_NUM] = stream_max_parallel_num;
   sess_options[ge::AC_PARALLEL_ENABLE] = ac_parallel_enable;
-  sess_options[ge::QUANT_DUMPABLE] = quant_dumpable;
   if (!graph_memory_max_size.empty()) {
     sess_options[ge::GRAPH_MEMORY_MAX_SIZE] = graph_memory_max_size;
   }
@@ -1125,7 +1122,6 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string dump_layer;
   std::string stream_max_parallel_num;
   std::string ac_parallel_enable;
-  std::string quant_dumpable;
   std::string soc_config;
 
   std::string is_tailing_optimization = "0";
@@ -1216,7 +1212,6 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   auto dump_debug_mode_value = attrs.Find("_dump_debug_mode");
   auto stream_max_parallel_num_value = attrs.Find("_stream_max_parallel_num");
   auto ac_parallel_enable_value = attrs.Find("_ac_parallel_enable");
-  auto quant_dumpable_value = attrs.Find("_quant_dumpable");
   auto soc_config_value = attrs.Find("_soc_config");
   auto graph_slice_value = attrs.Find("_graph_slice");
 
@@ -1381,9 +1376,6 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     if (ac_parallel_enable_value != nullptr) {
       ac_parallel_enable = ac_parallel_enable_value->s();
     }
-    if (quant_dumpable_value != nullptr) {
-      quant_dumpable = quant_dumpable_value->s();
-    }
     if (graph_slice_value != nullptr) {
       graph_slice_mode = graph_slice_value->s();
     }
@@ -1581,7 +1573,6 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   all_options["hcom_parallel"] = hcom_parallel;
   all_options["stream_max_parallel_num"] = stream_max_parallel_num;
   all_options["ac_parallel_enable"] = ac_parallel_enable;
-  all_options["quant_dumpable"] = quant_dumpable;
   if (!graph_memory_max_size.empty()) {
     all_options["graph_memory_max_size"] = graph_memory_max_size;
   }
@@ -1722,7 +1713,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   std::string dump_layer;
   std::string stream_max_parallel_num;
   std::string ac_parallel_enable;
-  std::string quant_dumpable;
   std::string soc_config;
   std::string hccl_timeout;
   std::string HCCL_algorithm;
@@ -1875,9 +1865,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
       if (params.count("ac_parallel_enable") > 0) {
         ac_parallel_enable = params.at("ac_parallel_enable").s();
       }
-      if (params.count("quant_dumpable") > 0) {
-        quant_dumpable = params.at("quant_dumpable").s();
-      }
 
       if (params.count("is_tailing_optimization") > 0) {
         is_tailing_optimization = params.at("is_tailing_optimization").b();
@@ -2352,7 +2339,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   sess_options["hcom_parallel"] = std::to_string(static_cast<int32_t>(hcom_parallel));
   sess_options["stream_max_parallel_num"] = stream_max_parallel_num;
   sess_options["ac_parallel_enable"] = ac_parallel_enable;
-  sess_options["quant_dumpable"] = quant_dumpable;
   if (!graph_memory_max_size.empty()) {
     sess_options["graph_memory_max_size"] = graph_memory_max_size;
   }
diff --git a/tf_adapter/util/session_manager.cc b/tf_adapter/util/session_manager.cc
index f4d504592..465b8d99e 100644
--- a/tf_adapter/util/session_manager.cc
+++ b/tf_adapter/util/session_manager.cc
@@ -111,8 +111,6 @@ void SessionManager::PrintGeSessionOptions(std::map<std::string, std::string> &s
   ADP_LOG(INFO) << "[GEOP] stream_max_parallel_num :" << sess_options[ge::STREAM_MAX_PARALLEL_NUM];
   // ac parallel enable
   ADP_LOG(INFO) << "[GEOP] ac_parallel_enable :" << sess_options[ge::AC_PARALLEL_ENABLE];
-  // quant dumpable
-  ADP_LOG(INFO) << "[GEOP] quant_dumpable :" << sess_options[ge::QUANT_DUMPABLE];
 
   // graph memory configuration
   if (!sess_options[ge::GRAPH_MEMORY_MAX_SIZE].empty()) {
diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
index cd94f4c51..698e05855 100644
--- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
@@ -74,7 +74,6 @@ const std::map<std::string, std::string> kConfigurableOptions = {
   {"op_compiler_cache_dir", ge::OP_COMPILER_CACHE_DIR},
   {"stream_max_parallel_num", ge::STREAM_MAX_PARALLEL_NUM},
   {"ac_parallel_enable", ge::AC_PARALLEL_ENABLE},
-  {"quant_dumpable", ge::QUANT_DUMPABLE},
   {"hcom_parallel", ge::HCOM_PARALLEL},
   {"hcom_multi_mode", ge::HCOM_MULTI_MODE},
   {"is_tailing_optimization", ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION},
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index fefb60daa..11522a749 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -46,7 +46,6 @@ class NpuConfig(NpuBaseConfig):
         self.op_compiler_cache_dir = OptionValue(None, None)
         self.stream_max_parallel_num = OptionValue(None, None)
         self.ac_parallel_enable = OptionValue(None, ['0', '1'])
-        self.quant_dumpable = OptionValue(None, ['0', '1'])
         self.hcom_parallel = OptionValue(True, [True, False])
         self.hcom_multi_mode = OptionValue(None, None)
         self.is_tailing_optimization = OptionValue(False, [True, False])
diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h
index 047e4295e..78f64f83b 100644
--- a/tf_adapter_2.x/tests/stub/include/stub/defines.h
+++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h
@@ -173,10 +173,6 @@ const std::string STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum";
 // its value should be "0" or "1", default value is "0"
 const std::string AC_PARALLEL_ENABLE = "ac_parallel_enable";
 
-// Configure whether to ensure that the input and output of quant nodes can be dumped.
-// its value should be "0" or "1", default value is "0"
-const std::string QUANT_DUMPABLE = "quant_dumpable";
-
 // congigure outputDatatype to setting net output type
 const std::string OUTPUT_DATATYPE = "ge.outputDatatype";
 
-- 
Gitee