From 7678b6d4118fa1df0480833288453231d5c15fa7 Mon Sep 17 00:00:00 2001 From: qiyuxuan11 Date: Wed, 6 Mar 2024 14:38:21 +0800 Subject: [PATCH] Revert "add option quant_dumpable" This reverts commit 82500e1a62997df38e5979b1221e91b0f27975fb. --- tf_adapter/interface_spec/api_npu_config.pyh | 2 +- .../python/npu_bridge/estimator/npu/npu_config.py | 5 +---- .../npu_bridge/estimator/npu/npu_estimator.py | 11 ----------- .../st/optimizers/pbtxt/om_test_build_geop.pbtxt | 6 ------ .../ut/optimizers/pbtxt/om_test_build_geop.pbtxt | 6 ------ tf_adapter/util/ge_plugin.cc | 1 - tf_adapter/util/npu_attrs.cc | 14 -------------- tf_adapter/util/session_manager.cc | 2 -- tf_adapter_2.x/npu_device/core/npu_wrapper.cpp | 1 - .../python/npu_device/configs/npu_config.py | 1 - tf_adapter_2.x/tests/stub/include/stub/defines.h | 4 ---- 11 files changed, 2 insertions(+), 51 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index beb26c1cf..eabc8defe 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -21,7 +21,7 @@ class NPURunConfig(run_config_lib.RunConfig): topo_sorting_mode=None, aoe_config_file=None, insert_op_file=None, stream_sync_timeout=-1, event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0, frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, - ac_parallel_enable=None, quant_dumpable=None): + ac_parallel_enable=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 743421502..f672d6f33 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -111,8 +111,7 @@ class NPURunConfig(run_config_lib.RunConfig): variable_placement="Device", jit_compile="auto", precision_mode_v2=None, - ac_parallel_enable=None, - quant_dumpable=None + ac_parallel_enable=None ): """ Constructs a NPUConfig. @@ -147,7 +146,6 @@ class NPURunConfig(run_config_lib.RunConfig): stream_max_parallel_num: Specify the degree of parallelism of the AICPU / AICORE engine to achieve parallel execution between AICPU / AICORE operators. ac_parallel_enable: Enable engines such as Aicpu to parallel with other engines in dynamic shape graphs. - quant_dumpable: Ensure that the input and output of quant nodes can be dumped. op_select_implmode: Selecting whether the operator is implemented with high_precision or high_performance or high_precision_for_all or high_performance_for_all. optypelist_for_implmode: Operator list. @@ -216,7 +214,6 @@ class NPURunConfig(run_config_lib.RunConfig): self._dump_config = self._get_dump_config(dump_config) self._stream_max_parallel_num = stream_max_parallel_num self._ac_parallel_enable = ac_parallel_enable - self._quant_dumpable = quant_dumpable self.horovod_mode = self._get_horovod_mode(horovod_mode) util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index b959ec481..c32e72bfa 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -640,15 +640,6 @@ class NPUEstimator(estimator_lib.Estimator): if config._ac_parallel_enable is not None: custom_op.parameter_map["ac_parallel_enable"].s = tf.compat.as_bytes(config._ac_parallel_enable) - def __load_quant_dumpable_config(self, config, custom_op): - """Load quant_dumpable config, and add to custom_optimizers - Args: - config: NPURunConfig. - custom_op: Customer optimizers. - """ - if config._quant_dumpable is not None: - custom_op.parameter_map["quant_dumpable"].s = tf.compat.as_bytes(config._quant_dumpable) - def __load_ps_mode_config(self, custom_op): """Load stream_max_parallel_num config ,and add to custom_optimizers Args: @@ -829,8 +820,6 @@ class NPUEstimator(estimator_lib.Estimator): self.__load_ac_parallel_enable_config(config, custom_op) - self.__load_quant_dumpable_config(config, custom_op) - self.__load_ps_mode_config(custom_op) self._load_op_performance_config(config, custom_op) diff --git a/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt b/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt index 9c859bada..3af7fa793 100644 --- a/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt +++ b/tf_adapter/tests/st/optimizers/pbtxt/om_test_build_geop.pbtxt @@ -134,12 +134,6 @@ node { s: "0" } } - attr { - key: "_quant_dumpable" - value { - s: "0" - } - } attr { key: "_is_tailing_optimization" value { diff --git a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt index 9c859bada..3af7fa793 100644 --- a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt +++ b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_build_geop.pbtxt @@ -134,12 +134,6 @@ node { s: "0" } } - attr { - key: "_quant_dumpable" - value { - s: "0" - } - } attr { key: "_is_tailing_optimization" value { diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 3cde8932b..3699ea49b 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -75,7 +75,6 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace(ge::OP_COMPILER_CACHE_DIR, "op_compiler_cache_dir"); option_name_map.emplace(ge::STREAM_MAX_PARALLEL_NUM, "stream_max_parallel_num"); option_name_map.emplace(ge::AC_PARALLEL_ENABLE, "ac_parallel_enable"); - option_name_map.emplace(ge::QUANT_DUMPABLE, "quant_dumpable"); option_name_map.emplace(ge::HCOM_PARALLEL, "hcom_parallel"); option_name_map.emplace(ge::HCOM_MULTI_MODE, "hcom_multi_mode"); option_name_map.emplace(ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, "is_tailing_optimization"); diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 2b8439470..099abf40c 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -457,7 +457,6 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string dump_layer; std::string stream_max_parallel_num; std::string ac_parallel_enable; - std::string quant_dumpable; std::string npuOptimizer; std::string is_tailing_optimization = "0"; std::string op_select_implmode; @@ -524,7 +523,6 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr } (void) ctx->GetAttr("_stream_max_parallel_num", &stream_max_parallel_num); (void) ctx->GetAttr("_ac_parallel_enable", &ac_parallel_enable); - (void) ctx->GetAttr("_quant_dumpable", &quant_dumpable); (void) ctx->GetAttr("_is_tailing_optimization", &is_tailing_optimization); (void) ctx->GetAttr("_op_select_implmode", &op_select_implmode); (void) ctx->GetAttr("_optypelist_for_implmode", &optypelist_for_implmode); @@ -567,7 +565,6 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options[ge::HCOM_PARALLEL] = hcom_parallel; sess_options[ge::STREAM_MAX_PARALLEL_NUM] = stream_max_parallel_num; sess_options[ge::AC_PARALLEL_ENABLE] = ac_parallel_enable; - sess_options[ge::QUANT_DUMPABLE] = quant_dumpable; if (!graph_memory_max_size.empty()) { sess_options[ge::GRAPH_MEMORY_MAX_SIZE] = graph_memory_max_size; } @@ -1125,7 +1122,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string dump_layer; std::string stream_max_parallel_num; std::string ac_parallel_enable; - std::string quant_dumpable; std::string soc_config; std::string is_tailing_optimization = "0"; @@ -1216,7 +1212,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto dump_debug_mode_value = attrs.Find("_dump_debug_mode"); auto stream_max_parallel_num_value = attrs.Find("_stream_max_parallel_num"); auto ac_parallel_enable_value = attrs.Find("_ac_parallel_enable"); - auto quant_dumpable_value = attrs.Find("_quant_dumpable"); auto soc_config_value = attrs.Find("_soc_config"); auto graph_slice_value = attrs.Find("_graph_slice"); @@ -1381,9 +1376,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (ac_parallel_enable_value != nullptr) { ac_parallel_enable = ac_parallel_enable_value->s(); } - if (quant_dumpable_value != nullptr) { - quant_dumpable = quant_dumpable_value->s(); - } if (graph_slice_value != nullptr) { graph_slice_mode = graph_slice_value->s(); } @@ -1581,7 +1573,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["hcom_parallel"] = hcom_parallel; all_options["stream_max_parallel_num"] = stream_max_parallel_num; all_options["ac_parallel_enable"] = ac_parallel_enable; - all_options["quant_dumpable"] = quant_dumpable; if (!graph_memory_max_size.empty()) { all_options["graph_memory_max_size"] = graph_memory_max_size; } @@ -1722,7 +1713,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string dump_layer; std::string stream_max_parallel_num; std::string ac_parallel_enable; - std::string quant_dumpable; std::string soc_config; std::string hccl_timeout; std::string HCCL_algorithm; @@ -1875,9 +1865,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("ac_parallel_enable") > 0) { ac_parallel_enable = params.at("ac_parallel_enable").s(); } - if (params.count("quant_dumpable") > 0) { - quant_dumpable = params.at("quant_dumpable").s(); - } if (params.count("is_tailing_optimization") > 0) { is_tailing_optimization = params.at("is_tailing_optimization").b(); @@ -2352,7 +2339,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["hcom_parallel"] = std::to_string(static_cast(hcom_parallel)); sess_options["stream_max_parallel_num"] = stream_max_parallel_num; sess_options["ac_parallel_enable"] = ac_parallel_enable; - sess_options["quant_dumpable"] = quant_dumpable; if (!graph_memory_max_size.empty()) { sess_options["graph_memory_max_size"] = graph_memory_max_size; } diff --git a/tf_adapter/util/session_manager.cc b/tf_adapter/util/session_manager.cc index f4d504592..465b8d99e 100644 --- a/tf_adapter/util/session_manager.cc +++ b/tf_adapter/util/session_manager.cc @@ -111,8 +111,6 @@ void SessionManager::PrintGeSessionOptions(std::map &s ADP_LOG(INFO) << "[GEOP] stream_max_parallel_num :" << sess_options[ge::STREAM_MAX_PARALLEL_NUM]; // ac parallel enable ADP_LOG(INFO) << "[GEOP] ac_parallel_enable :" << sess_options[ge::AC_PARALLEL_ENABLE]; - // quant dumpable - ADP_LOG(INFO) << "[GEOP] quant_dumpable :" << sess_options[ge::QUANT_DUMPABLE]; // graph memory configuration if (!sess_options[ge::GRAPH_MEMORY_MAX_SIZE].empty()) { diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index cd94f4c51..698e05855 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -74,7 +74,6 @@ const std::map kConfigurableOptions = { {"op_compiler_cache_dir", ge::OP_COMPILER_CACHE_DIR}, {"stream_max_parallel_num", ge::STREAM_MAX_PARALLEL_NUM}, {"ac_parallel_enable", ge::AC_PARALLEL_ENABLE}, - {"quant_dumpable", ge::QUANT_DUMPABLE}, {"hcom_parallel", ge::HCOM_PARALLEL}, {"hcom_multi_mode", ge::HCOM_MULTI_MODE}, {"is_tailing_optimization", ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index fefb60daa..11522a749 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -46,7 +46,6 @@ class NpuConfig(NpuBaseConfig): self.op_compiler_cache_dir = OptionValue(None, None) self.stream_max_parallel_num = OptionValue(None, None) self.ac_parallel_enable = OptionValue(None, ['0', '1']) - self.quant_dumpable = OptionValue(None, ['0', '1']) self.hcom_parallel = OptionValue(True, [True, False]) self.hcom_multi_mode = OptionValue(None, None) self.is_tailing_optimization = OptionValue(False, [True, False]) diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h index 047e4295e..78f64f83b 100644 --- a/tf_adapter_2.x/tests/stub/include/stub/defines.h +++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h @@ -173,10 +173,6 @@ const std::string STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; // its value should be "0" or "1", default value is "0" const std::string AC_PARALLEL_ENABLE = "ac_parallel_enable"; -// Configure whether to ensure that the input and output of quant nodes can be dumped. -// its value should be "0" or "1", default value is "0" -const std::string QUANT_DUMPABLE = "quant_dumpable"; - // congigure outputDatatype to setting net output type const std::string OUTPUT_DATATYPE = "ge.outputDatatype"; -- Gitee