From 1e074b982454ec69cf52f008dbbbe52fc34d37b6 Mon Sep 17 00:00:00 2001
From: lzl
Date: Thu, 9 Jun 2022 20:00:51 +0800
Subject: [PATCH] Add jit_compile option and pass it through to GE as
 ge.jit_compile

---
 tf_adapter/interface_spec/api_npu_config.pyh   |  2 +-
 .../npu_bridge/estimator/npu/npu_config.py     |  4 +++-
 .../npu_bridge/estimator/npu/npu_estimator.py  |  1 +
 tf_adapter/util/npu_attrs.cc                   | 18 ++++++++++++++++++
 tf_adapter_2.x/npu_device/core/npu_wrapper.cpp |  1 +
 .../python/npu_device/configs/npu_config.py    |  1 +
 .../tests/stub/include/stub/defines.h          |  3 +++
 7 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 4ade8cc56..54e4a8244 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -17,7 +17,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                train_distribute=None, eval_distribute=None, local_rank_id=None, local_device_list=None,
                session_device_id=None, distribute_config=None, modify_mixlist=None, op_precision_mode=None,
                device_type="default_device_type", soc_config=None, hccl_timeout=None, op_wait_timeout=None, op_execute_timeout=None, HCCL_algorithm=None,
-               customize_dtypes=None, op_debug_config=None, memory_config=None, experimental_config=None):
+               customize_dtypes=None, op_debug_config=None, memory_config=None, experimental_config=None, jit_compile=True):
 
 class ProfilingConfig():
   def __init__(self, enable_profiling=False, profiling_options=None):

diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index f60f441e1..0fdcf8161 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -99,7 +99,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                customize_dtypes=None,
                op_debug_config=None,
                memory_config=None,
-               experimental_config=None
+               experimental_config=None,
+               jit_compile=True
                ):
     """
     Constructs a NPUConfig.
@@ -237,6 +238,7 @@ class NPURunConfig(run_config_lib.RunConfig):
     self._op_debug_config = op_debug_config
     self._memory_config = memory_config
     self._experimental_config = self._get_experimental_config(experimental_config)
+    self._jit_compile = jit_compile
 
     super(NPURunConfig, self).__init__(
         model_dir=model_dir,

diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 19bf79fba..959cdee3a 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -733,6 +733,7 @@ class NPUEstimator(estimator_lib.Estimator):
         if config._experimental_config._logical_device_id is not None:
             custom_op.parameter_map["experimental_logical_device_id"].s = tf.compat.as_bytes(
                 config._experimental_config._logical_device_id)
+        custom_op.parameter_map["jit_compile"].b = config._jit_compile
 
         self.__load_session_device_id(config, custom_op)
         self.__load_modify_mixlist(config, custom_op)

diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 394ee0f66..fa4eda364 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -365,6 +365,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   std::string HCCL_algorithm;
   std::string atomic_clean_policy = "0";
   std::string static_memory_policy;
+  std::string jit_compile = "1";
 
   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     (void)ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
@@ -421,6 +422,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
     (void)ctx->GetAttr("_HCCL_algorithm", &HCCL_algorithm);
     (void)ctx->GetAttr("_atomic_clean_policy", &atomic_clean_policy);
     (void)ctx->GetAttr("_static_memory_policy", &static_memory_policy);
+    (void)ctx->GetAttr("_jit_compile", &jit_compile);
   }
 
   // session options
@@ -459,6 +461,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   sess_options["ge.exec.hcclExecuteTimeOut"] = hccl_timeout;
   sess_options["HCCL_algorithm"] = HCCL_algorithm;
   sess_options["atomic_clean_policy"] = atomic_clean_policy;
+  sess_options["jit_compile"] = jit_compile;
+  sess_options["ge.jit_compile"] = jit_compile;
 
   return sess_options;
 }
@@ -953,6 +957,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string graph_exec_timeout;
   std::string logical_device_cluster_deploy_mode = "LB";
   std::string logical_device_id;
+  std::string jit_compile = "1";
 
   auto NpuOptimizer_value = attrs.Find("_NpuOptimizer");
   auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc");
@@ -1021,6 +1026,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   auto graph_exec_timeout_value = attrs.Find("_graph_exec_timeout");
   auto logical_device_cluster_deploy_mode_value = attrs.Find("_logical_device_cluster_deploy_mode");
   auto logical_device_id_value = attrs.Find("_logical_device_id");
+  auto jit_compile_value = attrs.Find("_jit_compile");
 
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
@@ -1246,6 +1252,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     if (logical_device_id_value != nullptr) {
       logical_device_id = logical_device_id_value->s();
     }
+    if (jit_compile_value != nullptr) {
+      jit_compile = jit_compile_value->s();
+    }
   }
 
   all_options["variable_format_optimize"] = variable_format_optimize;
@@ -1321,6 +1330,8 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   all_options["graph_exec_timeout"] = graph_exec_timeout;
all_options["logical_device_cluster_deploy_mode"] = logical_device_cluster_deploy_mode; all_options["logical_device_id"] = logical_device_id; + all_options["jit_compile"] = jit_compile; + all_options["ge.jit_compile"] = jit_compile; return all_options; } @@ -1411,6 +1422,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options int graph_exec_timeout = 600000; std::string logical_device_cluster_deploy_mode = "LB"; std::string logical_device_id; + bool jit_compile = true; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { @@ -1764,6 +1776,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("experimental_logical_device_id")) { logical_device_id = params.at("experimental_logical_device_id").s(); } + if (params.count("jit_compile")) { + jit_compile = params.at("jit_compile").b(); + } } } @@ -1853,6 +1868,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options // Commercial version has been released, temporarily used init_options_["GE_USE_STATIC_MEMORY"] = static_memory_policy; init_options_["ge.exec.staticMemoryPolicy"] = static_memory_policy; + init_options_["jit_compile"] = std::to_string(static_cast(jit_compile)); + init_options_["ge.jit_compile"] = std::to_string(static_cast(jit_compile)); + init_options_["ge.hcomMultiMode"] = std::to_string(hcom_multi_mode); init_options_[ge::MODIFY_MIXLIST] = modify_mixlist; init_options_["ge.fusionSwitchFile"] = fusion_switch_file; diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index db7acdc6d..5719306f8 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -100,6 +100,7 @@ const std::map kConfigurableOptions = { {"graph_exec_timeout", "ge.exec.graphExecTimeout"}, {"logical_device_cluster_deploy_mode", ge::OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE}, {"logical_device_id", ge::OPTION_EXEC_LOGICAL_DEVICE_ID}, + {"jit_compile", "ge.jit_compile"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}}; diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 1d86a9081..034a47a20 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -54,6 +54,7 @@ class NpuConfig(NpuBaseConfig): self.profiling_config = NpuProfilingConfig() self.enable_small_channel = OptionValue(False, [True, False]) self.graph_exec_timeout = OptionValue(None, None) + self.jit_compile = OptionValue(True, [True, False]) # Configuration for experiment self.experimental = NpuExperimentalConfig() diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h index 121df364d..daf75dbb3 100644 --- a/tf_adapter_2.x/tests/stub/include/stub/defines.h +++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h @@ -201,6 +201,9 @@ const std::string BUFFER_OPTIMIZE = "ge.bufferOptimize"; // Configure Small Channel flag const std::string ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; +// Configure Jit Compile +const std::string JIT_COMPILE = "ge.jit_compile"; + // Configure Compress Weight flag const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; -- Gitee