diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 4ade8cc563b5864892cdca3ad030cf6e7b57f5af..54e4a8244f9e45f899e4f9dac6deb4fa5c8799a4 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -17,7 +17,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                  train_distribute=None, eval_distribute=None, local_rank_id=None, local_device_list=None,
                  session_device_id=None, distribute_config=None, modify_mixlist=None, op_precision_mode=None,
                  device_type="default_device_type", soc_config=None, hccl_timeout=None, op_wait_timeout=None, op_execute_timeout=None, HCCL_algorithm=None,
-                 customize_dtypes=None, op_debug_config=None, memory_config=None, experimental_config=None):
+                 customize_dtypes=None, op_debug_config=None, memory_config=None, experimental_config=None, jit_compile=True):
 
 class ProfilingConfig():
     def __init__(self, enable_profiling=False, profiling_options=None):
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index f60f441e1bd1644a1e95e5eee8b88dcc482c167a..0fdcf8161d5aa66c6f12317d78847f0ecdb3156e 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -99,7 +99,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                  customize_dtypes=None,
                  op_debug_config=None,
                  memory_config=None,
-                 experimental_config=None
+                 experimental_config=None,
+                 jit_compile=True
                  ):
         """
         Constructs a NPUConfig.
@@ -237,6 +238,7 @@
         self._op_debug_config = op_debug_config
         self._memory_config = memory_config
         self._experimental_config = self._get_experimental_config(experimental_config)
+        self._jit_compile = jit_compile
 
         super(NPURunConfig, self).__init__(
             model_dir=model_dir,
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 19bf79fba374df921591165167296ee9c3840c40..959cdee3a343129d9e6b5149e47f155be9d99641 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -733,6 +733,7 @@
         if config._experimental_config._logical_device_id is not None:
             custom_op.parameter_map["experimental_logical_device_id"].s = tf.compat.as_bytes(
                 config._experimental_config._logical_device_id)
+        custom_op.parameter_map["jit_compile"].b = config._jit_compile
 
         self.__load_session_device_id(config, custom_op)
         self.__load_modify_mixlist(config, custom_op)
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 394ee0f66bf53e0077e7c923745331c462fc0181..fa4eda364d3ca387e2043744111e44a7ec0a90a7 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -365,6 +365,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   std::string HCCL_algorithm;
   std::string atomic_clean_policy = "0";
   std::string static_memory_policy;
+  std::string jit_compile = "1";
 
   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     (void)ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
@@ -421,6 +422,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
     (void)ctx->GetAttr("_HCCL_algorithm", &HCCL_algorithm);
     (void)ctx->GetAttr("_atomic_clean_policy", &atomic_clean_policy);
     (void)ctx->GetAttr("_static_memory_policy", &static_memory_policy);
+    (void)ctx->GetAttr("_jit_compile", &jit_compile);
   }
 
   // session options
@@ -459,6 +461,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   sess_options["ge.exec.hcclExecuteTimeOut"] = hccl_timeout;
   sess_options["HCCL_algorithm"] = HCCL_algorithm;
   sess_options["atomic_clean_policy"] = atomic_clean_policy;
+  sess_options["jit_compile"] = jit_compile;
+  sess_options["ge.jit_compile"] = jit_compile;
 
   return sess_options;
 }
@@ -953,6 +957,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string graph_exec_timeout;
   std::string logical_device_cluster_deploy_mode = "LB";
   std::string logical_device_id;
+  std::string jit_compile = "1";
 
   auto NpuOptimizer_value = attrs.Find("_NpuOptimizer");
   auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc");
@@ -1021,6 +1026,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   auto graph_exec_timeout_value = attrs.Find("_graph_exec_timeout");
   auto logical_device_cluster_deploy_mode_value = attrs.Find("_logical_device_cluster_deploy_mode");
   auto logical_device_id_value = attrs.Find("_logical_device_id");
+  auto jit_compile_value = attrs.Find("_jit_compile");
 
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
@@ -1246,6 +1252,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     if (logical_device_id_value != nullptr) {
       logical_device_id = logical_device_id_value->s();
     }
+    if (jit_compile_value != nullptr) {
+      jit_compile = jit_compile_value->s();
+    }
   }
 
   all_options["variable_format_optimize"] = variable_format_optimize;
@@ -1321,6 +1330,8 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   all_options["graph_exec_timeout"] = graph_exec_timeout;
   all_options["logical_device_cluster_deploy_mode"] = logical_device_cluster_deploy_mode;
   all_options["logical_device_id"] = logical_device_id;
+  all_options["jit_compile"] = jit_compile;
+  all_options["ge.jit_compile"] = jit_compile;
 
   return all_options;
 }
@@ -1411,6 +1422,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   int graph_exec_timeout = 600000;
   std::string logical_device_cluster_deploy_mode = "LB";
   std::string logical_device_id;
+  bool jit_compile = true;
 
   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
@@ -1764,6 +1776,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
       if (params.count("experimental_logical_device_id")) {
        logical_device_id = params.at("experimental_logical_device_id").s();
       }
+      if (params.count("jit_compile")) {
+        jit_compile = params.at("jit_compile").b();
+      }
     }
   }
 
@@ -1853,6 +1868,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   // Commercial version has been released, temporarily used
   init_options_["GE_USE_STATIC_MEMORY"] = static_memory_policy;
   init_options_["ge.exec.staticMemoryPolicy"] = static_memory_policy;
+  init_options_["jit_compile"] = std::to_string(static_cast<int32_t>(jit_compile));
+  init_options_["ge.jit_compile"] = std::to_string(static_cast<int32_t>(jit_compile));
+
   init_options_["ge.hcomMultiMode"] = std::to_string(hcom_multi_mode);
   init_options_[ge::MODIFY_MIXLIST] = modify_mixlist;
   init_options_["ge.fusionSwitchFile"] = fusion_switch_file;
diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
index db7acdc6d915ef997492852f4c9c9261f0427c40..5719306f8fc590f94d4f539331e1a239108fc69f 100644
--- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
@@ -100,6 +100,7 @@ const std::map<std::string, std::string> kConfigurableOptions = {
   {"graph_exec_timeout", "ge.exec.graphExecTimeout"},
   {"logical_device_cluster_deploy_mode", ge::OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE},
   {"logical_device_id", ge::OPTION_EXEC_LOGICAL_DEVICE_ID},
+  {"jit_compile", "ge.jit_compile"},
   // private options
   {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID},
   {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}};
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index 1d86a90817ea9415f8e993a651adb32685017ae2..034a47a20c685ddad1e3b3737abf158240e1e7cb 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -54,6 +54,7 @@ class NpuConfig(NpuBaseConfig):
         self.profiling_config = NpuProfilingConfig()
         self.enable_small_channel = OptionValue(False, [True, False])
         self.graph_exec_timeout = OptionValue(None, None)
+        self.jit_compile = OptionValue(True, [True, False])
 
         # Configuration for experiment
         self.experimental = NpuExperimentalConfig()
diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h
index 121df364d4d78d5638ed552672114ac965c809f0..daf75dbb32f3817547373a42261c1405689c0129 100644
--- a/tf_adapter_2.x/tests/stub/include/stub/defines.h
+++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h
@@ -201,6 +201,9 @@ const std::string BUFFER_OPTIMIZE = "ge.bufferOptimize";
 // Configure Small Channel flag
 const std::string ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel";
 
+// Configure Jit Compile
+const std::string JIT_COMPILE = "ge.jit_compile";
+
 // Configure Compress Weight flag
 const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight";
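For reference, a minimal usage sketch of the new switch (not part of the patch itself). On the TF1.x estimator path, `jit_compile` is an ordinary `NPURunConfig` argument that defaults to `True` and, per the changes above, is forwarded to GE as `"jit_compile"`/`"ge.jit_compile"`:

```python
from npu_bridge.estimator.npu.npu_config import NPURunConfig

# jit_compile defaults to True; passing False ends up as "ge.jit_compile" = "0"
# in the session/init options handed to GE.
run_config = NPURunConfig(jit_compile=False)
```

On the TF2.x path the option lives on `NpuConfig` as `OptionValue(True, [True, False])`, so only `True`/`False` are accepted. This snippet assumes the usual `npu_device.global_options()` and `npu_device.open()` entry points of tf_adapter_2.x, which are not shown in this diff:

```python
import npu_device

npu_device.global_options().jit_compile = False  # mapped to "ge.jit_compile"
npu_device.open().as_default()
```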