From beb87999e5cfe86cd8b8f387a7ca9ff3c8df7290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=A1=82=E5=86=9B?= Date: Mon, 10 Mar 2025 12:32:35 +0000 Subject: [PATCH] =?UTF-8?q?!2927=20add=20variable=5Fuse=5Fsuper=5Fhuge=5Fp?= =?UTF-8?q?age=20Merge=20pull=20request=20!2927=20from=20=E9=BB=84?= =?UTF-8?q?=E6=A1=82=E5=86=9B/huge=5Fpage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/interface_spec/api_npu_config.pyh | 3 ++- .../npu_bridge/estimator/npu/npu_config.py | 5 +++- .../npu_bridge/estimator/npu/npu_estimator.py | 3 +++ tf_adapter/util/ge_plugin.cc | 1 + tf_adapter/util/npu_attrs.cc | 26 +++++++++++++++++++ .../npu_device/core/npu_wrapper.cpp | 2 ++ .../npu_device/configs/memory_config.py | 1 + 7 files changed, 39 insertions(+), 2 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 492d996e9..21be81cae 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -36,4 +36,5 @@ class DynamicInputConfig(): def __init__(self, input_shape, dynamic_dims, dynamic_node_type): class MemoryConfig(): - def __init__(self, atomic_clean_policy=0, static_memory_policy=0, memory_optimization_policy=None): \ No newline at end of file + def __init__(self, atomic_clean_policy=0, static_memory_policy=0, memory_optimization_policy=None, + variable_use_1g_huge_page=0): \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index b8607ff2c..04b63e1ab 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -368,18 +368,21 @@ class MemoryConfig(): def __init__(self, atomic_clean_policy=0, static_memory_policy=0, - memory_optimization_policy=None): + memory_optimization_policy=None, + variable_use_1g_huge_page=0): """ Constructs a MemoryConfig. Args: atomic_clean_policy: atomic_clean_policy, default is 0. static_memory_policy: static_memory_policy. memory_optimization_policy: memory_optimization_policy. + variable_use_1g_huge_page: variable_use_1g_huge_page. """ self._atomic_clean_policy = atomic_clean_policy self._static_memory_policy = static_memory_policy self._memory_optimization_policy = memory_optimization_policy + self._variable_use_1g_huge_page = variable_use_1g_huge_page diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 67bd1244a..737f97c03 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -522,6 +522,9 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map[ "memory_optimization_policy"].s = tf.compat.as_bytes( config._memory_config._memory_optimization_policy) + if config._memory_config._variable_use_1g_huge_page is not None: + custom_op.parameter_map[ + "variable_use_1g_huge_page"].i = config._memory_config._variable_use_1g_huge_page if (config._experimental_config is not None) and ( config._experimental_config._graph_memory_optimize_config is not None) and ( config._experimental_config._graph_memory_optimize_config._graph_slice is not None): diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 5c9525c98..41c8ad26e 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -122,6 +122,7 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.exec.opExecuteTimeout", "op_execute_timeout"); option_name_map.emplace("op_debug_config", "op_debug_config"); option_name_map.emplace("ge.exec.staticMemoryPolicy", "static_memory_policy"); + option_name_map.emplace("ge.variableUse1gHugePage", "variable_use_1g_huge_page"); option_name_map.emplace("ge.socVersion", "soc_config"); option_name_map.emplace("ge.esClusterConfig", "es_cluster_config"); option_name_map.emplace("ge.executeTimes", "execute_times"); diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index f28fec892..8e580ab6e 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -480,6 +480,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string atomic_clean_policy = "0"; std::string memory_optimization_policy; std::string static_memory_policy = "0"; + std::string variable_use_1g_huge_page = "0"; std::string topo_sorting_mode; std::string insert_op_file; std::string resource_config_path; @@ -550,6 +551,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_atomic_clean_policy", &atomic_clean_policy); (void) ctx->GetAttr("_memory_optimization_policy", &memory_optimization_policy); (void) ctx->GetAttr("_static_memory_policy", &static_memory_policy); + (void) ctx->GetAttr("_variable_use_1g_huge_page", &variable_use_1g_huge_page); (void) ctx->GetAttr("_topo_sorting_mode", &topo_sorting_mode); (void) ctx->GetAttr("_insert_op_file", &insert_op_file); (void) ctx->GetAttr("_resource_config_path", &resource_config_path); @@ -615,6 +617,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["ge.exec.atomicCleanPolicy"] = atomic_clean_policy; sess_options["memory_optimization_policy"] = memory_optimization_policy; sess_options["ge.exec.memoryOptimizationPolicy"] = memory_optimization_policy; + sess_options["variable_use_1g_huge_page"] = variable_use_1g_huge_page; + sess_options["ge.variableUse1gHugePage"] = variable_use_1g_huge_page; sess_options["topo_sorting_mode"] = topo_sorting_mode; sess_options["ge.topoSortingMode"] = topo_sorting_mode; sess_options["insert_op_file"] = insert_op_file; @@ -651,6 +655,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string precision_mode_v2; std::string profiling_mode = "0"; std::string static_memory_policy = "0"; + std::string variable_use_1g_huge_page = "0"; std::string auto_tune_mode; std::string graph_run_mode = "1"; std::string op_debug_level; @@ -722,6 +727,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_op_debug_config", &op_debug_config); (void) ctx->GetAttr("_graph_exec_timeout", &graph_exec_timeout); (void) ctx->GetAttr("_static_memory_policy", &static_memory_policy); + (void) ctx->GetAttr("_variable_use_1g_huge_page", &variable_use_1g_huge_page); (void) ctx->GetAttr("_logical_device_cluster_deploy_mode", &logical_device_cluster_deploy_mode); (void) ctx->GetAttr("_logical_device_id", &logical_device_id); (void) ctx->GetAttr("_model_deploy_mode", &model_deploy_mode); @@ -771,6 +777,10 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["static_memory_policy"] = static_memory_policy; // Commercial version has been released, temporarily used init_options_["ge.exec.staticMemoryPolicy"] = static_memory_policy; + + init_options_["variable_use_1g_huge_page"] = variable_use_1g_huge_page; + // Commercial version has been released, temporarily used + init_options_["ge.variableUse1gHugePage"] = variable_use_1g_huge_page; if (!soc_config.empty()) { init_options_["ge.socVersion"] = soc_config; } @@ -1169,6 +1179,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string atomic_clean_policy = "0"; std::string memory_optimization_policy; std::string static_memory_policy = "0"; + std::string variable_use_1g_huge_page = "0"; std::string auto_tune_mode; std::string graph_run_mode = "1"; std::string op_debug_level; @@ -1268,6 +1279,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto atomic_clean_policy_value = attrs.Find("_atomic_clean_policy"); auto memory_optimization_policy_value = attrs.Find("_memory_optimization_policy"); auto static_memory_policy_value = attrs.Find("_static_memory_policy"); + auto variable_use_1g_huge_page_value = attrs.Find("_variable_use_1g_huge_page"); auto auto_tune_mode_value = attrs.Find("_auto_tune_mode"); auto graph_run_mode_value = attrs.Find("_graph_run_mode"); auto op_debug_level_value = attrs.Find("_op_debug_level"); @@ -1463,6 +1475,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (static_memory_policy_value != nullptr) { static_memory_policy = static_memory_policy_value->s(); } + if (variable_use_1g_huge_page_value != nullptr) { + variable_use_1g_huge_page = variable_use_1g_huge_page_value->s(); + } if (profiling_mode == "1" && profiling_options.empty()) { profiling_options = profiling_default_options; } @@ -1681,6 +1696,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["atomic_clean_policy"] = atomic_clean_policy; all_options["memory_optimization_policy"] = memory_optimization_policy; all_options["static_memory_policy"] = static_memory_policy; + all_options["variable_use_1g_huge_page"] = variable_use_1g_huge_page; // Commercial version has been released, temporarily used all_options["ge.autoTuneMode"] = auto_tune_mode; all_options["graph_run_mode"] = graph_run_mode; @@ -1818,6 +1834,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options int64_t atomic_clean_policy = 0L; std::string memory_optimization_policy; std::string static_memory_policy = "0"; + std::string variable_use_1g_huge_page = "0"; std::string auto_tune_mode; int64_t graph_run_mode = 1L; std::string enable_scope_fusion_passes; @@ -2364,6 +2381,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("atomic_clean_policy") > 0) { atomic_clean_policy = params.at("atomic_clean_policy").i(); } + if (params.count("variable_use_1g_huge_page") > 0) { + variable_use_1g_huge_page = std::to_string(params.at("variable_use_1g_huge_page").i()); + } if (params.count("memory_optimization_policy") > 0) { memory_optimization_policy = params.at("memory_optimization_policy").s(); } @@ -2528,6 +2548,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["ge.exec.atomicCleanPolicy"] = std::to_string(atomic_clean_policy); sess_options["memory_optimization_policy"] = memory_optimization_policy; sess_options["ge.exec.memoryOptimizationPolicy"] = memory_optimization_policy; + sess_options["variable_use_1g_huge_page"] = variable_use_1g_huge_page; + sess_options["ge.variableUse1gHugePage"] = variable_use_1g_huge_page; sess_options["external_weight"] = std::to_string(static_cast(external_weight)); sess_options["ge.externalWeight"] = std::to_string(static_cast(external_weight)); sess_options["jit_compile"] = jit_compile; @@ -2575,6 +2597,10 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options // Commercial version has been released, temporarily used init_options_["ge.exec.staticMemoryPolicy"] = static_memory_policy; + init_options_["variable_use_1g_huge_page"] = variable_use_1g_huge_page; + // Commercial version has been released, temporarily used + init_options_["ge.variableUse1gHugePage"] = variable_use_1g_huge_page; + init_options_["ge.hcomMultiMode"] = std::to_string(hcom_multi_mode); init_options_[ge::MODIFY_MIXLIST] = modify_mixlist; init_options_["ge.fusionSwitchFile"] = fusion_switch_file; diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 67dec449f..c652f2751 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -100,6 +100,7 @@ const std::map kGlobalConfigOptions = { {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, {"static_memory_policy", "ge.exec.staticMemoryPolicy"}, + {"variable_use_1g_huge_page", "ge.variableUse1gHugePage"}, {"_distribute.cm_chief_ip", ge::OPTION_EXEC_CM_CHIEF_IP}, {"_distribute.cm_chief_port", ge::OPTION_EXEC_CM_CHIEF_PORT}, {"_distribute.cm_chief_worker_device", ge::OPTION_EXEC_CM_CHIEF_DEVICE}, @@ -111,6 +112,7 @@ const std::map kSessionConfigOptions = { {"graph_run_mode", ge::OPTION_GRAPH_RUN_MODE}, {"graph_memory_max_size", ge::GRAPH_MEMORY_MAX_SIZE}, {"variable_memory_max_size", ge::VARIABLE_MEMORY_MAX_SIZE}, + {"variable_use_1g_huge_page", "ge.variableUse1gHugePage"}, {"variable_format_optimize", "ge.exec.variable_acc"}, {"fusion_switch_file", ge::FUSION_SWITCH_FILE}, {"op_select_implmode", ge::OP_SELECT_IMPL_MODE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/memory_config.py b/tf_adapter_2.x/python/npu_device/configs/memory_config.py index 333d01963..06581e74d 100644 --- a/tf_adapter_2.x/python/npu_device/configs/memory_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/memory_config.py @@ -25,5 +25,6 @@ class MemoryConfig(NpuBaseConfig): def __init__(self): self.atomic_clean_policy = OptionValue(0, [0, 1]) self.static_memory_policy = OptionValue(0, [0, 1, 2, 3, 4]) + self.variable_use_1g_huge_page = OptionValue(0, [0, 1, 2]) super(MemoryConfig, self).__init__() -- Gitee