diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index a7e5adb9cc18adf061bcee218d111bd3e88c60c7..03bc78df91c46ee9edcaa63db8bdace3e67456f8 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -202,6 +202,17 @@ void GeOp::Initialize(OpKernelConstruction *ctx) {
   Status s = ctx->GetAttr("_session", &tf_session_);
   if (s.ok()) { LOG(INFO) << "[GEOP] get session info from attr, tf session: " << tf_session_; }
+  ctx->GetAttr("_dynamic_input", &dynamic_input_);
+  if (!dynamic_input_.empty() && dynamic_input_ == "1") {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("_dynamic_graph_execute_mode", &dynamic_graph_execute_mode_));
+    ctx->GetAttr("_getnext_inputs_shape_range", &getnext_inputs_shape_range_);
+    ctx->GetAttr("_data_inputs_shape_range", &data_inputs_shape_range_);
+    LOG(INFO) << "[GEOP] dynamic_input: " << dynamic_input_
+              << ", dynamic_graph_execute_mode: " << dynamic_graph_execute_mode_
+              << ", getnext_inputs_shape_range: " << getnext_inputs_shape_range_
+              << ", data_inputs_shape_range: " << data_inputs_shape_range_;
+  }
+
   // global environment Initialize, invoke once for each process
   string sess_config = "";
   OP_REQUIRES_OK(ctx, ctx->GetAttr("_NpuOptimizer", &sess_config));
@@ -430,22 +441,24 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
   }
 
   // if input shapes changed, cache graphs
-  uint32_t cache_graph_id;
+  uint32_t cache_graph_id = graph_id_;
   bool is_set_dynamic_config = !sess_options_["ge.inputShape"].empty() && !sess_options_["ge.dynamicDims"].empty();
   bool is_tuning = !mstune_mode_.empty() && !work_path_.empty();
+  bool is_lazy_recompile_mode = dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile";
   if (is_set_dynamic_config && is_tuning) {
     LOG(FATAL) << "dynamic input config can not use with mstuning.";
   } else if (is_set_dynamic_config && !is_tuning) {
-    cache_graph_id = graph_id_;
     if (InitRebuildFlag(cache_graph_id) != 0) {
       OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done);
       return;
     }
   } else if (!is_set_dynamic_config && is_tuning) {
-    cache_graph_id = graph_id_;
+    LOG(INFO) << "[GEOP] in tune func, do not rebuild graph.";
  } else {
-    // if set dynamic input config, do not cache graphs.
-    GetExecGraphId(ctx, cache_graph_id, input_shapes);
+    // in dynamic input lazy_recompile mode, look up the cached graph for the current input shapes
+    if (is_lazy_recompile_mode) {
+      GetExecGraphId(ctx, cache_graph_id, input_shapes);
+    }
     if (InitRebuildFlag(cache_graph_id) != 0) {
       OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done);
       return;
@@ -581,6 +594,11 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
     LOG(INFO) << "[GEOP] set graph option.";
     graph_options_["ge.exec.placement"] = "HOST";
   }
+  if (dynamic_input_ == "1") {
+    graph_options_["ge.exec.dynamicInput"] = dynamic_input_;
+    graph_options_["ge.exec.dynamicGraphExecuteMode"] = dynamic_graph_execute_mode_;
+    graph_options_["ge.exec.dataInputsShapeRange"] = data_inputs_shape_range_;
+  }
   if (is_tuning) {
     if (!is_train_graph_) {
       LOG(INFO) << "[GEOP] in tune mode, nontraining graphs should be cache.";
@@ -631,7 +649,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
                 << " ,tf session: " << tf_session_ << " ,graph id:" << cache_graph_id;
     }
     build_flag_ = true;
-    if (!is_set_dynamic_config) {
+    if (!is_set_dynamic_config && is_lazy_recompile_mode) {
       cache_graphs_.insert(std::make_pair(input_shapes, cache_graph_id));
       graph_counts_.push_back(std::make_pair(input_shapes, 1));
     }
@@ -697,7 +715,13 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
 
 void GeOp::AddNodeAttrs(Node *node, bool &is_initialize) {
   // Add dp custom kernel label
-  if (node->type_string() == "IteratorGetNext") { node->AddAttr("_kernel", "dp"); }
+  if (node->type_string() == "IteratorGetNext") {
+    node->AddAttr("_kernel", "dp");
+    if (dynamic_input_ == "1") {
+      node->AddAttr("_dynamic_graph_execute_mode", dynamic_graph_execute_mode_);
+      node->AddAttr("_getnext_inputs_shape_range", getnext_inputs_shape_range_);
+    }
+  }
   if (node->type_string() == "Assert" || node->type_string() == "Print" || node->type_string() == "PrintV2") {
     node->AddAttr("_kernel", "extend");
   }
@@ -890,6 +914,7 @@ Status GeOp::BuildInputTensorInfo(OpKernelContext *ctx, std::vector<Tensor>
     Tensor tensor(ctx->input(i));
+    LOG(INFO) << "[GEOP] Input tensor " << i << " shape: " << tensor.shape().DebugString();
     DataType data_type = tensor.dtype();
     size_t total_bytes = tensor.TotalBytes();
     void *tensor_ptr = DMAHelper::base(&tensor);
@@ -930,6 +955,9 @@ Status GeOp::GenerateDesc(Node *&node) {
   REQUIRES_NOT_NULL(node);
   NodeDef &node_def = const_cast<NodeDef &>(node->def());
   const OpDef &op_def = node->op_def();
+  if (dynamic_input_ == "1" && node->type_string() == "IteratorGetNext") {
+    node_def.set_op("DynamicGetNext");
+  }
   std::string format = this->data_format_;  // format
   int32_t domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_RESERVED;
diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h
index deb078c209ad59cf354e9e6f5d921fea33349792..94eabae5a8373c10f770fecb6aad4f0ec1537bdd 100644
--- a/tf_adapter/kernels/geop_npu.h
+++ b/tf_adapter/kernels/geop_npu.h
@@ -136,6 +136,10 @@ class GeOp : public AsyncOpKernel {
   MsTuningFunc tuning_api_;
   string auto_tune_mode_;
   std::vector<Node *> dynamic_shape_nodes_;
+  std::string dynamic_input_;
+  std::string dynamic_graph_execute_mode_;
+  std::string data_inputs_shape_range_;
+  std::string getnext_inputs_shape_range_;
 };
 }  // namespace tensorflow
 #endif  // TENSORFLOW_KERNELS_GEOP_NPU_H_
diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
index 08da5757c1f127d6a2658ef944c05be70d3edad4..8395730cfd21580107a8df280b84687dd9cfa528 100644
--- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
+++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc
@@ -977,9 +977,10 @@ Node *AddIdentityNode(Graph *graph, const Edge *edge, const string &srcName, int
 class OMSplitter {
  public:
   OMSplitter(string groupAttribute, Graph const *graphIn, std::map<std::string, std::string> npu_optimizer_options,
-             std::map<std::string, std::string> pass_options)
+             std::map<std::string, std::string> pass_options, std::map<std::string, std::string> graph_options)
       : groupAttribute_(std::move(groupAttribute)), graphIn_(graphIn),
-        npu_optimizer_options_(std::move(npu_optimizer_options)), pass_options_(std::move(pass_options)) {}
+        npu_optimizer_options_(std::move(npu_optimizer_options)), pass_options_(std::move(pass_options)),
+        graph_options_(std::move(graph_options)) {}
   ~OMSplitter() = default;
 
   // Find subgraphs marked with 'groupAttribute', and build a new
@@ -1062,7 +1063,8 @@ class OMSplitter {
   bool isIsolatedSubgraph();
   Status SetOptions(std::map<std::string, std::string> npu_optimizer_options,
-                    std::map<std::string, std::string> pass_options);
+                    std::map<std::string, std::string> pass_options,
+                    std::map<std::string, std::string> graph_options);
 
   // GEOp node(s) in the output graph. Not owned.
   // both point to the function call node.
@@ -1101,6 +1103,7 @@ class OMSplitter {
   std::map<std::string, std::string> npu_optimizer_options_;
   std::map<std::string, std::string> pass_options_;
+  std::map<std::string, std::string> graph_options_;
 };
 
 // Returns the key attribute associated with a node in attr, Sets
@@ -1163,6 +1166,7 @@ class OMSplitter {
   std::unordered_map<string, Subgraph> subgraphs_;
   std::map<std::string, std::string> npu_optimizer_options_;
   std::map<std::string, std::string> pass_options_;
+  std::map<std::string, std::string> graph_options_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(OMSplitter);
 };
@@ -1328,6 +1332,10 @@ Status OMSplitter::Subgraph::BuildFunctionDef(const string &name, FunctionLibraryDefinition *library,
     attr_name = std::string("_") + option.first;
     AddNodeAttr(attr_name, option.second, &GEOpNodeDef_);
   }
+  for (const auto &option : graph_options_) {
+    attr_name = std::string("_") + option.first;
+    AddNodeAttr(attr_name, option.second, &GEOpNodeDef_);
+  }
   AddNodeAttr("_NpuOptimizer", "NpuOptimizer", &GEOpNodeDef_);
 
   if (library->Find(name) == nullptr) { TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef)); }
@@ -1350,9 +1358,11 @@ Status OMSplitter::Subgraph::AddGEOpNode(const std::unordered_map<const Node *, Node *> &node_images,
 
 Status OMSplitter::Subgraph::SetOptions(std::map<std::string, std::string> npu_optimizer_options,
-                                        std::map<std::string, std::string> pass_options) {
+                                        std::map<std::string, std::string> pass_options,
+                                        std::map<std::string, std::string> graph_options) {
   npu_optimizer_options_ = std::move(npu_optimizer_options);
   pass_options_ = std::move(pass_options);
+  graph_options_ = std::move(graph_options);
   return Status::OK();
 }
@@ -1376,7 +1386,7 @@ Status OMSplitter::CopySubgraphNodes(std::unordered_map<const Node *, Node *> *node_images) {
     if (!IsInSubgraph(subgraphId)) { continue; }
 
     Subgraph &subgraph = subgraphs_[subgraphId];
-    Status s = subgraph.SetOptions(npu_optimizer_options_, pass_options_);
+    Status s = subgraph.SetOptions(npu_optimizer_options_, pass_options_, graph_options_);
     if (s != Status::OK()) {
       LOG(INFO) << "Subgraph Id: " << subgraphId << "set npu optimizer error.";
       return s;
@@ -1656,11 +1666,13 @@ Status OMSplitter::BuildOutputGraph(Graph *graphOut) {
 Status OMPartitionSubgraphsInFunctions(string groupAttribute, std::unique_ptr<Graph> *graph, const string &graph_format,
                                        FunctionLibraryDefinition *flib_def,
                                        std::map<std::string, std::string> npu_optimizer_options,
-                                       std::map<std::string, std::string> pass_options) {
+                                       std::map<std::string, std::string> pass_options,
+                                       std::map<std::string, std::string> graph_options) {
   Graph *graphIn = graph->get();
   FunctionLibraryDefinition *const library = flib_def;
 
-  OMSplitter omsplitter(std::move(groupAttribute), graphIn, std::move(npu_optimizer_options), std::move(pass_options));
+  OMSplitter omsplitter(std::move(groupAttribute), graphIn, std::move(npu_optimizer_options),
+                        std::move(pass_options), std::move(graph_options));
   uint32_t subgraphNum = 0;
   TF_RETURN_IF_ERROR(omsplitter.SplitIntoSubgraphs(subgraphNum));
@@ -1706,6 +1718,61 @@ Status OMPartitionSubgraphsPass::Run(const GraphOptimizationPassOptions &options
   return Status::OK();
 }
 
+void OMPartitionSubgraphsPass::ParseInputShapeRange(std::string dynamic_inputs_shape_range, bool enable_dp,
+                                                    std::map<std::string, std::string> &graph_options) {
+  std::vector<std::string> inputsVec;
+  std::vector<std::string> shapesVec;
+  Split(dynamic_inputs_shape_range, inputsVec, ";");
+  std::sort(inputsVec.begin(), inputsVec.end());
+  for (auto tmp : inputsVec) {
+    std::vector<std::string> shapeVec;
+    Split(tmp, shapeVec, ":");
+    if (shapeVec.size() != 2) {
+      LOG(FATAL) << "dynamic_inputs_shape_range style is invalid, example:'data:[1,2];getnext:[2,3]'";
+    } else if (shapeVec[0] != "data" && shapeVec[0] != "getnext") {
+      LOG(FATAL) << "dynamic_inputs_shape_range style is invalid, example:'data:[1,2];getnext:[2,3]'";
+    } else {
+      shapesVec.push_back(shapeVec[1]);
+    }
+  }
+  if (shapesVec.empty() || shapesVec.size() > 2) {
+    LOG(FATAL) << "dynamic_inputs_shape_range is invalid, expect 1 or 2 input ranges.";
+  } else if (shapesVec.size() == 1) {
+    if (!enable_dp) {
+      graph_options["data_inputs_shape_range"] = shapesVec[0];
+    } else {
+      graph_options["getnext_inputs_shape_range"] = shapesVec[0];
+    }
+  } else {
+    if (!enable_dp) {
+      graph_options["data_inputs_shape_range"] = shapesVec[1] + shapesVec[0];
+    } else {
+      // entries are sorted, so shapesVec[0] is the "data" range and shapesVec[1] the "getnext" range
+      graph_options["data_inputs_shape_range"] = shapesVec[0];
+      graph_options["getnext_inputs_shape_range"] = shapesVec[1];
+    }
+  }
+}
+
+void OMPartitionSubgraphsPass::GetGraphDynamicExecConfig(Node *node, bool enable_dp,
+                                                         std::map<std::string, std::string> &graph_options) {
+  // read the per-node _graph_dynamic_* attrs into graph_options
+  auto node_attrs = node->def().attr();
+  const std::string kDynamicInput = "_graph_dynamic_input";
+  const std::string kDynamicGraphExecuteMode = "_graph_dynamic_graph_execute_mode";
+  const std::string kDynamicInputsShapeRange = "_graph_dynamic_inputs_shape_range";
+  if (node_attrs.find(kDynamicInput) != node_attrs.end()) {
+    bool dynamic_input = node_attrs.at(kDynamicInput).b();
+    if (dynamic_input) {
+      graph_options["dynamic_input"] = std::to_string(dynamic_input);
+      graph_options["dynamic_graph_execute_mode"] = node_attrs.at(kDynamicGraphExecuteMode).s();
+      std::string dynamic_inputs_shape_range = node_attrs.at(kDynamicInputsShapeRange).s();
+      if (!dynamic_inputs_shape_range.empty()) {
+        ParseInputShapeRange(dynamic_inputs_shape_range, enable_dp, graph_options);
+      }
+    }
+  }
+}
+
 Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr<Graph> *graph, FunctionLibraryDefinition *func_lib,
                                               const OptimizationPassRegistry::Grouping pass_group_value) {
   int graph_num;
@@ -1785,6 +1852,7 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr<Graph> *graph, FunctionLibraryDefinition *func_lib,
   string graph_format_value;
   Graph *graphIn = graph->get();
   int getnext_node_count = 0;
+  bool include_getnext = false;
   for (Node *node : graphIn->op_nodes()) {
     if (node->type_string() == "NPUInit") {
       std::string attr_name;
@@ -1824,20 +1892,33 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr<Graph> *graph, FunctionLibraryDefinition *func_lib,
       }
       graphIn->RemoveNode(node);
     }
-    if (is_set_dynamic_config && node->type_string() == "IteratorGetNext") {
-      getnext_node_count++;
+    if (node->type_string() == "IteratorGetNext") {
+      include_getnext = true;
+      if (is_set_dynamic_config) { getnext_node_count++; }
     }
   }
   if (getnext_node_count > 1) {
     LOG(FATAL) << "dynamic dims func can not support graph with " << getnext_node_count << " IteratorGetNext node.";
   }
-
+  std::map<std::string, std::string> graph_options;
+  bool enable_dp = (pass_options["enable_dp"] == "1") && include_getnext;
+  // get attr from pass_options
+  graph_options["dynamic_input"] = pass_options["dynamic_input"];
+  graph_options["dynamic_graph_execute_mode"] = pass_options["dynamic_graph_execute_mode"];
+  std::string dynamic_inputs_shape_range = pass_options["dynamic_inputs_shape_range"];
+  if (!dynamic_inputs_shape_range.empty()) {
+    ParseInputShapeRange(dynamic_inputs_shape_range, enable_dp, graph_options);
+  }
   for (Node *node : graphIn->op_nodes()) {
     if (node->type_string() == "OneShotIterator" && iterations_per_loop != 1) {
       LOG(FATAL) << "iterator_per_loop only support 1 when using OneShotIterator";
     }
-
+    // merge per-node dynamic-execution attrs (set via set_graph_dynamic_exec_config) into graph options
+    GetGraphDynamicExecConfig(node, enable_dp, graph_options);
+    if (graph_options["dynamic_input"] == "1" && iterations_per_loop > 1) {
+      LOG(FATAL) << "iterations_per_loop only supports 1 in dynamic input mode.";
+    }
     string device_name;
     if (job != "localhost" && job != "ps" && job != "default") {
       device_name = std::string("/job:") + std::string(job) + std::string("/replica:0/task:")
@@ -1955,7 +2036,7 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr<Graph> *graph, FunctionLibraryDefinition *func_lib,
     }
   }
   TF_RETURN_IF_ERROR(OMSplitter::OMPartitionSubgraphsInFunctions(
-      OMSplitter::PARTITION_SUB_GRAPH_ATTR, graph, graph_format_value, func_lib, all_options, pass_options));
+      OMSplitter::PARTITION_SUB_GRAPH_ATTR, graph, graph_format_value, func_lib, all_options, pass_options, graph_options));
   LOG(INFO) << "OMPartition subgraph_" << std::to_string(graph_num) << " SubgraphsInFunctions success.";
 
   FixupSourceAndSinkEdges(graph->get());
diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.h b/tf_adapter/optimizers/om_partition_subgraphs_pass.h
index ca4e55b4caa7e80ac11e7a008737f3157e74d010..6c91e3ba327fb833cf74dbd483489b0c7abc5ad3 100644
--- a/tf_adapter/optimizers/om_partition_subgraphs_pass.h
+++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.h
@@ -60,6 +60,9 @@ class OMPartitionSubgraphsPass : public GraphOptimizationPass {
  private:
   Status ProcessGraph(std::unique_ptr<Graph> *graph, FunctionLibraryDefinition *func_lib,
                       const OptimizationPassRegistry::Grouping pass_group_value);
+  void GetGraphDynamicExecConfig(Node *node, bool enable_dp, std::map<std::string, std::string> &graph_options);
+  void ParseInputShapeRange(std::string dynamic_inputs_shape_range, bool enable_dp,
+                            std::map<std::string, std::string> &graph_options);
 };
 }  // namespace tensorflow
 #endif  // TENSORFLOW_OM_PARTITION_SUBGRAPHS_PASS_H_
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index 4472b411fa73c4f087c78a341799a52ef85f9e7b..dc666c922eeefe79cd5b78f7ca46b6eb6c686e46 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -62,7 +62,10 @@ class NPURunConfig(run_config_lib.RunConfig):
                  op_compiler_cache_mode=None,
                  op_compiler_cache_dir=None,
                  debug_dir=None,
-                 hcom_multi_mode=False
+                 hcom_multi_mode=False,
+                 dynamic_input=False,
+                 dynamic_graph_execute_mode="dynamic_execute",
+                 dynamic_inputs_shape_range=None
                  ):
         """
         Constructs a NPUConfig.
@@ -123,6 +126,9 @@ class NPURunConfig(run_config_lib.RunConfig):
       fusion_switch_file: Fusion switch configuration file path.
       enable_compress_weight: Whether to enable global weight compression.
       compress_weight_conf:Path and file name of the node list configuration file to be compressed.
+      dynamic_input: Whether the input is dynamic.
+      dynamic_graph_execute_mode: Dynamic graph execute mode, "lazy_recompile" or "dynamic_execute".
+      dynamic_inputs_shape_range: Inputs shape range.
         """
         # Check iterations_per_loop.
@@ -196,6 +202,9 @@ class NPURunConfig(run_config_lib.RunConfig):
         self._op_compiler_cache_dir=op_compiler_cache_dir
         self._debug_dir=debug_dir
         self._hcom_multi_mode = hcom_multi_mode
+        self._dynamic_input = dynamic_input
+        self._dynamic_graph_execute_mode = dynamic_graph_execute_mode
+        self._dynamic_inputs_shape_range = dynamic_inputs_shape_range
 
         super(NPURunConfig, self).__init__(
             model_dir=model_dir,
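Usage note (not part of the patch): a minimal sketch of wiring the new options through NPURunConfig. All other constructor arguments are elided; the "~" range syntax inside the brackets is assumed from GE's shape-range convention and is not defined by this patch:

    from npu_bridge.estimator.npu.npu_config import NPURunConfig

    run_config = NPURunConfig(
        dynamic_input=True,
        dynamic_graph_execute_mode="dynamic_execute",
        dynamic_inputs_shape_range="getnext:[64,3~5];data:[2,3]")

With dynamic_graph_execute_mode="lazy_recompile" the shape range can be omitted; in "dynamic_execute" mode it should be set, as the docstring above notes.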
+ """ + def _set_op_attr(fetch, dynamic_input_attr, dynamic_graph_execute_mode_attr, + dynamic_inputs_shape_range_attr): + if isinstance(fetch, ops.Operation): + fetch._set_attr("_graph_dynamic_input", dynamic_input_attr) + fetch._set_attr("_graph_dynamic_graph_execute_mode", dynamic_graph_execute_mode_attr) + fetch._set_attr("_graph_dynamic_inputs_shape_range", dynamic_inputs_shape_range_attr) + else: + fetch.op._set_attr("_graph_dynamic_input", dynamic_input_attr) + fetch.op._set_attr("_graph_dynamic_graph_execute_mode", dynamic_graph_execute_mode_attr) + fetch.op._set_attr("_graph_dynamic_inputs_shape_range", dynamic_inputs_shape_range_attr) + return fetch + if dynamic_graph_execute_mode != "lazy_recompile" and dynamic_graph_execute_mode != "dynamic_execute": + raise ValueError("dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute") + dynamic_input_attr = attr_value_pb2.AttrValue(b = dynamic_input) + dynamic_graph_execute_mode_attr = attr_value_pb2.AttrValue(s = compat.as_bytes(dynamic_graph_execute_mode)) + if dynamic_inputs_shape_range is None: + dynamic_inputs_shape_range = "" + dynamic_inputs_shape_range_attr = attr_value_pb2.AttrValue(s = compat.as_bytes(dynamic_inputs_shape_range)) + if isinstance(fetch, ops.Operation): + fetch = _set_op_attr(fetch, dynamic_input_attr, dynamic_graph_execute_mode_attr, + dynamic_inputs_shape_range_attr) + elif isinstance(fetch, tuple): + fetches = () + for tensor in fetch: + tensor = _set_op_attr(tensor.op, dynamic_input_attr, dynamic_graph_execute_mode_attr, + dynamic_inputs_shape_range_attr) + fetches += (tensor,) + return fetches + elif isinstance(fetch, list): + fetches = [] + for tensor in fetch: + tensor = _set_op_attr(tensor.op, dynamic_input_attr, dynamic_graph_execute_mode_attr, + dynamic_inputs_shape_range_attr) + fetches += [tensor] + else: + fetch = _set_op_attr(fetch, dynamic_input_attr, dynamic_graph_execute_mode_attr, + dynamic_inputs_shape_range_attr) + return fetch \ No newline at end of file diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 1a941920543aed2ce324dfe8d9b841b255a19044..fd2a60ced25ffb5cfda07d3beec82c3bdcaf114e 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -376,6 +376,9 @@ std::map NpuAttrs::GetPassOptions(const GraphOptimizat bool lower_functional_ops = false; string job = "default"; int task_index = 0; + bool dynamic_input = false; + std::string dynamic_graph_execute_mode = "dynamic_execute"; + std::string dynamic_inputs_shape_range; for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { do_npu_optimizer = true; @@ -391,6 +394,18 @@ std::map NpuAttrs::GetPassOptions(const GraphOptimizat job = "localhost"; } if (params.count("task_index")) { task_index = params.at("task_index").i(); } + if (params.count("dynamic_input")) { + dynamic_input = params.at("dynamic_input").b(); + if (dynamic_input) { + if (params.count("dynamic_graph_execute_mode")) { + dynamic_graph_execute_mode = params.at("dynamic_graph_execute_mode").s(); + if (dynamic_graph_execute_mode != "lazy_recompile" && dynamic_graph_execute_mode != "dynamic_execute") { + LOG(FATAL) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; + } + } + if (params.count("dynamic_inputs_shape_range")) { dynamic_inputs_shape_range = params.at("dynamic_inputs_shape_range").s(); } + } + } } } if (!do_npu_optimizer) { @@ -409,6 +424,9 @@ std::map NpuAttrs::GetPassOptions(const GraphOptimizat 
pass_options["lower_functional_ops"] = std::to_string(lower_functional_ops); pass_options["job"] = job; pass_options["task_index"] = std::to_string(task_index); + pass_options["dynamic_input"] = std::to_string(dynamic_input); + pass_options["dynamic_graph_execute_mode"] = dynamic_graph_execute_mode; + pass_options["dynamic_inputs_shape_range"] = dynamic_inputs_shape_range; return pass_options; } @@ -423,6 +441,9 @@ std::map NpuAttrs::GetPassOptions(OpKernelConstruction std::string lower_functional_ops = std::to_string(false); string job = "default"; std::string task_index = "0"; + std::string dynamic_input = std::to_string(false); + std::string dynamic_graph_execute_mode = "dynamic_execute"; + std::string dynamic_inputs_shape_range; Status s = Status::OK(); string npuOptimizer; @@ -435,6 +456,9 @@ std::map NpuAttrs::GetPassOptions(OpKernelConstruction ctx->GetAttr("_lower_functional_ops", &lower_functional_ops); if (ctx->GetAttr("_job", &job) != Status::OK()) { job = "localhost"; } ctx->GetAttr("_task_index", &task_index); + ctx->GetAttr("_dynamic_input", &dynamic_input); + ctx->GetAttr("_dynamic_graph_execute_mode", &dynamic_graph_execute_mode); + ctx->GetAttr("_dynamic_inputs_shape_range", &dynamic_inputs_shape_range); } } // pass options @@ -446,6 +470,9 @@ std::map NpuAttrs::GetPassOptions(OpKernelConstruction pass_options["lower_functional_ops"] = lower_functional_ops; pass_options["job"] = job; pass_options["task_index"] = task_index; + pass_options["dynamic_input"] = dynamic_input; + pass_options["dynamic_graph_execute_mode"] = dynamic_graph_execute_mode; + pass_options["dynamic_inputs_shape_range"] = dynamic_inputs_shape_range; return pass_options; } @@ -460,6 +487,9 @@ std::map NpuAttrs::GetPassOptions(AttrSlice attrs) { std::string lower_functional_ops = std::to_string(false); string job = "default"; std::string task_index = "0"; + std::string dynamic_input = std::to_string(false); + std::string dynamic_graph_execute_mode = "dynamic_execute"; + std::string dynamic_inputs_shape_range; Status s = Status::OK(); if (attrs.Find("_NpuOptimizer") != nullptr) { @@ -479,6 +509,13 @@ std::map NpuAttrs::GetPassOptions(AttrSlice attrs) { job = "localhost"; } if (attrs.Find("_task_index") != nullptr) { task_index = attrs.Find("_task_index")->s(); } + if (attrs.Find("_dynamic_input") != nullptr) { dynamic_input = attrs.Find("_dynamic_input")->s(); } + if (attrs.Find("_dynamic_graph_execute_mode") != nullptr) { + dynamic_graph_execute_mode = attrs.Find("_dynamic_graph_execute_mode")->s(); + } + if (attrs.Find("_dynamic_inputs_shape_range") != nullptr) { + dynamic_inputs_shape_range = attrs.Find("_dynamic_inputs_shape_range")->s(); + } } // pass options pass_options["do_npu_optimizer"] = do_npu_optimizer; @@ -489,6 +526,9 @@ std::map NpuAttrs::GetPassOptions(AttrSlice attrs) { pass_options["lower_functional_ops"] = lower_functional_ops; pass_options["job"] = job; pass_options["task_index"] = task_index; + pass_options["dynamic_input"] = dynamic_input; + pass_options["dynamic_graph_execute_mode"] = dynamic_graph_execute_mode; + pass_options["dynamic_inputs_shape_range"] = dynamic_inputs_shape_range; return pass_options; } @@ -754,6 +794,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options bool lower_functional_ops = false; string job = "localhost"; int task_index = 0; + bool dynamic_input = false; + std::string dynamic_graph_execute_mode = "dynamic_execute"; + std::string dynamic_inputs_shape_range; int enable_exception_dump = 0; string op_select_implmode; string 
optypelist_for_implmode; @@ -874,6 +917,18 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options job = "localhost"; } if (params.count("task_index")) { task_index = params.at("task_index").i(); } + if (params.count("dynamic_input")) { + dynamic_input = params.at("dynamic_input").b(); + if (dynamic_input) { + if (params.count("dynamic_graph_execute_mode")) { + dynamic_graph_execute_mode = params.at("dynamic_graph_execute_mode").s(); + if (dynamic_graph_execute_mode != "lazy_recompile" && dynamic_graph_execute_mode != "dynamic_execute") { + LOG(FATAL) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; + } + } + if (params.count("dynamic_inputs_shape_range")) { dynamic_inputs_shape_range = params.at("dynamic_inputs_shape_range").s(); } + } + } if (params.count("enable_exception_dump")) { enable_exception_dump = params.at("enable_exception_dump").i(); } if (!params.count("op_select_implmode") && !params.count("optypelist_for_implmode")) { @@ -984,6 +1039,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options pass_options["lower_functional_ops"] = std::to_string(lower_functional_ops); pass_options["job"] = job; pass_options["task_index"] = std::to_string(task_index); + pass_options["dynamic_input"] = std::to_string(dynamic_input); + pass_options["dynamic_graph_execute_mode"] = dynamic_graph_execute_mode; + pass_options["dynamic_inputs_shape_range"] = dynamic_inputs_shape_range; std::string attr_name; for (const auto &option : sess_options) {
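Usage note (not part of the patch): outside the NPUEstimator path, the same three options reach NpuAttrs::GetPassOptions through the NpuOptimizer custom-optimizer parameter_map of a raw session config. A minimal sketch mirroring the npu_estimator.py lines above (the shape-range string is hypothetical):

    import tensorflow as tf

    config = tf.compat.v1.ConfigProto()
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    custom_op.parameter_map["dynamic_input"].b = True
    custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
    custom_op.parameter_map["dynamic_inputs_shape_range"].s = tf.compat.as_bytes("getnext:[64,3~5];data:[2,3]")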