diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index e5c5af658266b12f993e8b4e3e56b36603d63112..46bb681a7c195da6d12be25bdf42e5b52cff4a43 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -169,7 +169,7 @@ Status BuildOutputTensorInfo(OpKernelContext *ctx, std::vector &outp } else { ADP_LOG(INFO) << "[GEOP] GE output data placement is device, construct output info tensor."; auto getnext_output_info = std::unique_ptr(new NpuGetNextOutputInfo( - data_placement, ge_output_dims, output_size, std::move(data_ptr))); + data_placement, ge_output_dims, output_size, std::move(data_ptr))); Allocator *allocator = NpuHostGetNextAllocator::Create(std::move(getnext_output_info)); Tensor cpu_tensor(allocator, out_type, out_shape); ctx->set_output(i, cpu_tensor); @@ -215,8 +215,7 @@ GeOp::GeOp(OpKernelConstruction *ctx) sess_init_flag_(false), compute_graph_empty_(false), data_format_(""), graph_id_(0), is_initialized_graph_(false), need_iteration_(false), tf_session_(""), ge_session_(nullptr), job_type_(""), is_host_graph_(false), handle_(nullptr), aoe_tuning_(nullptr), - need_compile_graph_first_(false), aoe_init_(nullptr), aoe_finalize_(nullptr), - tuned_flag_(ATOMIC_FLAG_INIT) { + need_compile_graph_first_(false), aoe_init_(nullptr), aoe_finalize_(nullptr) { Initialize(ctx); } @@ -275,7 +274,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { ADP_LOG(INFO) << "init options:"; NpuAttrs::LogOptions(init_options_); - if ((!init_options_["ge.jobType"].empty()) && (!init_options_["ge.tuningPath"].empty())) { + if (!init_options_["ge.jobType"].empty() && !init_options_["ge.tuningPath"].empty()) { handle_ = mmDlopen("libaoe_tuning.so", MMPA_RTLD_NOW); OP_REQUIRES(ctx, handle_ != nullptr, errors::InvalidArgument("libaoe_tuning.so dlopen failed, ", mmDlerror())); @@ -299,8 +298,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { void GeOp::Finalize() { { - ADP_LOG(INFO) << "[GEOP] GeOp start to finalize, tf session: " << tf_session_ - << ", graph_id_: " << graph_id_; + ADP_LOG(INFO) << "[GEOP] GeOp start to finalize, tf session: " << tf_session_ << ", graph_id_: " << graph_id_; // global environment finalize, invoke once for each process { mutex_lock lock{mu_}; @@ -345,15 +343,14 @@ void GeOp::Finalize() { } } init_flag_ = false; - ADP_LOG(INFO) << "[GEOP] GeOp Finalize success, tf session: " << tf_session_ - << ", graph_id_: " << graph_id_; + ADP_LOG(INFO) << "[GEOP] GeOp Finalize success, tf session: " << tf_session_ << ", graph_id_: " << graph_id_; return; } int GeOp::InitRebuildFlag(uint32_t cache_graph_id) { if (!build_flag_) { ADP_LOG(INFO) << "[GEOP] tf session " << tf_session_ << ", graph id: " << cache_graph_id - << " does not build yet, no need to check rebuild"; + << " does not build yet, no need to check rebuild"; return 0; } if (ge_session_ == nullptr) { @@ -514,8 +511,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { string geop_name = ctx->op_kernel().name(); uint32_t num_inputs = static_cast(ctx->num_inputs()); ADP_LOG(INFO) << "[GEOP] Begin GeOp::ComputeAsync" - << ", kernel_name:" << geop_name << ", num_inputs:" - << num_inputs << ", num_outputs:" << ctx->num_outputs(); + << ", kernel_name:" << geop_name << ", num_inputs:" << num_inputs << ", num_outputs:" << ctx->num_outputs(); int64 startTime = InferShapeUtil::GetCurrentTimestap(); std::vector input_vec; std::vector input_shapes; @@ -524,35 +520,22 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { // if input shapes changed, cache graphs uint32_t cache_graph_id = graph_id_; - bool is_set_dynamic_config = (!sess_options_["ge.inputShape"].empty()) && (!sess_options_["ge.dynamicDims"].empty()); - bool is_tuning = (!init_options_["ge.jobType"].empty()) && (!init_options_["ge.tuningPath"].empty()); - bool is_lazy_recompile_mode = (dynamic_input_ == "1") && (dynamic_graph_execute_mode_ == "lazy_recompile"); - ADP_LOG(INFO) << "is_set_dynamic_config: " << is_set_dynamic_config - << " is_tuning: " << is_tuning - << " is_lazy_recompile_mode: " << is_lazy_recompile_mode; - - if (is_tuning) { - if (is_set_dynamic_config) { - ADP_LOG(ERROR) << "dynamic input config can not use with mstuning."; - OP_REQUIRES_ASYNC(ctx, false, errors::Internal("dynamic input config can not use with mstuning."), done); - return; - } - auto input_vec_aoe = input_vec; - if (RunTuning(input_vec_aoe, ctx) != 0) { - ADP_LOG(ERROR) << "RunTuning fail."; - done(); - return; - } - if (InitRebuildFlag(cache_graph_id) != 0) { - OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done); - return; - } - ADP_LOG(INFO) << "RunTuning finish."; - } else if (is_set_dynamic_config) { + bool is_set_dynamic_config = !sess_options_["ge.inputShape"].empty() && !sess_options_["ge.dynamicDims"].empty(); + bool is_tuning = !init_options_["ge.jobType"].empty() && !init_options_["ge.tuningPath"].empty(); + bool is_lazy_recompile_mode = dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile"; + if (is_set_dynamic_config && is_tuning) { + ADP_LOG(FATAL) << "dynamic input config can not use with mstuning."; + std::stringstream ss; + ss << "dynamic input config can not use with mstuning."; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done); + return; + } else if (is_set_dynamic_config && !is_tuning) { if (InitRebuildFlag(cache_graph_id) != 0) { OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done); return; } + } else if (!is_set_dynamic_config && is_tuning) { + ADP_LOG(INFO) << "[GEOP] in tune func, do not rebuild graph."; } else { // in dynamic input mode, cache graphs. if (is_lazy_recompile_mode) { @@ -567,8 +550,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { if (!build_flag_) { // Get Graph OP_REQUIRES_ASYNC(ctx, ctx->function_library() != nullptr, errors::Internal("function library is nullptr"), done); - FunctionLibraryDefinition *flib_def = const_cast( - ctx->function_library()->GetFunctionLibraryDefinition()); + FunctionLibraryDefinition *flib_def = const_cast(ctx->function_library()->GetFunctionLibraryDefinition()); OP_REQUIRES_ASYNC(ctx, flib_def != nullptr, errors::Internal("flib_def is nullptr"), done); std::shared_ptr graph = std::make_shared(OpRegistry::Global()); OP_REQUIRES_ASYNC(ctx, graph != nullptr, errors::Internal("create tensorflow graph failed"), done); @@ -595,7 +577,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { ADP_LOG(EVENT) << "[GEOP] In GEOP computeAsync, kernel_name:" << geop_name << " ,TFadapter cost time: [" << ((endTime - startTime) / kMicrosToMillis) << " ms]"; ADP_LOG(INFO) << "[GEOP] TFadpter process graph success, GE parser begin, kernel_name:" << geop_name - << " ,tf session: " << tf_session_ << " ,graph id :" << cache_graph_id; + << " ,tf session: " << tf_session_ << " ,graph id :" << cache_graph_id; // parser, tensorflow graph to ge graph std::shared_ptr model_parser = domi::ModelParserFactory::Instance()->CreateModelParser(domi::FrameworkType::TENSORFLOW); @@ -608,8 +590,65 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { OP_REQUIRES_ASYNC(ctx, compute_graph != nullptr, errors::InvalidArgument("create ComputeGraph failed"), done); auto build_sub_graph = [this, flib_def](const std::string &graph) -> std::string { - return this->BuildSubGraph(flib_def, graph); + // const tensorflow::GraphDef *graph_def_in = reinterpret_cast(root_proto); + ADP_LOG(INFO) << "[GEOP] build_sub_graph enter, sub graph name is " << graph; + const FunctionDef *func_def = flib_def->Find(graph); + if (func_def == nullptr) { + ADP_LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph; + LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph; + return ""; + } + // get infershape + Graph subgraph(flib_def); + Status status = InferShapeUtil::GetSubGraphFromFunctionDef(*flib_def, *func_def, &subgraph); + if (status != Status::OK()) { + ADP_LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail:" << status.error_message(); + LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail:" << status.error_message(); + return ""; + } + ADP_LOG(INFO) << "[GEOP] Get subgraph from functiondef success."; + char *enable_force_v2_control = getenv("ENABLE_FORCE_V2_CONTROL"); + if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) { + GraphDef graph_def; + subgraph.ToGraphDef(&graph_def); + WriteTextProto(Env::Default(), GetDumpPath() + graph + "_graph.pbtxt", graph_def); + } + + bool is_initialize = false; + for (Node *node : subgraph.nodes()) { + AddNodeAttrs(node, is_initialize); + + // Add Input&Output Desc into NodeDef + if (GenerateDesc(node) != Status::OK()) { + ADP_LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string() + << " Generate desc failed in subgraph."; + LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string() + << " Generate desc failed in subgraph."; + } + } + + unique_ptr sub_graph_def(new (std::nothrow) GraphDef()); + if (sub_graph_def == nullptr) { + ADP_LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail."; + LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail."; + return ""; + } + subgraph.ToGraphDef(sub_graph_def.get()); + if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) { + sub_graph_def->release_library(); + sub_graph_def->mutable_versions()->clear_min_consumer(); + } + + char *need_print = getenv("PRINT_MODEL"); + if (need_print != nullptr && strcmp("1", need_print) == 0) { + string tmpmodel_path = GetDumpPath() + "TF_Subgraph_"; + string tmodel_path = tmpmodel_path + graph.c_str() + ".pbtxt"; + Status status_out = WriteTextProto(Env::Default(), tmodel_path, *sub_graph_def); + } + ADP_LOG(INFO) << "[GEOP] build_sub_graph exit, sub graph name is " << graph; + return sub_graph_def->SerializeAsString(); }; + ge::Status status = model_parser->ParseProtoWithSubgraph(ori_graph_def.SerializeAsString(), build_sub_graph, compute_graph); if (status != ge::SUCCESS) { @@ -624,7 +663,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { domi::GetContext().format = ge::GetParserContext().format; ADP_LOG(INFO) << "[GEOP] Tensorflow graph parse to ge graph success, kernel_name:" << geop_name - << " ,tf session: " << tf_session_ << " ,graph id: " << cache_graph_id; + << " ,tf session: " << tf_session_ << " ,graph id: " << cache_graph_id; size_t nodes = compute_graph->GetAllNodesSize(); if (nodes == 0) { @@ -649,7 +688,39 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { graph_options_["ge.exec.placement"] = "HOST"; } graph_options_["ge.shape_generalized_build_mode"] = "shape_precise"; - SetDynamicInput(); + if (dynamic_input_ == "1") { + graph_options_["ge.exec.dynamicInput"] = dynamic_input_; + graph_options_["ge.exec.dynamicGraphExecuteMode"] = dynamic_graph_execute_mode_; + graph_options_["ge.exec.dataInputsShapeRange"] = data_inputs_shape_range_; + if (dynamic_graph_execute_mode_ == "dynamic_execute" && data_inputs_shape_range_.empty()) { + graph_options_["ge.shape_generalized_build_mode"] = "shape_generalized"; + } + } + if (is_tuning) { + if (is_train_graph_ != "1" && init_options_["ge.jobType"] != "2" && init_options_["ge.jobType"] != "1") { + ADP_LOG(INFO) << "[GEOP] in tune mode, nontraining graphs should be cache."; + OP_REQUIRES_ASYNC(ctx, SessionManager::GetInstance().CacheGeGraphs(ge_session_, ge_graph), + errors::Internal("[GEOP] cache ge session failed."), done); + build_flag_ = true; + BuildOutTensorInfo(ctx); + done(); + return; + } else { + ADP_LOG(INFO) << "[GEOP] in tune mode, training graph handled by tools."; + std::vector ge_graphs; + OP_REQUIRES_ASYNC(ctx, SessionManager::GetInstance().GetGeGraphs(ge_session_, ge_graphs), + errors::Internal("[GEOP] ge ge session nontraining graphs failed."), done); + tune_options_.insert(graph_options_.begin(), graph_options_.end()); + AoeStatus tune_ret = (*aoe_tuning_)(ge_graph, ge_graphs, ge_session_, tune_options_); + OP_REQUIRES_ASYNC(ctx, (tune_ret == AOE_SUCCESS) || (tune_ret == AOE_ERROR_NO_AICORE_GRAPH), + errors::Internal("[GEOP] exec aoe tuning func failed[", tune_ret, "]."), done); + ADP_LOG(INFO) << "[GEOP] aoe success[" << tune_ret << "]."; + build_flag_ = true; + BuildOutTensorInfo(ctx); + done(); + return; + } + } // call ge session addGraph api status = ge_session_->AddGraph(cache_graph_id, ge_graph, graph_options_); @@ -669,7 +740,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { } else { add_graph_flag_ = true; ADP_LOG(INFO) << "[GEOP] Add graph to ge session success, kernel_name:" << geop_name - << " ,tf session: " << tf_session_ << " ,graph id:" << cache_graph_id; + << " ,tf session: " << tf_session_ << " ,graph id:" << cache_graph_id; } build_flag_ = true; if (!is_set_dynamic_config && is_lazy_recompile_mode) { @@ -703,6 +774,12 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { } } + if (is_tuning) { + BuildOutTensorInfo(ctx); + done(); + return; + } + int64 run_start_time = InferShapeUtil::GetCurrentTimestap(); auto callback = [done, ctx, run_start_time](ge::Status ge_status, std::vector &outputs) { if (ge_status == ge::SUCCESS) { @@ -710,7 +787,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { ADP_LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed."; std::string error_message = ge::GEGetErrorMsg(); std::stringstream ss; - ss << ctx->op_kernel().name() + ss << ctx->op_kernel().name() << "GEOP::DoRunAsync get output failed." << std::endl << "Error Message is : " << std::endl << error_message; @@ -737,9 +814,9 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { done(); }; + ADP_LOG(INFO) << "[GEOP] Call ge session RunGraphAsync, kernel_name:" << geop_name << " ,tf session: " << tf_session_ + << " ,graph id: " << cache_graph_id; // call ge session runGraphAsync api - ADP_LOG(INFO) << "[GEOP] Call ge session RunGraphAsync, kernel_name:" << geop_name - << " ,tf session: " << tf_session_ << " ,graph id: " << cache_graph_id; ge::Status status = ge_session_->RunGraphAsync(cache_graph_id, inputs, callback); if (status != ge::SUCCESS) { std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); @@ -819,6 +896,7 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def, !sess_options_["ge.dynamicNodeType"].empty(); if (is_set_dynamic_config) { BuildShapeNodeAndCacheArgNodes(graph); } + bool is_tuning = !init_options_["ge.jobType"].empty() && !init_options_["ge.tuningPath"].empty(); for (Node *node : graph.nodes()) { AddNodeAttrs(node, is_initialize); // Add Input&Output Desc into NodeDef @@ -838,6 +916,27 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def, return ret; } } + + if (is_tuning) { + // output handle + NodeDef &node_def = const_cast(node->def()); + if (node->type_string() == "_Retval") { + int index = node_def.attr().at("index").i(); + // format: AttrValue.list(ListValue).func(repeated NameAttrList) + NameAttrList desc_attr = node_def.attr().at(INPUT_DESC).list().func(0); + + std::vector dims; + int dim_num = desc_attr.attr().at(SERIALIZE_SHAPE).list().i_size(); + for (int t = 0; t < dim_num; t++) { + int64 dim_i = (int64_t) desc_attr.attr().at(SERIALIZE_SHAPE).list().i(t); + if (dim_i < 0) { dim_i = 1; } + dims.push_back(dim_i); + } + + TensorShape out_shape(dims); + outputs_shape_.insert(std::map::value_type(index, out_shape)); + } + } } // set input_shape to dynamic nodes shape desc if (is_set_dynamic_config) { @@ -1017,165 +1116,6 @@ void GeOp::SetShapesToOutputDesc(const std::vector &input_shapes, } } -int GeOp::RunTuning(std::vector &input_vec, OpKernelContext *ctx) { - if (tuned_flag_.test_and_set()) { - ADP_LOG(INFO) << ctx->op_kernel().name() << " has tuned."; - return 0; - } - ADP_LOG(INFO) << "[GEOP] " << ctx->op_kernel().name() << " begin tune."; - - // Get Graph - if (ctx->function_library() == nullptr) { - ADP_LOG(ERROR) << "function library is nullptr"; - return -1; - } - FunctionLibraryDefinition *flib_def = const_cast( - ctx->function_library()->GetFunctionLibraryDefinition()); - if (flib_def == nullptr) { - ADP_LOG(ERROR) << "flib_def is nullptr"; - return -1; - } - std::shared_ptr graph = std::make_shared(OpRegistry::Global()); - if (graph == nullptr) { - ADP_LOG(ERROR) << "create tensorflow graph failed"; - return -1; - } - - // Build GraphDef from FunctionDef - GraphDef ori_graph_def; - Status s = BuildGraphDef(*flib_def, input_vec, ori_graph_def, is_initialized_graph_); - if (!s.ok()) { - ADP_LOG(ERROR) << "BuildGraphDef error"; - return -1; - } - - // parser, tensorflow graph to ge graph - std::shared_ptr model_parser = - domi::ModelParserFactory::Instance()->CreateModelParser(domi::FrameworkType::TENSORFLOW); - if (model_parser == nullptr) { - ADP_LOG(ERROR) << "create model parser ret failed."; - return -1; - } - ge::ComputeGraphPtr compute_graph = nullptr; - compute_graph = std::make_shared("ge_default_" + CurrentTimeInStr()); - if (compute_graph == nullptr) { - ADP_LOG(ERROR) << "create ComputeGraph failed"; - return -1; - } - - auto build_sub_graph = [this, flib_def](const std::string &graph) -> std::string { - return this->BuildSubGraph(flib_def, graph); - }; - ge::Status status = model_parser->ParseProtoWithSubgraph(ori_graph_def.SerializeAsString(), - build_sub_graph, compute_graph); - if (status != ge::SUCCESS) { - std::stringstream ss; - ss << "graph parse failed. ret : " << status << std::endl - << "Error Message is : " << std::endl - << ge::GEGetErrorMsg(); - ADP_LOG(ERROR) << ss.str(); - return -1; - } - domi::GetContext().format = ge::GetParserContext().format; - ADP_LOG(INFO) << "[GEOP] Tensorflow graph parse to ge graph success."; - - // convert to ge::graph - ge::Graph ge_graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); - if (iteration_per_loop_ > 1) { - ge_graph.SetNeedIteration(this->need_iteration_); - } - if (is_host_graph_) { - graph_options_["ge.exec.placement"] = "HOST"; - } - SetDynamicInput(); - - // run aoe tuning - if (is_train_graph_ != "1" && init_options_["ge.jobType"] != "2" && init_options_["ge.jobType"] != "1") { - ADP_LOG(INFO) << "[GEOP] in tune mode, nontraining graphs should be cache."; - if (!SessionManager::GetInstance().CacheGeGraphs(ge_session_, ge_graph)) { - ADP_LOG(ERROR) << "cache ge session failed."; - return -1; - } - return 0; - } else { - ADP_LOG(INFO) << "[GEOP] in tune mode, training graph handled by tools."; - std::vector ge_graphs; - if (!SessionManager::GetInstance().GetGeGraphs(ge_session_, ge_graphs)) { - ADP_LOG(ERROR) << "get ge session nontraining graphs failed."; - return -1; - } - tune_options_.insert(graph_options_.begin(), graph_options_.end()); - AoeStatus tune_ret = (*aoe_tuning_)(ge_graph, ge_graphs, ge_session_, tune_options_); - if ((tune_ret != AOE_SUCCESS) && (tune_ret != AOE_ERROR_NO_AICORE_GRAPH)) { - ADP_LOG(ERROR) << "exec aoe tuning func failed[" << tune_ret << "]."; - return -1; - } - ADP_LOG(INFO) << "[GEOP] aoe success[" << tune_ret << "]."; - return 0; - } -} - -std::string GeOp::BuildSubGraph(FunctionLibraryDefinition *flib_def, const std::string &graph) { - ADP_LOG(INFO) << "[GEOP] build_sub_graph enter, sub graph name is " << graph; - const FunctionDef *func_def = flib_def->Find(graph); - if (func_def == nullptr) { - ADP_LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph; - return ""; - } - // get infershape - Graph subgraph(flib_def); - Status status = InferShapeUtil::GetSubGraphFromFunctionDef(*flib_def, *func_def, &subgraph); - if (status != Status::OK()) { - ADP_LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail:" << status.error_message(); - return ""; - } - ADP_LOG(INFO) << "[GEOP] Get subgraph from functiondef success."; - char *enable_force_v2_control = getenv("ENABLE_FORCE_V2_CONTROL"); - if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) { - GraphDef graph_def; - subgraph.ToGraphDef(&graph_def); - WriteTextProto(Env::Default(), GetDumpPath() + graph + "_graph.pbtxt", graph_def); - } - bool is_initialize = false; - for (Node *node : subgraph.nodes()) { - AddNodeAttrs(node, is_initialize); - // Add Input&Output Desc into NodeDef - if (GenerateDesc(node) != Status::OK()) { - ADP_LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string() - << " Generate desc failed in subgraph."; - } - } - unique_ptr sub_graph_def(new (std::nothrow) GraphDef()); - if (sub_graph_def == nullptr) { - ADP_LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail."; - return ""; - } - subgraph.ToGraphDef(sub_graph_def.get()); - if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) { - sub_graph_def->release_library(); - sub_graph_def->mutable_versions()->clear_min_consumer(); - } - char *need_print = getenv("PRINT_MODEL"); - if (need_print != nullptr && strcmp("1", need_print) == 0) { - string tmpmodel_path = GetDumpPath() + "TF_Subgraph_"; - string tmodel_path = tmpmodel_path + graph.c_str() + ".pbtxt"; - Status status_out = WriteTextProto(Env::Default(), tmodel_path, *sub_graph_def); - } - ADP_LOG(INFO) << "[GEOP] build_sub_graph exit, sub graph name is " << graph; - return sub_graph_def->SerializeAsString(); -} - -void GeOp::SetDynamicInput() { - if (dynamic_input_ == "1") { - graph_options_["ge.exec.dynamicInput"] = dynamic_input_; - graph_options_["ge.exec.dynamicGraphExecuteMode"] = dynamic_graph_execute_mode_; - graph_options_["ge.exec.dataInputsShapeRange"] = data_inputs_shape_range_; - if (dynamic_graph_execute_mode_ == "dynamic_execute" && data_inputs_shape_range_.empty()) { - graph_options_["ge.shape_generalized_build_mode"] = "shape_generalized"; - } - } -} - void GeOp::AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType type, std::vector &input_shapes) { ADP_LOG(INFO) << "[GEOP] Start analyze input tensor."; @@ -1260,6 +1200,17 @@ Status GeOp::BuildInputTensorInfo(OpKernelContext *ctx, return Status::OK(); } +Status GeOp::BuildOutTensorInfo(OpKernelContext *ctx) { + int num_outputs = ctx->num_outputs(); + // populate outputs + for (int i = 0; i < num_outputs; i++) { + TensorShape out_shape = outputs_shape_.at(i); + Tensor *tensor = nullptr; + TF_RETURN_IF_ERROR(ctx->allocate_output(i, out_shape, &tensor)); + } + return Status::OK(); +} + // For each NodeDef, Create Input&Output Desc(shape,format,dataType) Status GeOp::GenerateDesc(Node *&node) { REQUIRES_NOT_NULL(node); diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index 08c2350c8d8bb88a88874a39f537ae410dc5f112..f6c1d5d753cccff9804ba18d52b35c094bb5889c 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -29,7 +29,6 @@ #include "graph/utils/graph_utils.h" #include "tune_api.h" #include -#include namespace tensorflow { using AoeTuningFunc = AoeStatus (*)(ge::Graph &, std::vector &, ge::Session *, @@ -61,6 +60,9 @@ class GeOp : public AsyncOpKernel { std::vector &input_shapes, std::vector &inputs); + // prepare output tensor + Status BuildOutTensorInfo(OpKernelContext *ctx); + // create input and output desc for NodeDef Status GenerateDesc(Node *&node); @@ -94,11 +96,6 @@ class GeOp : public AsyncOpKernel { void AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType type, std::vector &input_shapes); - int RunTuning(std::vector &input_vec, OpKernelContext *ctx); - - std::string BuildSubGraph(FunctionLibraryDefinition *flib_def, const std::string &graph); - - void SetDynamicInput(); private: static const std::string INPUT_DESC; @@ -148,8 +145,6 @@ class GeOp : public AsyncOpKernel { std::map tune_options_; std::string is_dynamic_getnext_; std::string placeholder_index_; - - std::atomic_flag tuned_flag_; }; } // namespace tensorflow #endif // TENSORFLOW_KERNELS_GEOP_NPU_H_ diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc index 8df70b865256ec4e47e99f3d0fefe540371eacbb..2645fee89813ee2bc3a61cec67759172e46af806 100644 --- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc @@ -153,15 +153,6 @@ TEST_F(GeOpTest, GeOpDynamicInput1Test) { EXPECT_TRUE(!attrs["_dynamic_input"].s().empty()); EXPECT_EQ(attrs["_dynamic_graph_execute_mode"].s() == "dynamic_execute", true); } -TEST_F(GeOpTest, GeOpAoeTuningAndDynamicDimsTest) { - setenv("PRINT_MODEL", "1", true); - NodeDef node_def; - std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt"; - Tensor a(DT_INT32, TensorShape({1,})); - gtl::InlinedVector inputs{TensorValue(&a)}; - setenv("ENABLE_FORCE_V2_CONTROL", "1", true); - EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0", false).ok()); -} TEST_F(GeOpTest, GeOpAoeTuningTest) { Env* env = Env::Default(); GraphDef graph_def; diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt deleted file mode 100644 index 39c217cc85a7ba3433b61b58ba0b090e0a874470..0000000000000000000000000000000000000000 --- a/tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt +++ /dev/null @@ -1,696 +0,0 @@ -node { - name: "arg_Placeholder_0_0" - op: "_Arg" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "index" - value { - i: 0 - } - } -} -node { - name: "retval_Add_1_0" - op: "_Retval" - input: "GeOp13_0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "index" - value { - i: 0 - } - } -} -node { - name: "GeOp13_0" - op: "GeOp" - input: "arg_Placeholder_0_0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "Tin" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_NpuOptimizer" - value { - s: "NpuOptimizer" - } - } - attr { - key: "_auto_tune_mode" - value { - s: "" - } - } - attr { - key: "_buffer_optimize" - value { - s: "l2_optimize" - } - } - attr { - key: "_compress_weight_conf" - value { - s: "" - } - } - attr { - key: "_debug_dir" - value { - s: "" - } - } - attr { - key: "_distribute_config" - value { - s: "" - } - } - attr { - key: "_do_npu_optimizer" - value { - s: "1" - } - } - attr { - key: "_dump_debug_mode" - value { - s: "all" - } - } - attr { - key: "_dump_mode" - value { - s: "output" - } - } - attr { - key: "_dump_path" - value { - s: "" - } - } - attr { - key: "_dump_step" - value { - s: "" - } - } - attr { - key: "_dynamic_dims" - value { - s: "" - } - } - attr { - key: "_dynamic_graph_execute_mode" - value { - s: "dynamic_execute" - } - } - attr { - key: "_dynamic_input" - value { - s: "1" - } - } - attr { - key: "_dynamic_node_type" - value { - s: "" - } - } - attr { - key: "_enable_compress_weight" - value { - s: "0" - } - } - attr { - key: "_enable_data_pre_proc" - value { - s: "1" - } - } - attr { - key: "_enable_dump" - value { - s: "0" - } - } - attr { - key: "_enable_dump_debug" - value { - s: "0" - } - } - attr { - key: "_enable_exception_dump" - value { - s: "" - } - } - attr { - key: "_enable_scope_fusion_passes" - value { - s: "" - } - } - attr { - key: "_enable_small_channel" - value { - s: "0" - } - } - attr { - key: "_fusion_switch_file" - value { - s: "" - } - } - attr { - key: "_graph_run_mode" - value { - s: "1" - } - } - attr { - key: "_hcom_multi_mode" - value { - s: "" - } - } - attr { - key: "_hcom_parallel" - value { - s: "0" - } - } - attr { - key: "_in_out_pair" - value { - s: "" - } - } - attr { - key: "_in_out_pair_flag" - value { - s: "1" - } - } - attr { - key: "_input_shape" - value { - s: "" - } - } - attr { - key: "_is_tailing_optimization" - value { - s: "0" - } - } - attr { - key: "_iterations_per_loop" - value { - s: "777" - } - } - attr { - key: "_job" - value { - s: "localhost" - } - } - attr { - key: "_local_device_list" - value { - s: "" - } - } - attr { - key: "_local_rank_id" - value { - s: "-1" - } - } - attr { - key: "_lower_functional_ops" - value { - s: "0" - } - } - attr { - key: "_mix_compile_mode" - value { - s: "0" - } - } - attr { - key: "_aoe_mode" - value { - s: "2" - } - } - attr { - key: "_op_compiler_cache_dir" - value { - s: "" - } - } - attr { - key: "_op_compiler_cache_mode" - value { - s: "" - } - } - attr { - key: "_op_debug_level" - value { - s: "0" - } - } - attr { - key: "_op_select_implmode" - value { - s: "" - } - } - attr { - key: "_op_tune_mode" - value { - s: "" - } - } - attr { - key: "_optypelist_for_implmode" - value { - s: "" - } - } - attr { - key: "_precision_mode" - value { - s: "" - } - } - attr { - key: "_profiling_mode" - value { - s: "0" - } - } - attr { - key: "_profiling_options" - value { - s: "" - } - } - attr { - key: "_session_device_id" - value { - s: "" - } - } - attr { - key: "_stream_max_parallel_num" - value { - s: "" - } - } - attr { - key: "_task_index" - value { - s: "0" - } - } - attr { - key: "_train_graph" - value { - s: "0" - } - } - attr { - key: "_use_off_line" - value { - s: "1" - } - } - attr { - key: "_variable_format_optimize" - value { - s: "1" - } - } - attr { - key: "_work_path" - value { - s: "" - } - } - attr { - key: "data_format" - value { - s: "NHWC" - } - } - attr { - key: "function" - value { - func { - name: "GeOp13_0" - } - } - } -} -library { - function { - signature { - name: "GeOp13_0" - input_arg { - name: "arg_Placeholder_0_0_0_arg" - type: DT_FLOAT - } - output_arg { - name: "Add_1_retval" - type: DT_FLOAT - } - } - node_def { - name: "add/x" - op: "Const" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_FLOAT - tensor_shape { - dim { - size: 1 - } - dim { - size: 128 - } - } - tensor_content: "\321[{>\267\360\212=`F\033?\016Y>%\']>\333o\243>%\242L?{\312\253>}\\\270>\211\276\217>\370h\351>\262Lh?FR^?\255a\361>\326\214\334>\243\026\177?\262\324E>\2064\335>\262P\334=\030\317T?\230$\345>\376\216d?\21540?G\250\255=@\301z?)\317\237>\346\017\250m?\t9y?\210n]?c\260j>3\224\203>\261\324\006?\007t/>\354\251\321>\030\320q?\375\226\342>\344\020 ?\031\037n?o\333^=,S/?\363\373\217>\246mM?\336L\021?\026\257\324>\035\024\037?K\247\024?\202\217r?\3206\002?\r\026\326>\212\264\261=/e^?\376P\010?\260\2440?\222E\346>\223\010\311>\017t\007?;\324-?Y\230n?@\326\036?:,\364=H\022Z?n<\037>U\311I?f2\313>T\'\\?\310z\335>1\214M?\r\321\322>|;\344>\263\365U?\230\243 ?\251\3453?-\025\311>Ztu?\213\353\215>^T\021?|\311??(\301\255>\224\262\030?\243\361\322=\320\344\274>\212\001\203=\272\027\320>\005\254\354>Z\371-?\355\315\367>\2035)?w\227j?\355My?8c\347>\221\207\270>\215\306\343>(&\273>\320\034\363>\023I*>dyX?\024\365f?\313RP?J\226\001?\240|\'=\370\365:?\204\276c?\346\231\276>B\007H=\267N\243>_\273\310>\271\032\t?k\232%?\322\3517?J\220A?\356\240U?\310\2705?\347.\030?R\264\312>u4c;Ptb?G\3424?\343S,?O\311\020?\312JP?\347Z\020?\216\334n?" - } - } - } - } - node_def { - name: "strided_slice/stack" - op: "Const" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "dtype" - value { - type: DT_INT32 - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_INT32 - tensor_shape { - dim { - size: 1 - } - } - int_val: 0 - } - } - } - } - node_def { - name: "strided_slice/stack_1" - op: "Const" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "dtype" - value { - type: DT_INT32 - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_INT32 - tensor_shape { - dim { - size: 1 - } - } - int_val: 1 - } - } - } - } - node_def { - name: "IteratorV2" - op: "IteratorV2" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "_NpuOptimizer" - value { - s: "NpuOptimizer" - } - } - attr { - key: "_dynamic_dims" - value { - s: "" - } - } - attr { - key: "_dynamic_node_type" - value { - s: "" - } - } - attr { - key: "_enable_data_pre_proc" - value { - s: "1" - } - } - attr { - key: "_input_shape" - value { - s: "" - } - } - attr { - key: "_is_train_graph" - value { - b: false - } - } - attr { - key: "_iterations_per_loop" - value { - s: "777" - } - } - attr { - key: "_job" - value { - s: "localhost" - } - } - attr { - key: "_mix_compile_mode" - value { - s: "0" - } - } - attr { - key: "_use_off_line" - value { - s: "1" - } - } - attr { - key: "container" - value { - s: "" - } - } - attr { - key: "output_shapes" - value { - list { - shape { - dim { - size: 1 - } - dim { - size: 128 - } - } - } - } - } - attr { - key: "output_types" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "shared_name" - value { - s: "IteratorV2" - } - } - } - node_def { - name: "add" - op: "AddV2" - input: "arg_Placeholder_0_0_0_arg" - input: "add/x:output:0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - } - node_def { - name: "IteratorGetNext" - op: "IteratorGetNext" - input: "IteratorV2:handle:0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "output_shapes" - value { - list { - shape { - dim { - size: 1 - } - dim { - size: 128 - } - } - } - } - } - attr { - key: "output_types" - value { - list { - type: DT_FLOAT - } - } - } - } - node_def { - name: "strided_slice" - op: "StridedSlice" - input: "IteratorGetNext:components:0" - input: "strided_slice/stack:output:0" - input: "strided_slice/stack_1:output:0" - input: "strided_slice/stack_1:output:0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "Index" - value { - type: DT_INT32 - } - } - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "begin_mask" - value { - i: 0 - } - } - attr { - key: "ellipsis_mask" - value { - i: 0 - } - } - attr { - key: "end_mask" - value { - i: 0 - } - } - attr { - key: "new_axis_mask" - value { - i: 0 - } - } - attr { - key: "shrink_axis_mask" - value { - i: 1 - } - } - } - node_def { - name: "Add_1" - op: "Add" - input: "add:z:0" - input: "strided_slice:output:0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - } - ret { - key: "Add_1_retval" - value: "Add_1:z:0" - } - } -} -versions { - producer: 134 - min_consumer: 12 -} diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 8df70b865256ec4e47e99f3d0fefe540371eacbb..2645fee89813ee2bc3a61cec67759172e46af806 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -153,15 +153,6 @@ TEST_F(GeOpTest, GeOpDynamicInput1Test) { EXPECT_TRUE(!attrs["_dynamic_input"].s().empty()); EXPECT_EQ(attrs["_dynamic_graph_execute_mode"].s() == "dynamic_execute", true); } -TEST_F(GeOpTest, GeOpAoeTuningAndDynamicDimsTest) { - setenv("PRINT_MODEL", "1", true); - NodeDef node_def; - std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt"; - Tensor a(DT_INT32, TensorShape({1,})); - gtl::InlinedVector inputs{TensorValue(&a)}; - setenv("ENABLE_FORCE_V2_CONTROL", "1", true); - EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0", false).ok()); -} TEST_F(GeOpTest, GeOpAoeTuningTest) { Env* env = Env::Default(); GraphDef graph_def; diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index e055e4f2aa5e557491f807149128fe792069036a..0f272a715e949d1ebbd09de44cc845471869b319 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -1,13 +1,6 @@ #include "tf_adapter/util/npu_attrs.h" #include "gtest/gtest.h" #include -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/graph/graph_constructor.h" -#include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/public/session_options.h" -#include "tensorflow/core/common_runtime/optimization_registry.h" namespace tensorflow { Status CheckOpImplMode(const string &op_select_implmode); @@ -72,63 +65,5 @@ setenv("DUMP_GRAPH_PATH", "./dump_fold", 1); string new_path = GetDumpPath(); EXPECT_NE(new_path, "./dump_fold/"); } - -TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableDump) { - GraphOptimizationPassOptions options; - SessionOptions session_options; - session_options.config.mutable_graph_options() - ->mutable_optimizer_options() - ->set_do_function_inlining(true); - auto *custom_config = session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); - custom_config->set_name("NpuOptimizer"); - options.session_options = &session_options; - Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); - - AttrValue enable_dump_debug = AttrValue(); - enable_dump_debug.set_b(true); - (*custom_config->mutable_parameter_map())["enable_dump_debug"] = enable_dump_debug; - s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); - - AttrValue dump_path = AttrValue(); - dump_path.set_s("/invalid"); - (*custom_config->mutable_parameter_map())["dump_path"] = dump_path; - s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); - - dump_path.set_s("/"); - (*custom_config->mutable_parameter_map())["dump_path"] = dump_path; - AttrValue dump_step = AttrValue(); - dump_step.set_s("777"); - (*custom_config->mutable_parameter_map())["dump_step"] = dump_step; - s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); - - enable_dump_debug.set_b(false); - (*custom_config->mutable_parameter_map())["enable_dump_debug"] = enable_dump_debug; - AttrValue local_rank_id = AttrValue(); - local_rank_id.set_i(777); - (*custom_config->mutable_parameter_map())["local_rank_id"] = local_rank_id; - s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); - - local_rank_id.set_i(0); - (*custom_config->mutable_parameter_map())["local_rank_id"] = local_rank_id; - AttrValue local_device_list = AttrValue(); - local_device_list.set_s("invalid string"); - (*custom_config->mutable_parameter_map())["local_device_list"] = local_device_list; - s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); - - AttrValue dynamic_input = AttrValue(); - dynamic_input.set_b(true); - (*custom_config->mutable_parameter_map())["dynamic_input"] = dynamic_input; - AttrValue dynamic_graph_execute_mode = AttrValue(); - dynamic_graph_execute_mode.set_s("execute mode"); - (*custom_config->mutable_parameter_map())["dynamic_graph_execute_mode"] = dynamic_graph_execute_mode; - s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); - EXPECT_EQ(s.ok(), false); -} } } // end tensorflow \ No newline at end of file diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index ab9b66d4fb55b5191f2ba214f73c06cf24d846fd..23b93672b4040f138083224d75ce156f4211d903 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -68,9 +68,9 @@ Status GetEnvDeviceID(uint32_t &device_id) { int64 phy_device_id = -1; int64 logic_device_id = -1; const char* tmp_ascend_device_id = std::getenv("ASCEND_DEVICE_ID"); - std::string env_ascend_device_id(tmp_ascend_device_id == nullptr ? "" : tmp_ascend_device_id); + string env_ascend_device_id(tmp_ascend_device_id == nullptr ? "" : tmp_ascend_device_id); const char* tmp_device_id = std::getenv("DEVICE_ID"); - std::string env_device_id(tmp_device_id == nullptr ? "" : tmp_device_id); + string env_device_id(tmp_device_id == nullptr ? "" : tmp_device_id); if (env_ascend_device_id.empty() && env_device_id.empty()) { ADP_LOG(WARNING) << "[GePlugin] DEVICE_ID and ASCEND_DEVICE_ID is none, use default device id : 0, if set session_device_id, session_device_id has a higher priority"; LOG(WARNING) << "[GePlugin] DEVICE_ID and ASCEND_DEVICE_ID is none, use default device id : 0, if set session_device_id, session_device_id has a higher priority"; @@ -113,7 +113,7 @@ void Split(const std::string &s, std::vector &result, const char *d delete[] buffer; } -inline Status checkDumpStep(const std::string &dump_step) { +inline Status checkDumpStep(const string &dump_step) { std::string tmp_dump_step = dump_step + "|"; std::smatch result; std::vector match_vecs; @@ -139,7 +139,7 @@ inline Status checkDumpStep(const std::string &dump_step) { return Status::OK(); } -inline Status checkDumpMode(const std::string &dump_mode) { +inline Status checkDumpMode(const string &dump_mode) { std::set dump_mode_list = {"input", "output", "all"}; std::set::iterator iter; @@ -150,7 +150,7 @@ inline Status checkDumpMode(const std::string &dump_mode) { } } -inline Status checkDumpDebugMode(const std::string &dump_debug_mode) { +inline Status checkDumpDebugMode(const string &dump_debug_mode) { std::set dump_debug_mode_list = {"aicore_overflow", "atomic_overflow", "all"}; std::set::iterator iter; @@ -161,7 +161,7 @@ inline Status checkDumpDebugMode(const std::string &dump_debug_mode) { } } -inline Status CheckPath(const std::string &input, std::string &output) { +inline Status CheckPath(const string &input, string &output) { if (mmIsDir(input.c_str()) != EN_OK) { return errors::InvalidArgument("the path ", input.c_str(), " is not directory."); } @@ -176,7 +176,7 @@ inline Status CheckPath(const std::string &input, std::string &output) { return Status::OK(); } -inline Status CheckOpImplMode(const std::string &op_select_implmode) { +inline Status CheckOpImplMode(const string &op_select_implmode) { std::set op_impl_mode_list = {"high_precision", "high_performance", "high_precision_for_all", "high_performance_for_all"}; @@ -188,7 +188,7 @@ inline Status CheckOpImplMode(const std::string &op_select_implmode) { } } -inline Status CheckAoeMode(const std::string &aoe_mode) { +inline Status CheckAoeMode(const string &aoe_mode) { std::set aoe_mode_list = {"1", "2", "3", "4"}; if (aoe_mode_list.find(aoe_mode) != aoe_mode_list.end()) { @@ -198,7 +198,7 @@ inline Status CheckAoeMode(const std::string &aoe_mode) { } } -inline Status CheckInputShape(const std::string &input_shape) { +inline Status CheckInputShape(const string &input_shape) { std::vector inputs; Split(input_shape, inputs, ";"); if (inputs.empty()) { @@ -214,7 +214,7 @@ inline Status CheckInputShape(const std::string &input_shape) { return Status::OK(); } -inline Status CheckDynamicDims(const std::string &dynamic_dims) { +inline Status CheckDynamicDims(const string &dynamic_dims) { std::vector inputs; Split(dynamic_dims, inputs, ";"); if (inputs.empty()) { @@ -303,7 +303,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction std::string dump_mode = "output"; std::string dump_debug_mode = "all"; std::string stream_max_parallel_num; - std::string npuOptimizer; + string npuOptimizer; std::string is_tailing_optimization = std::to_string(false); std::string op_select_implmode; std::string optypelist_for_implmode; @@ -511,7 +511,7 @@ std::map NpuAttrs::GetPassOptions(const GraphOptimizat bool mix_compile_mode = false; int iterations_per_loop = 1; bool lower_functional_ops = false; - std::string job = "default"; + string job = "default"; int task_index = 0; bool dynamic_input = false; std::string dynamic_graph_execute_mode = "dynamic_execute"; @@ -611,7 +611,7 @@ std::map NpuAttrs::GetPassOptions(OpKernelConstruction std::string mix_compile_mode = std::to_string(false); std::string iterations_per_loop = "1"; std::string lower_functional_ops = std::to_string(false); - std::string job = "default"; + string job = "default"; std::string task_index = "0"; std::string dynamic_input = std::to_string(false); std::string dynamic_graph_execute_mode = "dynamic_execute"; @@ -621,7 +621,7 @@ std::map NpuAttrs::GetPassOptions(OpKernelConstruction std::string in_out_pair_flag = std::to_string(true); std::string in_out_pair; Status s = Status::OK(); - std::string npuOptimizer; + string npuOptimizer; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { do_npu_optimizer = "1"; @@ -669,7 +669,7 @@ std::map NpuAttrs::GetPassOptions(AttrSlice attrs) { std::string mix_compile_mode = std::to_string(false); std::string iterations_per_loop = "1"; std::string lower_functional_ops = std::to_string(false); - std::string job = "default"; + string job = "default"; std::string task_index = "0"; std::string dynamic_input = std::to_string(false); std::string dynamic_graph_execute_mode = "dynamic_execute"; @@ -757,7 +757,7 @@ std::map NpuAttrs::GetAllAttrOptions(AttrSlice attrs) std::string mix_compile_mode = std::to_string(false); std::string iterations_per_loop = "1"; std::string lower_functional_ops = std::to_string(false); - std::string job = "default"; + string job = "default"; std::string task_index = "0"; std::string local_rank_id = "-1"; std::string local_device_list; @@ -1110,7 +1110,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options bool mix_compile_mode = false; int iterations_per_loop = 1; bool lower_functional_ops = false; - std::string job = "localhost"; + string job = "localhost"; int task_index = 0; bool dynamic_input = false; std::string dynamic_graph_execute_mode = "dynamic_execute"; @@ -1158,17 +1158,15 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("enable_dump_debug")) { enable_dump_debug = params.at("enable_dump_debug").b(); } if (enable_dump || enable_dump_debug) { if (params.count("dump_path")) { - std::string tmp_path = params.at("dump_path").s(); + string tmp_path = params.at("dump_path").s(); Status s = CheckPath(tmp_path, dump_path); if (!s.ok()) { - ADP_LOG(ERROR) << s.error_message(); - LOG(ERROR) << s.error_message(); - return errors::Internal(s.error_message()); + ADP_LOG(FATAL) << s.error_message(); + LOG(FATAL) << s.error_message(); } } else { - ADP_LOG(ERROR) << "if use dump function, dump_path must be set."; - LOG(ERROR) << "if use dump function, dump_path must be set."; - return errors::Internal("if use dump function, dump_path must be set."); + ADP_LOG(FATAL) << "if use dump function, dump_path must be set."; + LOG(FATAL) << "if use dump function, dump_path must be set."; } } if (enable_dump) { @@ -1288,8 +1286,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options dynamic_graph_execute_mode = params.at("dynamic_graph_execute_mode").s(); if (dynamic_graph_execute_mode != "lazy_recompile" && dynamic_graph_execute_mode != "dynamic_execute") { ADP_LOG(ERROR) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; - LOG(ERROR) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; - return errors::Internal("dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."); + LOG(FATAL) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; } } if (params.count("dynamic_inputs_shape_range")) { dynamic_inputs_shape_range = params.at("dynamic_inputs_shape_range").s(); } @@ -1300,8 +1297,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options Status s = CheckLocalRankId(local_rank_id); if (!s.ok()) { ADP_LOG(ERROR) << s.error_message(); - LOG(ERROR) << s.error_message(); - return errors::Internal(s.error_message()); + LOG(FATAL) << s.error_message(); } } if (params.count("local_device_list")) { @@ -1309,8 +1305,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options Status s = CheckDeviceList(local_device_list); if (!s.ok()) { ADP_LOG(ERROR) << s.error_message(); - LOG(ERROR) << s.error_message(); - return errors::Internal(s.error_message()); + LOG(FATAL) << s.error_message(); } } if (params.count("in_out_pair_flag")) { in_out_pair_flag = params.at("in_out_pair_flag").b(); } @@ -1476,11 +1471,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options pass_options["in_out_pair_flag"] = std::to_string(in_out_pair_flag); pass_options["in_out_pair"] = in_out_pair; - if (!node) { - ADP_LOG(ERROR) << "node is null."; - LOG(ERROR) << "node is null."; - return errors::Internal("node is null."); - } std::string attr_name; for (const auto &option : sess_options) { attr_name = std::string("_") + option.first;