diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index 687a1f5b50bbbaad415ad080e847cb08d06b51ef..491b3b9a8e48b6530f864418cf561cb02336ca3e 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -215,7 +215,8 @@ GeOp::GeOp(OpKernelConstruction *ctx)
     sess_init_flag_(false), compute_graph_empty_(false), data_format_(""), graph_id_(0),
     is_initialized_graph_(false), need_iteration_(false), tf_session_(""), ge_session_(nullptr),
     job_type_(""), is_host_graph_(false), handle_(nullptr), aoe_tuning_(nullptr),
-    need_compile_graph_first_(false), aoe_init_(nullptr), aoe_finalize_(nullptr) {
+    need_compile_graph_first_(false), aoe_init_(nullptr), aoe_finalize_(nullptr),
+    tuned_flag_(ATOMIC_FLAG_INIT) {
   Initialize(ctx);
 }
 
@@ -523,19 +524,32 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
   bool is_set_dynamic_config = !sess_options_["ge.inputShape"].empty() && !sess_options_["ge.dynamicDims"].empty();
   bool is_tuning = !init_options_["ge.jobType"].empty() && !init_options_["ge.tuningPath"].empty();
   bool is_lazy_recompile_mode = dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile";
-  if (is_set_dynamic_config && is_tuning) {
-    ADP_LOG(FATAL) << "dynamic input config can not use with mstuning.";
-    std::stringstream ss;
-    ss << "dynamic input config can not use with mstuning.";
-    OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done);
-    return;
-  } else if (is_set_dynamic_config && !is_tuning) {
+  ADP_LOG(INFO) << "is_set_dynamic_config: " << is_set_dynamic_config
+                << " is_tuning: " << is_tuning
+                << " is_lazy_recompile_mode: " << is_lazy_recompile_mode;
+
+  if (is_tuning) {
+    if (is_set_dynamic_config) {
+      ADP_LOG(ERROR) << "dynamic input config can not use with mstuning.";
+      OP_REQUIRES_ASYNC(ctx, false, errors::Internal("dynamic input config can not use with mstuning."), done);
+      return;
+    }
+    auto input_vec_aoe = input_vec;
+    if (RunTuning(input_vec_aoe, ctx) != 0) {
+      ADP_LOG(ERROR) << "RunTuning fail.";
+      done();
+      return;
+    }
+    if (InitRebuildFlag(cache_graph_id) != 0) {
+      OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done);
+      return;
+    }
+    ADP_LOG(INFO) << "RunTuning finish.";
+  } else if (is_set_dynamic_config) {
     if (InitRebuildFlag(cache_graph_id) != 0) {
       OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done);
       return;
     }
-  } else if (!is_set_dynamic_config && is_tuning) {
-    ADP_LOG(INFO) << "[GEOP] in tune func, do not rebuild graph.";
   } else {
     // in dynamic input mode, cache graphs.
     if (is_lazy_recompile_mode) {
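The reworked dispatch above replaces the LOG(FATAL) (which aborted the process before the async error could ever be reported) with a plain error return, and flattens the four-way condition into a guard-clause ladder: tuning first rejects any dynamic-shape config, runs AOE once via RunTuning, then falls through to the shared build path instead of returning early. A minimal standalone model of that branch order; the stub helpers stand in for the real RunTuning/InitRebuildFlag, and only the condition names come from the patch:

    #include <iostream>

    // Stubs standing in for the real helpers; only the branch order mirrors the hunk above.
    static int RunTuning() { return 0; }
    static int InitRebuildFlag() { return 0; }

    // Returns false on a hard error, true when the caller may continue to the
    // shared AddGraph/RunGraphAsync path.
    static bool Dispatch(bool is_tuning, bool is_set_dynamic_config) {
      if (is_tuning) {
        if (is_set_dynamic_config) return false;  // "dynamic input config can not use with mstuning."
        if (RunTuning() != 0) return false;       // tune once, then fall through
        if (InitRebuildFlag() != 0) return false;
      } else if (is_set_dynamic_config) {
        if (InitRebuildFlag() != 0) return false;
      }
      return true;
    }

    int main() {
      std::cout << std::boolalpha << Dispatch(true, true) << "\n";  // false: rejected combination
    }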
@@ -590,65 +604,8 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) {
   OP_REQUIRES_ASYNC(ctx, compute_graph != nullptr, errors::InvalidArgument("create ComputeGraph failed"), done);
 
   auto build_sub_graph = [this, flib_def](const std::string &graph) -> std::string {
-    // const tensorflow::GraphDef *graph_def_in = reinterpret_cast<const tensorflow::GraphDef *>(root_proto);
-    ADP_LOG(INFO) << "[GEOP] build_sub_graph enter, sub graph name is " << graph;
-    const FunctionDef *func_def = flib_def->Find(graph);
-    if (func_def == nullptr) {
-      ADP_LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph;
-      LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph;
-      return "";
-    }
-    // get infershape
-    Graph subgraph(flib_def);
-    Status status = InferShapeUtil::GetSubGraphFromFunctionDef(*flib_def, *func_def, &subgraph);
-    if (status != Status::OK()) {
-      ADP_LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail:" << status.error_message();
-      LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail:" << status.error_message();
-      return "";
-    }
-    ADP_LOG(INFO) << "[GEOP] Get subgraph from functiondef success.";
-    char *enable_force_v2_control = getenv("ENABLE_FORCE_V2_CONTROL");
-    if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) {
-      GraphDef graph_def;
-      subgraph.ToGraphDef(&graph_def);
-      WriteTextProto(Env::Default(), GetDumpPath() + graph + "_graph.pbtxt", graph_def);
-    }
-
-    bool is_initialize = false;
-    for (Node *node : subgraph.nodes()) {
-      AddNodeAttrs(node, is_initialize);
-
-      // Add Input&Output Desc into NodeDef
-      if (GenerateDesc(node) != Status::OK()) {
-        ADP_LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string()
-                         << " Generate desc failed in subgraph.";
-        LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string()
-                     << " Generate desc failed in subgraph.";
-      }
-    }
-
-    unique_ptr<GraphDef> sub_graph_def(new (std::nothrow) GraphDef());
-    if (sub_graph_def == nullptr) {
-      ADP_LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail.";
-      LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail.";
-      return "";
-    }
-    subgraph.ToGraphDef(sub_graph_def.get());
-    if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) {
-      sub_graph_def->release_library();
-      sub_graph_def->mutable_versions()->clear_min_consumer();
-    }
-
-    char *need_print = getenv("PRINT_MODEL");
-    if (need_print != nullptr && strcmp("1", need_print) == 0) {
-      string tmpmodel_path = GetDumpPath() + "TF_Subgraph_";
-      string tmodel_path = tmpmodel_path + graph.c_str() + ".pbtxt";
-      Status status_out = WriteTextProto(Env::Default(), tmodel_path, *sub_graph_def);
-    }
-    ADP_LOG(INFO) << "[GEOP] build_sub_graph exit, sub graph name is " << graph;
-    return sub_graph_def->SerializeAsString();
+    return this->BuildSubGraph(flib_def, graph);
   };
-
   ge::Status status = model_parser->ParseProtoWithSubgraph(ori_graph_def.SerializeAsString(), build_sub_graph, compute_graph);
   if (status != ge::SUCCESS) {
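ParseProtoWithSubgraph takes a callback that serializes each function-library subgraph on demand; with the ~60-line body moved into GeOp::BuildSubGraph, ComputeAsync and the new RunTuning share one implementation and the lambda shrinks to a forwarder. A toy sketch of the capture-and-forward shape; the types and names here are stand-ins, not the GE parser API:

    #include <functional>
    #include <iostream>
    #include <string>

    struct Op {
      // Shared implementation, analogous to GeOp::BuildSubGraph.
      std::string BuildSubGraph(const std::string &name) { return "serialized(" + name + ")"; }

      // Analogous to ModelParser::ParseProtoWithSubgraph invoking the callback per subgraph.
      void Parse(const std::function<std::string(const std::string &)> &build_sub_graph) {
        std::cout << build_sub_graph("subgraph_a") << "\n";
      }

      void Run() {
        auto build_sub_graph = [this](const std::string &g) -> std::string {
          return this->BuildSubGraph(g);  // one-line forwarder, as in the hunk above
        };
        Parse(build_sub_graph);
      }
    };

    int main() { Op{}.Run(); }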
(dynamic_graph_execute_mode_ == "dynamic_execute" && data_inputs_shape_range_.empty()) { - graph_options_["ge.shape_generalized_build_mode"] = "shape_generalized"; - } - } - if (is_tuning) { - if (is_train_graph_ != "1" && init_options_["ge.jobType"] != "2" && init_options_["ge.jobType"] != "1") { - ADP_LOG(INFO) << "[GEOP] in tune mode, nontraining graphs should be cache."; - OP_REQUIRES_ASYNC(ctx, SessionManager::GetInstance().CacheGeGraphs(ge_session_, ge_graph), - errors::Internal("[GEOP] cache ge session failed."), done); - build_flag_ = true; - BuildOutTensorInfo(ctx); - done(); - return; - } else { - ADP_LOG(INFO) << "[GEOP] in tune mode, training graph handled by tools."; - std::vector ge_graphs; - OP_REQUIRES_ASYNC(ctx, SessionManager::GetInstance().GetGeGraphs(ge_session_, ge_graphs), - errors::Internal("[GEOP] ge ge session nontraining graphs failed."), done); - tune_options_.insert(graph_options_.begin(), graph_options_.end()); - AoeStatus tune_ret = (*aoe_tuning_)(ge_graph, ge_graphs, ge_session_, tune_options_); - OP_REQUIRES_ASYNC(ctx, (tune_ret == AOE_SUCCESS) || (tune_ret == AOE_ERROR_NO_AICORE_GRAPH), - errors::Internal("[GEOP] exec aoe tuning func failed[", tune_ret, "]."), done); - ADP_LOG(INFO) << "[GEOP] aoe success[" << tune_ret << "]."; - build_flag_ = true; - BuildOutTensorInfo(ctx); - done(); - return; - } - } - + SetDynamicInput(); + // call ge session addGraph api status = ge_session_->AddGraph(cache_graph_id, ge_graph, graph_options_); if (status != ge::SUCCESS) { @@ -774,12 +699,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { } } - if (is_tuning) { - BuildOutTensorInfo(ctx); - done(); - return; - } - int64 run_start_time = InferShapeUtil::GetCurrentTimestap(); auto callback = [done, ctx, run_start_time](ge::Status ge_status, std::vector &outputs) { if (ge_status == ge::SUCCESS) { @@ -896,7 +815,6 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def, !sess_options_["ge.dynamicNodeType"].empty(); if (is_set_dynamic_config) { BuildShapeNodeAndCacheArgNodes(graph); } - bool is_tuning = !init_options_["ge.jobType"].empty() && !init_options_["ge.tuningPath"].empty(); for (Node *node : graph.nodes()) { AddNodeAttrs(node, is_initialize); // Add Input&Output Desc into NodeDef @@ -916,27 +834,6 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def, return ret; } } - - if (is_tuning) { - // output handle - NodeDef &node_def = const_cast(node->def()); - if (node->type_string() == "_Retval") { - int index = node_def.attr().at("index").i(); - // format: AttrValue.list(ListValue).func(repeated NameAttrList) - NameAttrList desc_attr = node_def.attr().at(INPUT_DESC).list().func(0); - - std::vector dims; - int dim_num = desc_attr.attr().at(SERIALIZE_SHAPE).list().i_size(); - for (int t = 0; t < dim_num; t++) { - int64 dim_i = (int64_t) desc_attr.attr().at(SERIALIZE_SHAPE).list().i(t); - if (dim_i < 0) { dim_i = 1; } - dims.push_back(dim_i); - } - - TensorShape out_shape(dims); - outputs_shape_.insert(std::map::value_type(index, out_shape)); - } - } } // set input_shape to dynamic nodes shape desc if (is_set_dynamic_config) { @@ -1116,6 +1013,166 @@ void GeOp::SetShapesToOutputDesc(const std::vector &input_shapes, } } +int GeOp::RunTuning(std::vector &input_vec, OpKernelContext *ctx) { + if (tuned_flag_.test_and_set()) { + ADP_LOG(INFO) << ctx->op_kernel().name() << " has tuned."; + return 0; + } + ADP_LOG(INFO) << "[GEOP] " << ctx->op_kernel().name() << " begin tune." 
@@ -1116,6 +1013,166 @@ void GeOp::SetShapesToOutputDesc(const std::vector<std::string> &input_shapes,
   }
 }
 
+int GeOp::RunTuning(std::vector<Tensor> &input_vec, OpKernelContext *ctx) {
+  if (tuned_flag_.test_and_set()) {
+    ADP_LOG(INFO) << ctx->op_kernel().name() << " has tuned.";
+    return 0;
+  }
+  ADP_LOG(INFO) << "[GEOP] " << ctx->op_kernel().name() << " begin tune.";
+
+  // Get Graph
+  if (ctx->function_library() == nullptr) {
+    ADP_LOG(ERROR) << "function library is nullptr";
+    return -1;
+  }
+  FunctionLibraryDefinition *flib_def = const_cast<FunctionLibraryDefinition *>(
+      ctx->function_library()->GetFunctionLibraryDefinition());
+  if (flib_def == nullptr) {
+    ADP_LOG(ERROR) << "flib_def is nullptr";
+    return -1;
+  }
+  std::shared_ptr<Graph> graph = std::make_shared<Graph>(OpRegistry::Global());
+  if (graph == nullptr) {
+    ADP_LOG(ERROR) << "create tensorflow graph failed";
+    return -1;
+  }
+
+  // Build GraphDef from FunctionDef
+  GraphDef ori_graph_def;
+  Status s = BuildGraphDef(*flib_def, input_vec, ori_graph_def, is_initialized_graph_);
+  if (!s.ok()) {
+    ADP_LOG(ERROR) << "BuildGraphDef error";
+    return -1;
+  }
+
+  // parser, tensorflow graph to ge graph
+  std::shared_ptr<domi::ModelParser> model_parser =
+      domi::ModelParserFactory::Instance()->CreateModelParser(domi::FrameworkType::TENSORFLOW);
+  if (model_parser == nullptr) {
+    ADP_LOG(ERROR) << "create model parser ret failed.";
+    return -1;
+  }
+  ge::ComputeGraphPtr compute_graph = nullptr;
+  compute_graph = std::make_shared<ge::ComputeGraph>("ge_default_" + CurrentTimeInStr());
+  if (compute_graph == nullptr) {
+    ADP_LOG(ERROR) << "create ComputeGraph failed";
+    return -1;
+  }
+
+  auto build_sub_graph = [this, flib_def](const std::string &graph) -> std::string {
+    return this->BuildSubGraph(flib_def, graph);
+  };
+
+  ge::Status status = model_parser->ParseProtoWithSubgraph(ori_graph_def.SerializeAsString(),
+                                                           build_sub_graph, compute_graph);
+  if (status != ge::SUCCESS) {
+    std::stringstream ss;
+    ss << "graph parse failed. ret : " << status << std::endl
+       << "Error Message is : " << std::endl
+       << ge::GEGetErrorMsg();
+    ADP_LOG(ERROR) << ss.str();
+    return -1;
+  }
+  domi::GetContext().format = ge::GetParserContext().format;
+  ADP_LOG(INFO) << "[GEOP] Tensorflow graph parse to ge graph success.";
+
+  // convert to ge::graph
+  ge::Graph ge_graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph);
+  if (iteration_per_loop_ > 1) {
+    ge_graph.SetNeedIteration(this->need_iteration_);
+  }
+  if (is_host_graph_) {
+    graph_options_["ge.exec.placement"] = "HOST";
+  }
+  SetDynamicInput();
+
+  if (is_train_graph_ != "1" && init_options_["ge.jobType"] != "2" && init_options_["ge.jobType"] != "1") {
+    ADP_LOG(INFO) << "[GEOP] in tune mode, nontraining graphs should be cache.";
+    if (!SessionManager::GetInstance().CacheGeGraphs(ge_session_, ge_graph)) {
+      ADP_LOG(ERROR) << "cache ge session failed.";
+      return -1;
+    }
+    return 0;
+  } else {
+    ADP_LOG(INFO) << "[GEOP] in tune mode, training graph handled by tools.";
+    std::vector<ge::Graph> ge_graphs;
+    if (!SessionManager::GetInstance().GetGeGraphs(ge_session_, ge_graphs)) {
+      ADP_LOG(ERROR) << "get ge session nontraining graphs failed.";
+      return -1;
+    }
+    tune_options_.insert(graph_options_.begin(), graph_options_.end());
+    AoeStatus tune_ret = (*aoe_tuning_)(ge_graph, ge_graphs, ge_session_, tune_options_);
+    if (tune_ret != AOE_SUCCESS) {
+      ADP_LOG(ERROR) << "exec aoe tuning func failed.";
+      return -1;
+    }
+    ADP_LOG(INFO) << "[GEOP] aoe success.";
+    return 0;
+  }
+}
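RunTuning is guarded by tuned_flag_, initialized with ATOMIC_FLAG_INIT in the constructor hunk above: std::atomic_flag::test_and_set() atomically sets the flag and returns its previous value, so exactly one invocation (even under concurrent ComputeAsync calls) performs the expensive tuning and every later one returns 0 immediately. A minimal sketch of the idiom, with a hypothetical wrapper class:

    #include <atomic>
    #include <iostream>

    class OneShot {
     public:
      // True only for the first caller; all later callers get false.
      bool TryAcquire() { return !flag_.test_and_set(); }
     private:
      std::atomic_flag flag_ = ATOMIC_FLAG_INIT;
    };

    int main() {
      OneShot tuned;
      for (int i = 0; i < 3; ++i)
        std::cout << "call " << i << ": " << (tuned.TryAcquire() ? "tune" : "skip") << "\n";
    }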
+
+std::string GeOp::BuildSubGraph(FunctionLibraryDefinition *flib_def, const std::string &graph) {
+  // const tensorflow::GraphDef *graph_def_in = reinterpret_cast<const tensorflow::GraphDef *>(root_proto);
+  ADP_LOG(INFO) << "[GEOP] build_sub_graph enter, sub graph name is " << graph;
+  const FunctionDef *func_def = flib_def->Find(graph);
+  if (func_def == nullptr) {
+    ADP_LOG(ERROR) << "[GEOP] Sub graph not found in library, sub graph name is " << graph;
+    return "";
+  }
+  // get infershape
+  Graph subgraph(flib_def);
+  Status status = InferShapeUtil::GetSubGraphFromFunctionDef(*flib_def, *func_def, &subgraph);
+  if (status != Status::OK()) {
+    ADP_LOG(ERROR) << "[GEOP] Get subgraph from functiondef fail:" << status.error_message();
+    return "";
+  }
+  ADP_LOG(INFO) << "[GEOP] Get subgraph from functiondef success.";
+  char *enable_force_v2_control = getenv("ENABLE_FORCE_V2_CONTROL");
+  if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) {
+    GraphDef graph_def;
+    subgraph.ToGraphDef(&graph_def);
+    WriteTextProto(Env::Default(), GetDumpPath() + graph + "_graph.pbtxt", graph_def);
+  }
+  bool is_initialize = false;
+  for (Node *node : subgraph.nodes()) {
+    AddNodeAttrs(node, is_initialize);
+    // Add Input&Output Desc into NodeDef
+    if (GenerateDesc(node) != Status::OK()) {
+      ADP_LOG(WARNING) << "[GEOP] name: " << node->name() << " op:" << node->type_string()
+                       << " Generate desc failed in subgraph.";
+    }
+  }
+  unique_ptr<GraphDef> sub_graph_def(new (std::nothrow) GraphDef());
+  if (sub_graph_def == nullptr) {
+    ADP_LOG(ERROR) << "[GEOP] Malloc memory for subgraph def fail.";
+    return "";
+  }
+  subgraph.ToGraphDef(sub_graph_def.get());
+  if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) {
+    sub_graph_def->release_library();
+    sub_graph_def->mutable_versions()->clear_min_consumer();
+  }
+  char *need_print = getenv("PRINT_MODEL");
+  if (need_print != nullptr && strcmp("1", need_print) == 0) {
+    string tmpmodel_path = GetDumpPath() + "TF_Subgraph_";
+    string tmodel_path = tmpmodel_path + graph.c_str() + ".pbtxt";
+    Status status_out = WriteTextProto(Env::Default(), tmodel_path, *sub_graph_def);
+  }
+  ADP_LOG(INFO) << "[GEOP] build_sub_graph exit, sub graph name is " << graph;
+  return sub_graph_def->SerializeAsString();
+}
+
+void GeOp::SetDynamicInput() {
+  if (dynamic_input_ == "1") {
+    graph_options_["ge.exec.dynamicInput"] = dynamic_input_;
+    graph_options_["ge.exec.dynamicGraphExecuteMode"] = dynamic_graph_execute_mode_;
+    graph_options_["ge.exec.dataInputsShapeRange"] = data_inputs_shape_range_;
+    if (dynamic_graph_execute_mode_ == "dynamic_execute" && data_inputs_shape_range_.empty()) {
+      graph_options_["ge.shape_generalized_build_mode"] = "shape_generalized";
+    }
+  }
+}
+
 void GeOp::AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType type,
                             std::vector<std::string> &input_shapes) {
   ADP_LOG(INFO) << "[GEOP] Start analyze input tensor.";
@@ -1200,17 +1257,6 @@ Status GeOp::BuildInputTensorInfo(OpKernelContext *ctx,
   return Status::OK();
 }
 
-Status GeOp::BuildOutTensorInfo(OpKernelContext *ctx) {
-  int num_outputs = ctx->num_outputs();
-  // populate outputs
-  for (int i = 0; i < num_outputs; i++) {
-    TensorShape out_shape = outputs_shape_.at(i);
-    Tensor *tensor = nullptr;
-    TF_RETURN_IF_ERROR(ctx->allocate_output(i, out_shape, &tensor));
-  }
-  return Status::OK();
-}
-
 // For each NodeDef, Create Input&Output Desc(shape,format,dataType)
 Status GeOp::GenerateDesc(Node *&node) {
   REQUIRES_NOT_NULL(node);
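BuildSubGraph keeps the env-gated debug dumps: ENABLE_FORCE_V2_CONTROL=1 also rewrites the GraphDef to its V2 (functional) form, and PRINT_MODEL=1 writes a pbtxt snapshot; the new tests below toggle both. The gate itself is just getenv plus a string compare, sketched standalone:

    #include <cstdlib>
    #include <cstring>
    #include <iostream>

    static bool EnvFlagSet(const char *name) {
      const char *v = std::getenv(name);
      return v != nullptr && std::strcmp(v, "1") == 0;
    }

    int main() {
      if (EnvFlagSet("PRINT_MODEL")) {
        // In the patch: WriteTextProto(Env::Default(), tmodel_path, *sub_graph_def);
        std::cout << "dump model pbtxt\n";
      }
    }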
diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h
index f6c1d5d753cccff9804ba18d52b35c094bb5889c..08c2350c8d8bb88a88874a39f537ae410dc5f112 100644
--- a/tf_adapter/kernels/geop_npu.h
+++ b/tf_adapter/kernels/geop_npu.h
@@ -29,6 +29,7 @@
 #include "graph/utils/graph_utils.h"
 #include "tune_api.h"
 #include
+#include <atomic>
 
 namespace tensorflow {
 using AoeTuningFunc = AoeStatus (*)(ge::Graph &, std::vector<ge::Graph> &, ge::Session *,
@@ -60,9 +61,6 @@ class GeOp : public AsyncOpKernel {
                               std::vector<std::string> &input_shapes,
                               std::vector<ge::Tensor> &inputs);
 
-  // prepare output tensor
-  Status BuildOutTensorInfo(OpKernelContext *ctx);
-
   // create input and output desc for NodeDef
   Status GenerateDesc(Node *&node);
 
@@ -96,6 +94,11 @@ class GeOp : public AsyncOpKernel {
   void AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType type,
                         std::vector<std::string> &input_shapes);
 
+  int RunTuning(std::vector<Tensor> &input_vec, OpKernelContext *ctx);
+
+  std::string BuildSubGraph(FunctionLibraryDefinition *flib_def, const std::string &graph);
+
+  void SetDynamicInput();
 private:
   static const std::string INPUT_DESC;
@@ -145,6 +148,8 @@ class GeOp : public AsyncOpKernel {
   std::map<std::string, std::string> tune_options_;
   std::string is_dynamic_getnext_;
   std::string placeholder_index_;
+
+  std::atomic_flag tuned_flag_;
 };
 } // namespace tensorflow
 #endif // TENSORFLOW_KERNELS_GEOP_NPU_H_
diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
index 2645fee89813ee2bc3a61cec67759172e46af806..8df70b865256ec4e47e99f3d0fefe540371eacbb 100644
--- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
+++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
@@ -153,6 +153,15 @@ TEST_F(GeOpTest, GeOpDynamicInput1Test) {
   EXPECT_TRUE(!attrs["_dynamic_input"].s().empty());
   EXPECT_EQ(attrs["_dynamic_graph_execute_mode"].s() == "dynamic_execute", true);
 }
+TEST_F(GeOpTest, GeOpAoeTuningAndDynamicDimsTest) {
+  setenv("PRINT_MODEL", "1", true);
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt";
+  Tensor a(DT_INT32, TensorShape({1,}));
+  gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&a)};
+  setenv("ENABLE_FORCE_V2_CONTROL", "1", true);
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0", false).ok());
+}
 TEST_F(GeOpTest, GeOpAoeTuningTest) {
   Env* env = Env::Default();
   GraphDef graph_def;
diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
index 0f272a715e949d1ebbd09de44cc845471869b319..447f8dac04271f065a5298051111418f5bd93a76 100644
--- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
+++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
@@ -1,6 +1,13 @@
 #include "tf_adapter/util/npu_attrs.h"
 #include "gtest/gtest.h"
 #include
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
 
 namespace tensorflow {
 Status CheckOpImplMode(const string &op_select_implmode);
@@ -65,5 +72,35 @@
   setenv("DUMP_GRAPH_PATH", "./dump_fold", 1);
   string new_path = GetDumpPath();
   EXPECT_NE(new_path, "./dump_fold/");
 }
+
+TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableDump) {
+  GraphOptimizationPassOptions options;
+  SessionOptions session_options;
+  session_options.config.mutable_graph_options()
+      ->mutable_optimizer_options()
+      ->set_do_function_inlining(true);
+  auto *custom_config = session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers();
+  custom_config->set_name("NpuOptimizer");
+  AttrValue enable_dump_debug = AttrValue();
+  enable_dump_debug.set_b(true);
+  (*custom_config->mutable_parameter_map())["enable_dump_debug"] = enable_dump_debug;
+  options.session_options = &session_options;
+  Status s =
NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); + + AttrValue dump_path = AttrValue(); + dump_path.set_s("/invalid"); + (*custom_config->mutable_parameter_map())["dump_path"] = dump_path; + s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); + + dump_path.set_s("/"); + (*custom_config->mutable_parameter_map())["dump_path"] = dump_path; + AttrValue dump_step = AttrValue(); + dump_step.set_s("777"); + (*custom_config->mutable_parameter_map())["dump_step"] = dump_step; + s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); +} } } // end tensorflow \ No newline at end of file diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..39c217cc85a7ba3433b61b58ba0b090e0a874470 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt @@ -0,0 +1,696 @@ +node { + name: "arg_Placeholder_0_0" + op: "_Arg" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "retval_Add_1_0" + op: "_Retval" + input: "GeOp13_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp13_0" + op: "GeOp" + input: "arg_Placeholder_0_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "" + } + } + attr { + key: "_dump_step" + value { + s: "" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_dynamic_input" + value { + s: "1" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "1" + } + } + attr { + key: "_enable_dump" + value { + s: "0" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "0" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { 
+ key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "777" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_aoe_mode" + value { + s: "2" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_train_graph" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp13_0" + } + } + } +} +library { + function { + signature { + name: "GeOp13_0" + input_arg { + name: "arg_Placeholder_0_0_0_arg" + type: DT_FLOAT + } + output_arg { + name: "Add_1_retval" + type: DT_FLOAT + } + } + node_def { + name: "add/x" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 128 + } + } + tensor_content: "\321[{>\267\360\212=`F\033?\016Y>%\']>\333o\243>%\242L?{\312\253>}\\\270>\211\276\217>\370h\351>\262Lh?FR^?\255a\361>\326\214\334>\243\026\177?\262\324E>\2064\335>\262P\334=\030\317T?\230$\345>\376\216d?\21540?G\250\255=@\301z?)\317\237>\346\017\250m?\t9y?\210n]?c\260j>3\224\203>\261\324\006?\007t/>\354\251\321>\030\320q?\375\226\342>\344\020 ?\031\037n?o\333^=,S/?\363\373\217>\246mM?\336L\021?\026\257\324>\035\024\037?K\247\024?\202\217r?\3206\002?\r\026\326>\212\264\261=/e^?\376P\010?\260\2440?\222E\346>\223\010\311>\017t\007?;\324-?Y\230n?@\326\036?:,\364=H\022Z?n<\037>U\311I?f2\313>T\'\\?\310z\335>1\214M?\r\321\322>|;\344>\263\365U?\230\243 ?\251\3453?-\025\311>Ztu?\213\353\215>^T\021?|\311??(\301\255>\224\262\030?\243\361\322=\320\344\274>\212\001\203=\272\027\320>\005\254\354>Z\371-?\355\315\367>\2035)?w\227j?\355My?8c\347>\221\207\270>\215\306\343>(&\273>\320\034\363>\023I*>dyX?\024\365f?\313RP?J\226\001?\240|\'=\370\365:?\204\276c?\346\231\276>B\007H=\267N\243>_\273\310>\271\032\t?k\232%?\322\3517?J\220A?\356\240U?\310\2705?\347.\030?R\264\312>u4c;Ptb?G\3424?\343S,?O\311\020?\312JP?\347Z\020?\216\334n?" 
+ } + } + } + } + node_def { + name: "strided_slice/stack" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "strided_slice/stack_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "IteratorV2" + op: "IteratorV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_train_graph" + value { + b: false + } + } + attr { + key: "_iterations_per_loop" + value { + s: "777" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "shared_name" + value { + s: "IteratorV2" + } + } + } + node_def { + name: "add" + op: "AddV2" + input: "arg_Placeholder_0_0_0_arg" + input: "add/x:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "IteratorGetNext" + op: "IteratorGetNext" + input: "IteratorV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + } + } + } + } + node_def { + name: "strided_slice" + op: "StridedSlice" + input: "IteratorGetNext:components:0" + input: "strided_slice/stack:output:0" + input: "strided_slice/stack_1:output:0" + input: "strided_slice/stack_1:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "Add_1" + op: "Add" + input: "add:z:0" + input: "strided_slice:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add_1_retval" + value: "Add_1:z:0" + } + } +} +versions { + producer: 134 + min_consumer: 12 +} diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 2645fee89813ee2bc3a61cec67759172e46af806..8df70b865256ec4e47e99f3d0fefe540371eacbb 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ 
b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
@@ -153,6 +153,15 @@ TEST_F(GeOpTest, GeOpDynamicInput1Test) {
   EXPECT_TRUE(!attrs["_dynamic_input"].s().empty());
   EXPECT_EQ(attrs["_dynamic_graph_execute_mode"].s() == "dynamic_execute", true);
 }
+TEST_F(GeOpTest, GeOpAoeTuningAndDynamicDimsTest) {
+  setenv("PRINT_MODEL", "1", true);
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_aoe_tuning_and_dynamic_dims.pbtxt";
+  Tensor a(DT_INT32, TensorShape({1,}));
+  gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&a)};
+  setenv("ENABLE_FORCE_V2_CONTROL", "1", true);
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0", false).ok());
+}
 TEST_F(GeOpTest, GeOpAoeTuningTest) {
   Env* env = Env::Default();
   GraphDef graph_def;
diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
index 0f272a715e949d1ebbd09de44cc845471869b319..447f8dac04271f065a5298051111418f5bd93a76 100644
--- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
+++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
@@ -1,6 +1,13 @@
 #include "tf_adapter/util/npu_attrs.h"
 #include "gtest/gtest.h"
 #include
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
 
 namespace tensorflow {
 Status CheckOpImplMode(const string &op_select_implmode);
@@ -65,5 +72,35 @@
   setenv("DUMP_GRAPH_PATH", "./dump_fold", 1);
   string new_path = GetDumpPath();
   EXPECT_NE(new_path, "./dump_fold/");
 }
+
+TEST_F(NpuAttrTest, SetNpuOptimizerAttrInvalidEnableDump) {
+  GraphOptimizationPassOptions options;
+  SessionOptions session_options;
+  session_options.config.mutable_graph_options()
+      ->mutable_optimizer_options()
+      ->set_do_function_inlining(true);
+  auto *custom_config = session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers();
+  custom_config->set_name("NpuOptimizer");
+  AttrValue enable_dump_debug = AttrValue();
+  enable_dump_debug.set_b(true);
+  (*custom_config->mutable_parameter_map())["enable_dump_debug"] = enable_dump_debug;
+  options.session_options = &session_options;
+  Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr);
+  EXPECT_EQ(s.ok(), false);
+
+  AttrValue dump_path = AttrValue();
+  dump_path.set_s("/invalid");
+  (*custom_config->mutable_parameter_map())["dump_path"] = dump_path;
+  s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr);
+  EXPECT_EQ(s.ok(), false);
+
+  dump_path.set_s("/");
+  (*custom_config->mutable_parameter_map())["dump_path"] = dump_path;
+  AttrValue dump_step = AttrValue();
+  dump_step.set_s("777");
+  (*custom_config->mutable_parameter_map())["dump_step"] = dump_step;
+  s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr);
+  EXPECT_EQ(s.ok(), false);
+}
 }
 } // end tensorflow
\ No newline at end of file
LOG(INFO) << "777777"; if (params.count("hcom_parallel")) { hcom_parallel = params.at("hcom_parallel").b(); } if (params.count("graph_memory_max_size")) { graph_memory_max_size = params.at("graph_memory_max_size").s(); } if (params.count("variable_memory_max_size")) { @@ -1161,12 +1162,12 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options string tmp_path = params.at("dump_path").s(); Status s = CheckPath(tmp_path, dump_path); if (!s.ok()) { - ADP_LOG(FATAL) << s.error_message(); - LOG(FATAL) << s.error_message(); + ADP_LOG(ERROR) << s.error_message(); + LOG(ERROR) << s.error_message(); } } else { - ADP_LOG(FATAL) << "if use dump function, dump_path must be set."; - LOG(FATAL) << "if use dump function, dump_path must be set."; + ADP_LOG(ERROR) << "if use dump function, dump_path must be set."; + LOG(ERROR) << "if use dump function, dump_path must be set."; } } if (enable_dump) { @@ -1174,8 +1175,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options dump_step = params.at("dump_step").s(); Status s = checkDumpStep(dump_step); if (!s.ok()) { - ADP_LOG(FATAL) << s.error_message(); - LOG(FATAL) << s.error_message(); + ADP_LOG(ERROR) << s.error_message(); + LOG(ERROR) << s.error_message(); } } if (params.count("dump_mode")) { @@ -1235,8 +1236,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (!aoe_mode.empty()) { Status s = CheckAoeMode(aoe_mode); if (!s.ok()) { - ADP_LOG(FATAL) << s.error_message(); - LOG(FATAL) << s.error_message(); + ADP_LOG(ERROR) << s.error_message(); + LOG(ERROR) << s.error_message(); + return errors::Internal(s.error_message()); } if (params.count("work_path")) { std::string tmp_path = params.at("work_path").s(); @@ -1472,6 +1474,10 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options pass_options["in_out_pair"] = in_out_pair; std::string attr_name; + if (!node) { + LOG(ERROR) << "node is null"; + return errors::Internal("node is null."); + } for (const auto &option : sess_options) { attr_name = std::string("_") + option.first; node->AddAttr(attr_name, option.second);