diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index f53502290ef4f194ea0e6bbe3c6ad57a4d12719d..6b1fc712712bc114d71e167aeb87ab9da2f95227 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -528,7 +528,8 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { bool is_lazy_recompile_mode = dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile"; if (is_set_dynamic_config && is_tuning) { ADP_LOG(FATAL) << "dynamic input config can not use with mstuning."; - LOG(FATAL) << "dynamic input config can not use with mstuning."; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal("dynamic input config can not use with mstuning."), done); + return; } else if (is_set_dynamic_config && !is_tuning) { if (InitRebuildFlag(cache_graph_id) != 0) { OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done); @@ -731,7 +732,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { << ", graph id: " << cache_graph_id << std::endl << "Error Message is : " << std::endl << error_message; - LOG(FATAL) << ss.str(); OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, errors::Unavailable(ss.str()), done); } else { add_graph_flag_ = true; @@ -781,21 +781,26 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { if (ge_status == ge::SUCCESS) { if (BuildOutputTensorInfo(ctx, outputs) != Status::OK()) { ADP_LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed."; - LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed."; + std::string error_message = ge::GEGetErrorMsg(); + std::stringstream ss; + ss << ctx->op_kernel().name() << "GEOP::DoRunAsync get output failed." << std::endl + << "Error Message is : " << std::endl + << error_message; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done); } } else if (ge_status == ge::END_OF_SEQUENCE) { ctx->SetStatus(errors::OutOfRange("End of sequence")); ADP_LOG(WARNING) << "[GEOP] Out of range: End of sequence."; LOG(WARNING) << "[GEOP] Out of range: End of sequence."; } else if (ge_status != ge::SUCCESS) { - tensorflow::Status tfStatus = errors::Unavailable(ToString(ge_status)); - ctx->CtxFailureWithWarning(tfStatus); std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); ADP_LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed"; std::string error_message = ge::GEGetErrorMsg(); - LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed" << std::endl - << "Error Message is : " << std::endl - << error_message; + std::stringstream ss; + ss << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed" << std::endl + << "Error Message is : " << std::endl + << error_message; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done); } int64 run_end_time = InferShapeUtil::GetCurrentTimestap(); ADP_LOG(INFO) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:" @@ -818,7 +823,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { << ", graph id: " << cache_graph_id << std::endl << "Error Message is : " << std::endl << error_message; - LOG(FATAL) << ss.str(); OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, errors::Unavailable(ss.str()), done); } diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_error.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_error.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..647545d283d284754e2760b4e9e5598198e89433 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_error.pbtxt @@ -0,0 +1,527 @@ +node { + name: "retval_Add_0_0" + op: "_Retval" + input: "GeOp61_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp61_0" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "lazy_recompile" + } + } + attr { + key: "_dynamic_input" + value { + s: "1" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "/home/ascend" + } + } + attr { + key: "_aoe_mode" + value { + s: "4" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp61_0" + } + } + } +} +library { + function { + signature { + name: "GeOp61_0" + output_arg { + name: "Add_0_retval" + type: DT_FLOAT + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000 A\000\000 A" + } + } + } + } + node_def { + name: "Variable" + op: "VariableV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_class" + value { + list { + s: "loc:@Variable/read" + } + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node_def { + name: "Variable/read" + op: "Identity" + input: "Variable:ref:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + } + node_def { + name: "Add" + op: "Add" + input: "Const_1:output:0" + input: "Variable/read:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add_0_retval" + value: "Add:z:0" + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_run_error.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_run_error.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..181effa6150b921061a86db33a80579659ac0459 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_run_error.pbtxt @@ -0,0 +1,553 @@ +node { + name: "retval_Add_0_0" + op: "_Retval" + input: "GeOp51_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "retval_Add_1_1" + op: "_Retval" + input: "GeOp51_0:1" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp51_0" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "lazy_recompile" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "/home/ascend" + } + } + attr { + key: "_aoe_mode" + value { + s: "4" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp51_0" + } + } + } +} +library { + function { + signature { + name: "GeOp51_0" + output_arg { + name: "Add_0_retval" + type: DT_FLOAT + } + output_arg { + name: "Add_1_retval" + type: DT_FLOAT + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000 A\000\000 A" + } + } + } + } + node_def { + name: "Variable" + op: "VariableV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_class" + value { + list { + s: "loc:@Variable/read" + } + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node_def { + name: "Variable/read" + op: "Identity" + input: "Variable:ref:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + } + node_def { + name: "Add" + op: "Add" + input: "Const_1:output:0" + input: "Variable/read:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add_0_retval" + value: "Add:z:0" + } + ret { + key: "Add_1_retval" + value: "Add:z:0" + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 727b71d3e088d746cc0697840c4e30e6841e077c..d29417f3483d7f0c271e1fdb07e1d6531ea60c6c 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -105,6 +105,18 @@ TEST_F(GeOpTest, GeOpFuncTest) { gtl::InlinedVector inputs; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); } +TEST_F(GeOpTest, GeOpError) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_error.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp51_0").ok()); +} +TEST_F(GeOpTest, GeOpRunError) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_run_error.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp61_0").ok()); +} TEST_F(GeOpTest, GeOpVarInitGraphTest) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_var_init_graph.pbtxt";