diff --git a/tf_adapter_2.x/npu_device/core/npu_device.cpp b/tf_adapter_2.x/npu_device/core/npu_device.cpp
index 4c2852520dee887d7e07dc66f075731f6f884496..746cea2ebbd9c269a94a441c6f46eed1eeff03d6 100644
--- a/tf_adapter_2.x/npu_device/core/npu_device.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_device.cpp
@@ -80,7 +80,7 @@ void NpuDevice::CreateIteratorProvider(TFE_Context *context, const tensorflow::T
   NPU_CTX_REQUIRES_OK(status, GetMirroredIteratorShapesAndTypes(resource, shapes, types));
   auto dp_provider =
     npu::IteratorResourceProvider::GetFunctionDef(resource.name(), std::move(device_ids), shapes, types, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   tensorflow::FunctionLibraryDefinition *lib_def = npu::UnwrapCtx(context)->FuncLibDef();
   NPU_CTX_REQUIRES_OK(status, lib_def->AddFunctionDef(dp_provider));
@@ -193,25 +193,39 @@ void NpuDevice::DeleteDevice(void *device) {
   delete npu_device;
 }
 
+bool NpuDevice::SupportedInputType(tensorflow::DataType data_type) const {
+  return tensorflow::DataTypeCanUseMemcpy(data_type);
+}
+
+bool NpuDevice::SupportedOutputType(tensorflow::DataType data_type) const {
+  return tensorflow::DataTypeCanUseMemcpy(data_type);
+}
+
 tensorflow::Status NpuDevice::ValidateOutputTypes(const TensorDataTypes &data_types) const {
+  std::stringstream ss;
   for (size_t i = 0; i < data_types.size(); i++) {
     auto data_type = data_types[i];
-    if ((data_type != tensorflow::DT_RESOURCE) && (!tensorflow::DataTypeCanUseMemcpy(data_type))) {
-      return tensorflow::errors::Unimplemented("Output ", i, " unsupported type ",
-                                               tensorflow::DataTypeString(data_type));
+    if (!SupportedOutputType(data_type)) {
+      ss << "Output " << i << " unsupported type " << tensorflow::DataTypeString(data_type) << std::endl;
     }
   }
+  if (!ss.str().empty()) {
+    return tensorflow::errors::Unimplemented(ss.str());
+  }
   return tensorflow::Status::OK();
 }
 
 tensorflow::Status NpuDevice::ValidateInputTypes(const TensorDataTypes &data_types) const {
+  std::stringstream ss;
   for (size_t i = 0; i < data_types.size(); i++) {
     auto data_type = data_types[i];
-    if ((data_type != tensorflow::DT_RESOURCE) && (!tensorflow::DataTypeCanUseMemcpy(data_type))) {
-      return tensorflow::errors::Unimplemented("Input ", i, " unsupported type ",
-                                               tensorflow::DataTypeString(data_type));
+    if (!SupportedInputType(data_type)) {
+      ss << "Input " << i << " unsupported type " << tensorflow::DataTypeString(data_type) << std::endl;
     }
   }
+  if (!ss.str().empty()) {
+    return tensorflow::errors::Unimplemented(ss.str());
+  }
   return tensorflow::Status::OK();
 }
 /**
@@ -312,7 +326,9 @@ TFE_TensorHandle *NpuDevice::CopyTensorH2D(TFE_Context *context, TFE_TensorHandl
     scope_handle_deleter.Guard(local_handle);
   }
 
-  if (TF_GetCode(status) != TF_OK) { return nullptr; }
+  if (TF_GetCode(status) != TF_OK) {
+    return nullptr;
+  }
   const tensorflow::Tensor *local_tensor = nullptr;
   NPU_CTX_REQUIRES_OK_RETURN(status, npu::GetTensorHandleTensor(local_handle, &local_tensor), nullptr);
   if (local_tensor->dtype() == tensorflow::DT_RESOURCE) {
@@ -324,7 +340,9 @@
   TFE_TensorHandle *npu_handle =
     NewDeviceTensorHandle(context, fmt, local_tensor->shape(), local_tensor->dtype(), status);
-  if (TF_GetCode(status) != TF_OK) { return nullptr; }
+  if (TF_GetCode(status) != TF_OK) {
+    return nullptr;
+  }
   const tensorflow::Tensor *npu_tensor = nullptr;
   NPU_CTX_REQUIRES_OK_RETURN(status, npu::GetTensorHandleTensor(npu_handle, &npu_tensor), nullptr);
@@ -501,7 +519,7 @@ void NpuDevice::GetOrCreateOpExecutor(TFE_Context *context, const char *op_name,
   }
   DLOG() << "No cached op executor for " << op_name << ", start create and cache";
   *spec = OpExecutor::Create(context, this, ndef, num_inputs, inputs, s);
-  if (TF_GetCode(s) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(s);
   DLOG() << "Cache " << (*spec)->Type() << " op_executor for " << ndef.DebugString() << std::endl
          << (*spec)->DebugString();
   CacheOpExecutor(*spec);
@@ -511,7 +529,7 @@ void NpuDevice::FallbackCPU(TFE_Context *context, const char *op_name, const TFE
                             TFE_TensorHandle **inputs, int num_outputs, TFE_TensorHandle **outputs, TF_Status *status) {
   DLOG() << "Start fallback executing " << op_name << " by " << underlying_device;
   TFE_Op *op(TFE_NewOp(context, op_name, status));
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   TFE_OpAddAttrs(op, attributes);
   TFE_OpSetDevice(op, underlying_device.c_str(), status);
   ScopeTensorHandleDeleter scope_handle_deleter;
@@ -520,7 +538,7 @@ void NpuDevice::FallbackCPU(TFE_Context *context, const char *op_name, const TFE
     if (npu::IsNpuTensorHandle(input)) {
       input = CopyTensorD2H(context, input, status);  // Reference count is 1 after creation
       scope_handle_deleter.Guard(input);
-      if (TF_GetCode(status) != TF_OK) { return; }
+      NPU_REQUIRES_TFE_OK(status);
     }
     if (kDumpExecutionDetail) {
       const tensorflow::Tensor *tensor = nullptr;
@@ -528,13 +546,13 @@ void NpuDevice::FallbackCPU(TFE_Context *context, const char *op_name, const TFE
       LOG(INFO) << " input " << j << " " << tensor->DebugString();
     }
     TFE_OpAddInput(op, input, status);  // Reference count is 2 after the add
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
   }
 
   std::vector<TFE_TensorHandle *> op_outputs(num_outputs);
   TFE_Execute(op, op_outputs.data(), &num_outputs, status);
   TFE_DeleteOp(op);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   for (int i = 0; i < num_outputs; ++i) {
     outputs[i] = op_outputs[i];
   }
@@ -570,25 +588,26 @@ void NpuDevice::FallbackCPU(TFE_Context *context, const tensorflow::NodeDef &nde
  */
 void NpuDevice::Execute(const TFE_Op *op, int num_outputs, TFE_TensorHandle **outputs, TF_Status *s) {
   auto context = TFE_OpGetContext(op, s);
-  if (TF_GetCode(s) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(s);
   auto num_inputs = TFE_OpGetFlatInputCount(op, s);
-  if (TF_GetCode(s) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(s);
   std::vector<TFE_TensorHandle *> inputs;
   for (int i = 0; i < num_inputs; i++) {
     inputs.push_back(TFE_OpGetFlatInput(op, i, s));
-    if (TF_GetCode(s) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(s);
   }
   auto op_name = TFE_OpGetName(op, s);
-  if (TF_GetCode(s) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(s);
   auto attributes = TFE_OpGetAttrs(op);
 
   DLOG() << "NPU Start executing " << op_name;
   std::shared_ptr<OpExecutor> spec;
   GetOrCreateOpExecutor(context, op_name, attributes, inputs.size(), inputs.data(), &spec, s);
-  if (TF_GetCode(s) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(s);
+
   spec->Run(context, this, num_inputs, inputs.data(), num_outputs, outputs, s);
 }
@@ -656,7 +675,7 @@ void NpuDevice::SetNpuLoopSize(TFE_Context *context, int64_t loop, TF_Status *st
   RunGeGraphPin2CpuAnonymous(context, "set_npu_loop_conditions", graph.ToGraphDefDebug(), 0, nullptr, 0, nullptr,
                              status);
-  if (TF_GetCode(status) != TF_OK) return;
+  NPU_REQUIRES_TFE_OK(status);
 
   tensorflow::Node *variable;
   tensorflow::Node *arg;
@@ -691,11 +710,11 @@ void NpuDevice::SetNpuLoopSize(TFE_Context *context, int64_t loop, TF_Status *st
     loop_var_graph_id = AddGeGraph(context, "set_loop_var", graph2.ToGraphDefDebug(), status);
     init_status = status->status;
-    if (TF_GetCode(status) != TF_OK) return;
+    NPU_REQUIRES_TFE_OK(status);
   }
   status->status = init_status;
-  if (TF_GetCode(status) != TF_OK) return;
+  NPU_REQUIRES_TFE_OK(status);
 
   std::vector<TFE_TensorHandle *> inputs(1);
   inputs[0] =
@@ -963,7 +982,7 @@ void NpuDevice::RunGeGraph(TFE_Context *context, uint64_t graph_id, int num_inpu
     notification.Notify();
   };
   RunGeGraphAsync(context, graph_id, num_inputs, inputs, pin_to_npu, output_types, num_outputs, outputs, done, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   notification.WaitForNotification();
 }
 
@@ -1055,7 +1074,7 @@ void NpuDevice::RunGeGraphAnonymous(TFE_Context *context, const std::string &nam
                                     int num_inputs, TFE_TensorHandle **inputs, bool pin_to_npu, int num_outputs,
                                     TFE_TensorHandle **outputs, TF_Status *status) {
   uint64_t graph_id = AddGeGraph(context, name, gdef, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
 
   std::map indexed_types;
@@ -1074,10 +1093,10 @@ void NpuDevice::RunGeGraphAnonymous(TFE_Context *context, const std::string &nam
   }
 
   RunGeGraph(context, graph_id, num_inputs, inputs, pin_to_npu, types, num_outputs, outputs, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
 
   RemoveGeGraph(context, graph_id, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
 }
 
 /**
diff --git a/tf_adapter_2.x/npu_device/core/npu_device.h b/tf_adapter_2.x/npu_device/core/npu_device.h
index a2ff2daf1ca43315aa566eac0538a0d923fdb1d5..7865004b37d16a423c52f34a6b29a7bcca7a0ab1 100644
--- a/tf_adapter_2.x/npu_device/core/npu_device.h
+++ b/tf_adapter_2.x/npu_device/core/npu_device.h
@@ -61,6 +61,9 @@ class NpuDevice {
   tensorflow::Status ValidateOutputTypes(const TensorDataTypes &data_types) const;
   tensorflow::Status ValidateInputTypes(const TensorDataTypes &data_types) const;
 
+  bool SupportedInputType(tensorflow::DataType data_type) const;
+  bool SupportedOutputType(tensorflow::DataType data_type) const;
+
   TFE_TensorHandle *NewDeviceTensorHandle(TFE_Context *context, ge::Format fmt, const tensorflow::TensorShape &shape,
                                           tensorflow::DataType type, TF_Status *status);
diff --git a/tf_adapter_2.x/npu_device/core/npu_device_register.cpp b/tf_adapter_2.x/npu_device/core/npu_device_register.cpp
index ffcc33a2ed4ab989da403b354ef36ecd9d10d36a..56d3777be931082b03d8e6b3c455935246e07583 100644
--- a/tf_adapter_2.x/npu_device/core/npu_device_register.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_device_register.cpp
@@ -34,7 +34,9 @@ TFE_TensorHandle *CopyTensorToNpuDevice(TFE_Context *context, TFE_TensorHandle *
   LOG(INFO) << "[CopyTensorToNpuDevice] Copy tensor from " << tensorflow::unwrap(tensor)->DeviceName(&tf_status)
             << " to " << dev->device_name;
   TFE_TensorHandle *npu_handle = dev->CopyTensorH2D(context, tensor, status);
-  if (TF_GetCode(status) != TF_OK) return nullptr;
+  if (TF_GetCode(status) != TF_OK) {
+    return nullptr;
+  }
   return npu_handle;
 }
 
@@ -45,9 +47,13 @@ TFE_TensorHandle *CopyTensorFromNpuDevice(TFE_Context *context, TFE_TensorHandle
   // The input TensorHandle is on the NPU, so transfer NPU->CPU first and then call TFE_TensorHandleCopyToDevice to avoid a possible NPU->GPU transfer
   // Once a copy happens the stream must be synchronized; an NPU->NPU copy (which in theory should not happen) needs no synchronization.
   TFE_TensorHandle *local_tensor = dev->CopyTensorD2H(context, tensor, status);
-  if (TF_GetCode(status) != TF_OK) return nullptr;
+  if (TF_GetCode(status) != TF_OK) {
+    return nullptr;
+  }
   TFE_TensorHandle *target_tensor = TFE_TensorHandleCopyToDevice(local_tensor, context, target_device_name, status);
-  if (TF_GetCode(status) != TF_OK) return nullptr;
+  if (TF_GetCode(status) != TF_OK) {
+    return nullptr;
+  }
   TFE_DeleteTensorHandle(local_tensor);
   return target_tensor;
diff --git a/tf_adapter_2.x/npu_device/core/npu_micros.h b/tf_adapter_2.x/npu_device/core/npu_micros.h
index ad8d7afc2123590585c5a27b30a718cdab172ac9..cfeeb4ee06241189296b84df1d92f48809a685ce 100644
--- a/tf_adapter_2.x/npu_device/core/npu_micros.h
+++ b/tf_adapter_2.x/npu_device/core/npu_micros.h
@@ -19,9 +19,9 @@
 #define NPU_CTX_REQUIRES_OK(CTX, ...) \
   do { \
-    (CTX)->status = (__VA_ARGS__); \
+    (CTX)->status = (__VA_ARGS__);  \
     if (TF_PREDICT_FALSE(!CTX->status.ok())) { \
-      LOG(ERROR) << (CTX)->status.ToString(); \
+      LOG(ERROR) << (CTX)->status.ToString();  \
       return; \
     } \
   } while (0)
@@ -113,6 +113,13 @@
     if (TF_PREDICT_FALSE(!_status.ok())) LOG(ERROR) << _status.ToString(); \
   } while (0)
 
+#define NPU_REQUIRES_TFE_OK(STATUS) \
+  do { \
+    if (TF_GetCode(STATUS) != TF_OK) { \
+      return; \
+    } \
+  } while (0)
+
 #define HANDLE_ALL_FORMAT() \
   HANDLE_FORMAT(Nd) \
   HANDLE_FORMAT(Nchw) \
diff --git a/tf_adapter_2.x/npu_device/core/npu_op_executor.cpp b/tf_adapter_2.x/npu_device/core/npu_op_executor.cpp
index a9f65d685354413f706f3a40289bc76a5a2fee78..e0f63675dc5154d619c753fefc7dc75b6638c5e1 100644
--- a/tf_adapter_2.x/npu_device/core/npu_op_executor.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_op_executor.cpp
@@ -50,7 +50,9 @@ std::shared_ptr<OpExecutor> OpExecutor::Create(TFE_Context *context, NpuDevice *
   if (op_reg_data->is_function_op) {
     std::unique_ptr concrete_graph;
     device->GetConcreteGraph(context, ndef, num_inputs, inputs, &concrete_graph, s);
-    if (TF_GetCode(s) != TF_OK) { return nullptr; }
+    if (TF_GetCode(s) != TF_OK) {
+      return nullptr;
+    }
     return concrete_graph;
   }
 
@@ -65,19 +67,10 @@ std::shared_ptr<OpExecutor> OpExecutor::Create(TFE_Context *context, NpuDevice *
   NPU_CTX_REQUIRES_OK_RETURN(s, tensorflow::InOutTypesForNode(ndef, op_reg_data->op_def, &input_types, &output_types),
                              nullptr);
 
-  tensorflow::Status status;
-  if (!(status = device->ValidateOutputTypes(output_types)).ok()) {
-    return std::make_shared(op_reg_data, ndef, input_shapes, status.error_message());
-  }
-
   if (!device->Supported(op_name)) {
     return std::make_shared(op_reg_data, ndef, input_shapes, "Op unsupported by NPU");
   }
 
-  if (!(status = device->ValidateInputTypes(input_types)).ok()) {
-    return std::make_shared(op_reg_data, ndef, input_shapes, status.error_message());
-  }
-
   for (auto type : input_types) {
     if (type == tensorflow::DT_RESOURCE) {
       return std::make_shared(op_reg_data, ndef, input_shapes);
@@ -90,6 +83,15 @@ std::shared_ptr<OpExecutor> OpExecutor::Create(TFE_Context *context, NpuDevice *
     }
   }
 
+  tensorflow::Status status;
+  if (!(status = device->ValidateOutputTypes(output_types)).ok()) {
+    return std::make_shared(op_reg_data, ndef, input_shapes, status.error_message());
+  }
+
+  if (!(status = device->ValidateInputTypes(input_types)).ok()) {
+    return std::make_shared(op_reg_data, ndef, input_shapes, status.error_message());
+  }
+
   if (op_reg_data->shape_inference_fn == nullptr) {
     return std::make_shared(op_reg_data, ndef, input_shapes, "No infer shape function registered");
   }
diff --git a/tf_adapter_2.x/npu_device/core/npu_utils.cpp b/tf_adapter_2.x/npu_device/core/npu_utils.cpp
index 96eb324c85d7ad226c77877e89998a72097478fe..df5c34a34b8837ef343dd4cf857370abf44b3013 100644
--- a/tf_adapter_2.x/npu_device/core/npu_utils.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_utils.cpp
@@ -191,7 +191,9 @@ OptimizeStageGraphDumper::OptimizeStageGraphDumper(const std::string &graph) {
 }
 
 void OptimizeStageGraphDumper::Dump(const std::string &stage, const tensorflow::GraphDef &graph_def) {
-  if (!enabled_) { return; }
+  if (!enabled_) {
+    return;
+  }
   std::string graph_name = tensorflow::strings::StrCat(graph_, ".", counter_++, ".", stage, ".pbtxt");
   DLOG() << "Dump graph " << graph_name;
   WriteTextProto(tensorflow::Env::Default(), graph_name, graph_def);
@@ -199,7 +201,9 @@ void OptimizeStageGraphDumper::Dump(const std::string &stage, const tensorflow::
 
 void OptimizeStageGraphDumper::DumpWithSubGraphs(const std::string &stage, const tensorflow::GraphDef &graph_def,
                                                  const tensorflow::FunctionLibraryDefinition *lib_def) {
-  if (!enabled_) { return; }
+  if (!enabled_) {
+    return;
+  }
   tensorflow::GraphDef copied_graph_def = graph_def;
   *copied_graph_def.mutable_library() = CollectGraphSubGraphs(graph_def, lib_def);
   Dump(stage, copied_graph_def);
diff --git a/tf_adapter_2.x/npu_device/core/op_executors/hooks/make_iterator_op.cpp b/tf_adapter_2.x/npu_device/core/op_executors/hooks/make_iterator_op.cpp
index 54844640f6bd728f8343841e4dcfffe5b616c71d..cb95d879ff5960e83ff10cb8b543e72fce6c4ec4 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/hooks/make_iterator_op.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/hooks/make_iterator_op.cpp
@@ -107,7 +107,7 @@ static auto kernel = [](TFE_Context *context, NpuDevice *dev, const tensorflow::
     TensorDataTypes types;
     NPU_CTX_REQUIRES_OK(status, dev->GetMirroredIteratorShapesAndTypes(handle, shapes, types));
     auto dp_init_graph = MakeIteratorGraphBuilder::GetGraph(handle.container(), handle.name(), shapes, types, status);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
     if (kDumpExecutionDetail || kDumpGraph) {
       std::string file_name = "dp_init_" + handle.name() + ".pbtxt";
       DLOG() << "NPU Dump mirrored resource init graph to: " << file_name;
@@ -115,10 +115,10 @@ static auto kernel = [](TFE_Context *context, NpuDevice *dev, const tensorflow::
     }
     dev->RunGeGraphPin2CpuAnonymous(context, "dp_init_" + handle.name(), dp_init_graph, num_inputs, inputs, 0, nullptr,
                                     status);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
     // For recommendation networks the Provider must support 1-to-N transfer; by default it only sends to the Device that owns the resource
     dev->CreateIteratorProvider(context, tensor, {dev->device_id}, status);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
     }
   }
 };
diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
index 232162a58a8c1f6b3584a6816ff957dd6f542834..4526f28b34a7b21f8ded86b30698e88164e417a5 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_concrete_graph.cpp
@@ -86,7 +86,7 @@ void NpuConcreteGraph::RunImpl(TFE_Context *context, NpuDevice *device, int tf_n
       // The input format should be chosen here according to the op
       input = device->CopyTensorD2H(context, input, status);
       scope_handle_deleter.Guard(input);
-      if (TF_GetCode(status) != TF_OK) { return; }
+      NPU_REQUIRES_TFE_OK(status);
     }
     input_handles_[i] = input;
   }
@@ -96,7 +96,7 @@ void NpuConcreteGraph::RunImpl(TFE_Context *context, NpuDevice *device, int tf_n
   if (NeedLoop()) {
     iterations_per_loop = npu::global::g_npu_loop_size;
     device->SetNpuLoopSize(context, iterations_per_loop, status);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
   }
 
   int64_t consume_resource_times = 1;
@@ -146,7 +146,7 @@ void NpuConcreteGraph::Load(TFE_Context *context, NpuDevice *device, TF_Status *
   if (Built() && device->GeSession()->IsGraphNeedRebuild(GeGraphId())) {
     LOG(INFO) << "Unload ge graph " << GeGraphId() << " for rebuild of op " << Op();
     device->RemoveGeGraph(context, GeGraphId(), status);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
     built_ = false;
   }
@@ -155,17 +155,19 @@ void NpuConcreteGraph::Load(TFE_Context *context, NpuDevice *device, TF_Status *
     if (kEmptyGeGraphId == device->AddGeGraphInner(context, GeGraphId(), Op(), GraphDef(), NeedLoop(), status)) {
       empty_ge_graph_ = true;
     }
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
     built_ = true;
     graph_def_serialized_ = true;
   }
 }
 
 void NpuConcreteGraph::UnLoad(TFE_Context *context, NpuDevice *device, TF_Status *status) const {
-  if (!Built()) { return; }
+  if (!Built()) {
+    return;
+  }
   DLOG() << "Unload ge graph " << GeGraphId() << " of op " << Op();
   device->RemoveGeGraph(context, GeGraphId(), status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   built_ = false;
 }
 
@@ -173,14 +175,14 @@ void NpuConcreteGraph::RunOneShot(TFE_Context *context, NpuDevice *device, int n
                                   int num_outputs, TFE_TensorHandle **outputs, TF_Status *status) const {
   DLOG() << "Run one shot ge graph " << GeGraphId() << " for resource consume op " << Op();
   RunImpl(context, device, num_inputs, inputs, num_outputs, outputs, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   UnLoad(context, device, status);
 }
 
 tensorflow::Status NpuMutableConcreteGraph::DevicePartition(TFE_Context *context, NpuDevice *device) {
   tensorflow::Status input_supported = device->ValidateInputTypes(ConsumedTypes());
   tensorflow::Status output_supported = device->ValidateOutputTypes(ProducedTypes());
-  if (!CpuResources().empty() || !input_supported.ok() || !output_supported.ok()) {
+  if (!input_supported.ok() || !output_supported.ok()) {
     if (!NpuResources().empty()) {
       SetExecutionType(ExecutionType::MIX);
       std::stringstream ss;
@@ -256,7 +258,9 @@ tensorflow::Status NpuMutableConcreteGraph::TryTransToNpuLoopGraph(TFE_Context *
   // Inline body function will change name of variable, which used as id for npu variable
   for (auto node : graph->op_nodes()) {
-    if (!tensorflow::grappler::IsVariable(node->def())) { continue; }
+    if (!tensorflow::grappler::IsVariable(node->def())) {
+      continue;
+    }
     auto attr = node->attrs().Find("shared_name");
     if (attr != nullptr) {
       DLOG() << "Change variable " << node->name() << " " << node->type_string() << " name to " << attr->s();
diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_mirrored_op.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_mirrored_op.cpp
index 2056fbd29d86d753a73305787a371d989e775b6a..b25733430f89f93e2ba10c49445976f398bf9b41 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_mirrored_op.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_mirrored_op.cpp
@@ -34,7 +34,7 @@ void NpuMirroredOp::RunImpl(TFE_Context *context, NpuDevice *device, int num_inp
                             int num_outputs, TFE_TensorHandle **outputs, TF_Status *status) const {
   NPU_CTX_REQUIRES(status, custom_kernel_ != nullptr, tensorflow::errors::Internal(Op(), " hook func is nullptr"));
   device->FallbackCPU(context, NodeDef(), num_inputs, inputs, num_outputs, outputs, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   custom_kernel_(context, device, NodeDef(), num_inputs, inputs, num_outputs, outputs, status);
 }
 }  // namespace npu
diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_resource_generator_op.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_resource_generator_op.cpp
index a93dc2ab6c490983440ff7f1890d3e8fc6253177..36667c8e410fb55799ce3ebcc40e7838ff437cf6 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_resource_generator_op.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_resource_generator_op.cpp
@@ -39,12 +39,12 @@ void NpuResourceGeneratorOp::RunImpl(TFE_Context *context, NpuDevice *device, in
   }
   outputs[0] = device->NewDeviceResourceHandle(context, kScalarShape, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
 
   npu::ScopeTensorHandleDeleter scope_handle_deleter;
   TFE_TensorHandle *cpu_output = nullptr;
   device->FallbackCPU(context, NodeDef(), num_inputs, inputs, num_outputs, &cpu_output, status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   scope_handle_deleter.Guard(cpu_output);
 
   const tensorflow::Tensor *cpu_tensor = nullptr;
diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp
index 94df204f69d7ecc0051e9dd08650c0dc4936e8f2..66fd1d64922a2088ea767c5481d75acea11ffba1 100644
--- a/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp
+++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp
@@ -65,14 +65,14 @@ void NpuStaticShapeOp::RunWithShape(TFE_Context *context, NpuDevice *device, con
       // The input format should be chosen here according to the op
      input = device->CopyTensorH2D(context, input, Format::FORMAT_ND, status);
       scope_handle_deleter.Guard(input);
-      if (TF_GetCode(status) != TF_OK) { return; }
+      NPU_REQUIRES_TFE_OK(status);
     }
     npu_inputs[i] = input;
   }
   const auto &output_types = spec->OutputTypes();
   for (size_t i = 0; i < output_types.size(); ++i) {
     outputs[i] = device->NewDeviceTensorHandle(context, Format::FORMAT_ND, output_shapes[i], output_types[i], status);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
   }
   /****************************************** Simulated NPU execution: Start ************************************/
   std::vector<TFE_TensorHandle *> acl_inputs(num_inputs);
@@ -89,12 +89,12 @@ void NpuStaticShapeOp::RunWithShape(TFE_Context *context, NpuDevice *device, con
     }
     acl_inputs[i] = tensorflow::wrap(tensorflow::TensorHandle::CreateLocalHandle(cpu_tensor));
     scope_handle_deleter.Guard(acl_inputs[i]);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
   }
   /********** Use CPU to simulate NPU: Start *************/
   std::vector<TFE_TensorHandle *> acl_outputs(num_outputs);
   device->FallbackCPU(context, spec->NodeDef(), num_inputs, acl_inputs.data(), num_outputs, acl_outputs.data(), status);
-  if (TF_GetCode(status) != TF_OK) { return; }
+  NPU_REQUIRES_TFE_OK(status);
   /********** Use CPU to simulate NPU: End ***************/
   for (int i = 0; i < num_outputs; ++i) {
     const tensorflow::Tensor *acl_tensor = nullptr;
@@ -103,7 +103,7 @@ void NpuStaticShapeOp::RunWithShape(TFE_Context *context, NpuDevice *device, con
     NPU_CTX_REQUIRES_OK(status, npu::GetTensorHandleTensor(outputs[i], &npu_tensor));
     NPU_CTX_REQUIRES_OK(status, npu::Unwrap(npu_tensor)->AssembleFrom(acl_tensor));
     TFE_DeleteTensorHandle(acl_outputs[i]);
-    if (TF_GetCode(status) != TF_OK) { return; }
+    NPU_REQUIRES_TFE_OK(status);
   }
   /****************************************** Simulated NPU execution: End ************************************/
   DLOG() << "NPU Executing op " << spec->Op() << " succeed by npu executor";
diff --git a/tf_adapter_2.x/npu_device/core/optimizers/meta/npu_control_edge_optimizer.cpp b/tf_adapter_2.x/npu_device/core/optimizers/meta/npu_control_edge_optimizer.cpp
index 8fab2b5250f1bbb09d6f737a5962c8932bcc6197..35a3763ef38a501665261b4ad3a181d4f4e06e9b 100644
--- a/tf_adapter_2.x/npu_device/core/optimizers/meta/npu_control_edge_optimizer.cpp
+++ b/tf_adapter_2.x/npu_device/core/optimizers/meta/npu_control_edge_optimizer.cpp
@@ -26,7 +26,9 @@ const static std::string kNpuGetFloatStatusOp = "NpuGetFloatStatus";
 namespace {
 bool IsFirstDropoutNode(const tensorflow::Node *node) {
-  if (node->type_string() != kDropOutGenMaskV3) { return false; }
+  if (node->type_string() != kDropOutGenMaskV3) {
+    return false;
+  }
   for (const auto edge : node->in_edges()) {
     if (edge->IsControlEdge() && edge->src()->type_string() == kDropOutDoMaskV3) {
       return false;
     }
@@ -84,7 +86,9 @@ void FineTuneDropoutControlEdge(tensorflow::Graph *graph, tensorflow::Node *firs
 }
 
 bool IsEdgeRedundant(const tensorflow::Edge *edge) {
-  if (!edge->IsControlEdge()) { return false; }
+  if (!edge->IsControlEdge()) {
+    return false;
+  }
   const std::string &src = edge->src()->type_string();
   const std::string &dst = edge->dst()->type_string();
   if ((dst == kHcomAllReduce && src != kNpuGetFloatStatusOp) ||
@@ -126,7 +130,9 @@ tensorflow::Status ControlEdgeOptimizeInner(TFE_Context *context, tensorflow::Gr
     bool function_optimized = false;
     NPU_REQUIRES_OK(ControlEdgeOptimizeInner(context, fbody->graph, function_optimized));
-    if (!function_optimized) { continue; }
+    if (!function_optimized) {
+      continue;
+    }
     any_subgraph_optimized = true;
 
     tensorflow::FunctionDef optimized_fdef;
diff --git a/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp b/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp
index cc16ee9d98f4936eddca3b901e07c49395b7a2d5..df8c95b2486767e8282712053417a81e575c6a0e 100644
--- a/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp
+++ b/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp
@@ -141,7 +141,9 @@ tensorflow::Status TransWhileNode(TFE_Context *context, tensorflow::Graph *graph
   NPU_REQUIRES_OK(status);
 
   for (auto edge : node->in_edges()) {
-    if (IsSubstituteNode(edge->src())) { continue; }
+    if (IsSubstituteNode(edge->src())) {
+      continue;
+    }
     if (edge->IsControlEdge()) {
       DLOG() << "Add ctrl edge from " << edge->src()->name() << " to " << pruned_node->name();
       graph->AddControlEdge(edge->src(), pruned_node);
@@ -226,7 +228,9 @@ tensorflow::Status TransHasSubgraphNode(TFE_Context *context, tensorflow::Graph
   NPU_REQUIRES_OK(status);
 
   for (auto edge : node->in_edges()) {
-    if (IsSubstituteNode(edge->src())) { continue; }
+    if (IsSubstituteNode(edge->src())) {
+      continue;
+    }
     if (edge->IsControlEdge()) {
       DLOG() << "Add ctrl edge from " << edge->src()->name() << " to " << pruned_node->name();
       graph->AddControlEdge(edge->src(), pruned_node);
@@ -319,7 +323,9 @@ tensorflow::Status TransResourceInput2NodeOptimize(TFE_Context *context, NpuMuta
   std::map bypass_outputs;
   std::map indexed_retvals;
   for (auto node : mutable_graph->op_nodes()) {
-    if (!node->IsRetval()) { continue; }
+    if (!node->IsRetval()) {
+      continue;
+    }
     indexed_retvals[node->attrs().Find("index")->i()] = node;
   }
 
@@ -335,7 +341,9 @@ tensorflow::Status TransResourceInput2NodeOptimize(TFE_Context *context, NpuMuta
   PruneGraphByFunctionSignature(*fdef, mutable_graph);
   for (auto node : mutable_graph->op_nodes()) {
-    if (!node->IsArg()) { continue; }
+    if (!node->IsArg()) {
+      continue;
+    }
     auto index = node->attrs().Find("index")->i();
     const tensorflow::Tensor *tensor = nullptr;
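
Reviewer note, not part of the patch: a minimal sketch, assuming the TF eager C API and the npu_micros.h macro shown above, of how the new NPU_REQUIRES_TFE_OK macro is meant to be used. The helper name DoInit and the op name "Identity" are placeholders for illustration only.

  #include "tensorflow/c/eager/c_api.h"
  #include "npu_micros.h"

  // Hypothetical void-returning helper: the macro replaces the repeated
  // "if (TF_GetCode(status) != TF_OK) { return; }" early-return pattern.
  static void DoInit(TFE_Context *context, TF_Status *status) {
    TFE_Op *op = TFE_NewOp(context, "Identity", status);
    NPU_REQUIRES_TFE_OK(status);  // expands to: if (TF_GetCode(status) != TF_OK) { return; }
    TFE_DeleteOp(op);
  }

Because the macro expands to a bare return, it only fits the void-returning functions touched above; the pointer-returning copy helpers keep their explicit braced early returns. Separately, ValidateInputTypes/ValidateOutputTypes now collect every unsupported type into a single Unimplemented status instead of stopping at the first, and OpExecutor::Create runs these checks only after the resource and host-memory checks.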