diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 4ab462178acc07f4a84dc11ac690598214ae461a..c71357a44286923561984b8dbe62dbebdf3447f8 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -104,6 +104,10 @@ const float kMaxStepRatio = 0.9; const float kDefaultLossRatio = 1.05; const float kMinLossRatio = 1.01; const float kMaxLossRatio = 1.5; +/* Queue-depth tuning values used by GetChannelDepth and BuildQueueDataAndGetNextFromQueue: kStringTypeDepth is returned for DT_STRING outputs, kUnknownShapeDepth for unknown shapes, kMaxDepth caps the final depth and is returned when the total size is 0, and kMaxBytes is the byte budget that gets divided by the total output size. */ const static int64_t kStringTypeDepth = 64LL; +const int64_t kUnknownShapeDepth = 3LL; +const size_t kMaxDepth = 128UL; +const int64_t kMaxBytes = 2 * 1024 * 1024 * 1024LL; const std::map fast_value_string_2_eunm = {{"fast", GeOp::FastValue::kfast}, {"fast1", GeOp::FastValue::kfast1}}; @@ -1452,8 +1456,58 @@ void GeOp::AddNodeAttrs(Node *node, bool &is_initialize) { } } +/* Reports whether a shape is not fully known: true when the rank is unknown or any dimension equals -1, false otherwise. */ bool GeOp::IsUnknownShape(const PartialTensorShape &output_shapes) const { + if (output_shapes.unknown_rank()) { + return true; + } + for (int32_t i = 0; i < output_shapes.dims(); i++) { + if (output_shapes.dim_size(i) == -1) { + return true; + } + } + return false; +} + +/* Returns the product of every dimension size of output_shapes[index]; the result is 1 when the shape has no dimensions. NOTE(review): index is not bounds-checked and the running product is not guarded against overflow - confirm callers always pass a valid index and a fully known shape. */ int64_t GeOp::GetTensorElementNum(const vector &output_shapes, size_t index) { + PartialTensorShape tensor_shape = output_shapes[index]; + int64_t element_number = 1LL; + for (int32_t i = 0; i < tensor_shape.dims(); i++) { + element_number *= tensor_shape.dim_size(i); + } + return element_number; +} + +/* Derives a channel depth from the output shapes and types. Returns -1 when the two vectors differ in size or when the accumulated byte size is negative, kStringTypeDepth as soon as a DT_STRING output is met, kUnknownShapeDepth as soon as an unknown shape is met, kMaxDepth when the accumulated byte size is 0, and otherwise the larger of 2 and kMaxBytes / total_sizes. */ int64_t GeOp::GetChannelDepth(const vector &output_shapes, const vector &output_types) { + size_t output_shape_size = output_shapes.size(); + size_t output_type_size = output_types.size(); + if (output_shape_size != output_type_size) { + ADP_LOG(INFO) << "Output_shape_size : " << output_shape_size << "is not equal to output_type_size : " + << output_type_size; + return -1LL; + } + int64_t total_sizes = 0LL; + for (size_t i = 0UL; i < output_shape_size; i++) { + DataType tensor_data_type = output_types.at(i); + if (tensor_data_type == DT_STRING) { + ADP_LOG(INFO) << "Current tensor type is DT_STRING."; + return kStringTypeDepth; + } + 
if (IsUnknownShape(output_shapes[i])) { + ADP_LOG(INFO) << "Output_shape is unknown shape"; + return kUnknownShapeDepth; + } + int64_t element_number = GetTensorElementNum(output_shapes, i); + total_sizes += (element_number * static_cast(DataTypeSize(output_types.at(i)))); + } + /* NOTE(review): presumably a negative total can only appear after signed overflow in the accumulation above, which is undefined behavior - consider checking before multiplying and adding. */ if (total_sizes < 0LL) { + ADP_LOG(INFO) << "Data size < 0, and current size is " << total_sizes; + return -1LL; + } + return (total_sizes == 0LL) ? kMaxDepth : std::max(2L, (kMaxBytes / total_sizes)); +} + void GeOp::BuildQueueDataAndGetNextFromQueue(Graph &graph, const Node &getnext_node, - const std::string &channel_name) const { + const std::string &channel_name) { Node *get_next_from_queue = nullptr; Node *queue_data = nullptr; std::string get_next_from_queue_name = "get_next_from_queue_" + getnext_node.name(); @@ -1488,6 +1542,28 @@ void GeOp::BuildQueueDataAndGetNextFromQueue(Graph &graph, const Node &getnext_n queue_data_attr.set_s(queue_data_op_def_string); queue_data_node_def.mutable_attr()->insert({"op_def", queue_data_attr}); + /* Read the output_shapes and output_type attributes of the getnext node, derive a queue depth from them with GetChannelDepth, cap it at kMaxDepth, and store it on the QueueData node def as the _flow_attr_depth attribute when it is greater than 0. */ const char *kTypeAttrName = "output_type"; + const char *kShapeAttrName = "output_shapes"; + vector type_attrs; + vector shape_attrs; + vector shape_proto_attrs; + if (tensorflow::TryGetNodeAttr(getnext_node.attrs(), kShapeAttrName, &shape_proto_attrs)) { + /* NOTE(review): shape_proto_attrs is indexed up to num_outputs() without checking its size, and the local shape is constructed but never used - confirm the attribute always holds one entry per output. */ for (auto i = 0; i < getnext_node.num_outputs(); i++) { + const TensorShapeProto &shape_proto = *shape_proto_attrs[i]; + tensorflow::PartialTensorShape shape(shape_proto); + shape_attrs.push_back(shape_proto); + } + } + (void)tensorflow::TryGetNodeAttr(getnext_node.attrs(), kTypeAttrName, &type_attrs); + int64_t queue_depth = GetChannelDepth(shape_attrs, type_attrs); + /* NOTE(review): the cast target type is not visible in this view; if it is unsigned, a -1 error result from GetChannelDepth becomes a very large value and is clamped to kMaxDepth instead of being skipped by the check below - confirm this is intended. */ size_t channel_depth = std::min(static_cast(queue_depth), kMaxDepth); + ADP_LOG(INFO) << "QueueData depth is : " << channel_depth; + if (channel_depth > 0 ) { + tensorflow::AttrValue queue_data_depth_attr; + queue_data_depth_attr.set_i(static_cast(channel_depth)); + queue_data_node_def.mutable_attr()->insert({"_flow_attr_depth", 
queue_data_depth_attr); + } + const OpDef &get_next_op_def = get_next_from_queue->op_def(); NodeDef &get_next_node_def = const_cast(get_next_from_queue->def()); std::string get_next_op_def_string; diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index c85be81bf2730d314b8531775a3496bf9cda8562..e7cafe64945aa19e9c6222d04c156dcdfae5b9a7 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -147,8 +147,14 @@ public: void ProcessDpOpFuncDef(const Node &node) const; + /* True when the shape has unknown rank or any dimension equal to -1. */ bool IsUnknownShape(const PartialTensorShape &output_shapes) const; + + /* Product of all dimension sizes of output_shapes[index]. */ int64_t GetTensorElementNum(const vector &output_shapes, size_t index); + + /* Channel depth derived from the output shapes and types; -1 signals a size mismatch or a negative total size. */ int64_t GetChannelDepth(const vector &output_shapes, const vector &output_types); + void BuildQueueDataAndGetNextFromQueue(Graph &graph, const Node &getnext_node, - const std::string &channel_name) const; + const std::string &channel_name); void HandleDpOpAndGetNextNodes(Graph &graph);