diff --git a/tf_adapter/kernels/aicpu/npu_cpu_ops.cc b/tf_adapter/kernels/aicpu/npu_cpu_ops.cc index 7e4891543fce8ef572814852efaea58c12962a5c..3b5e0e54fb6e02587f01b5e00a37a7ff863c491b 100644 --- a/tf_adapter/kernels/aicpu/npu_cpu_ops.cc +++ b/tf_adapter/kernels/aicpu/npu_cpu_ops.cc @@ -150,14 +150,14 @@ class OCRIdentifyPreHandleOp : public OpKernel { }; class BatchDilatePolysOp : public OpKernel { - public : + public : explicit BatchDilatePolysOp(OpKernelConstruction *context):OpKernel(context){} ~BatchDilatePolysOp() override{} void Compute(OpKernelContext *context) override{ADP_LOG(INFO)<<"BatchDilatePolysOp Compute";} }; class OCRFindContoursOp : public OpKernel { - public : + public : explicit OCRFindContoursOp(OpKernelConstruction *context):OpKernel(context){} ~OCRFindContoursOp() override{} void Compute(OpKernelContext *context) override{ADP_LOG(INFO)<<"OCRFindContoursOp Compute";} @@ -184,6 +184,13 @@ class DequeueOp : public OpKernel { void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "DequeueOp Compute"; } }; +class NonZeroWithValueShapeOp : public OpKernel { + public: + explicit NonZeroWithValueShapeOp(OpKernelConstruction *context) : OpKernel(context) {} + ~NonZeroWithValueShapeOp() override {} + void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "NonZeroWithValueShapeOp Compute"; } +}; + REGISTER_KERNEL_BUILDER(Name("EmbeddingRankId").Device(DEVICE_CPU), EmbeddingRankIdOpKernel); REGISTER_KERNEL_BUILDER(Name("EmbeddingLocalIndex").Device(DEVICE_CPU), EmbeddingLocalIndexOpKernel); REGISTER_KERNEL_BUILDER(Name("LruCache").Device(DEVICE_CPU), LruCacheOp); @@ -200,6 +207,7 @@ REGISTER_KERNEL_BUILDER(Name("OCRFindContours").Device(DEVICE_CPU), OCRFindConto REGISTER_KERNEL_BUILDER(Name("OCRDetectionPostHandle").Device(DEVICE_CPU), OCRDetectionPostHandleOp); REGISTER_KERNEL_BUILDER(Name("ResizeAndClipPolys").Device(DEVICE_CPU), ResizeAndClipPolysOp); REGISTER_KERNEL_BUILDER(Name("Dequeue").Device(DEVICE_CPU), DequeueOp); 
+REGISTER_KERNEL_BUILDER(Name("NonZeroWithValueShape").Device(DEVICE_CPU), NonZeroWithValueShapeOp); #define REGISTER_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("DeformableOffsets") \ diff --git a/tf_adapter/ops/aicpu/npu_cpu_ops.cc b/tf_adapter/ops/aicpu/npu_cpu_ops.cc index b5f7b3e72d603eb2c3a0c13430411cb717da3cdd..8fc4179f7fb2b4d5bf58285cefdd981574d9ef89 100644 --- a/tf_adapter/ops/aicpu/npu_cpu_ops.cc +++ b/tf_adapter/ops/aicpu/npu_cpu_ops.cc @@ -44,7 +44,7 @@ REGISTER_OP("EmbeddingRankId") Output rank_id: Tensors with the same shape as index.dim(0)*3. )doc"); -//regist embedding local index op +// regist embedding local index op REGISTER_OP("EmbeddingLocalIndex") .Input("addr_table: uint64") .Input("index: T") @@ -74,7 +74,7 @@ REGISTER_OP("EmbeddingLocalIndex") recover_idx: The sorted local_idx element corresponds to the position of the original input index. )doc"); -//regist lru cahe op +// regist lru cache op REGISTER_OP("LruCache") .Output("cache: resource") .Attr("cache_size: int") @@ -84,7 +84,7 @@ REGISTER_OP("LruCache") .Attr("dtype: {uint32, uint64, int32, int64}") .SetIsStateful() .SetShapeFn(shape_inference::ScalarShape); -//regist cache add op +// regist cache add op REGISTER_OP("CacheAdd") .Input("cache: resource") .Input("ids: T") @@ -100,7 +100,7 @@ REGISTER_OP("CacheAdd") c->set_output(3, c->Vector(c->UnknownDim())); return Status::OK(); }); -//regist cache remote index to local op +// regist cache remote index to local op REGISTER_OP("CacheRemoteIndexToLocal") .Input("cache: resource") .Input("ids: T") @@ -110,7 +110,7 @@ REGISTER_OP("CacheRemoteIndexToLocal") c->set_output(0, c->Vector(c->Rank(c->input(1)))); return Status::OK(); }); -//regist cache all index to local op +// regist cache all index to local op REGISTER_OP("CacheAllIndexToLocal") .Input("cache: resource") .Output("local_idx: dtype") @@ -120,7 +120,7 @@ REGISTER_OP("CacheAllIndexToLocal") return Status::OK(); }); -//regist deformable offsets op +// regist deformable offsets op 
REGISTER_OP("DeformableOffsets") .Input("x: T") .Input("offsets: T") @@ -173,7 +173,7 @@ REGISTER_OP("DeformableOffsets") c->set_output(0, c->MakeShape(out_dims)); return Status::OK(); }); -//regist deformable offsets grad op +// regist deformable offsets grad op REGISTER_OP("DeformableOffsetsGrad") .Input("grad: T") .Input("x: T") @@ -195,7 +195,7 @@ REGISTER_OP("DeformableOffsetsGrad") c->set_output(1, input_offsets_shape); return Status::OK(); }); -//regist Random Choice With Mask op +// regist Random Choice With Mask op REGISTER_OP("RandomChoiceWithMask") .Input("x: bool") .Output("y: int32") @@ -219,7 +219,7 @@ REGISTER_OP("RandomChoiceWithMask") } return Status::OK(); }); -//regist dense image warp op +// regist dense image warp op REGISTER_OP("DenseImageWarp") .Input("image: T") .Input("flow: S") @@ -231,7 +231,7 @@ REGISTER_OP("DenseImageWarp") c->set_output(0, input_image_shape); return Status::OK(); }); -//regist dense image warp grad op +// regist dense image warp grad op REGISTER_OP("DenseImageWarpGrad") .Input("grad: T") .Input("image: T") @@ -509,4 +509,18 @@ REGISTER_OP("OCRDetectionPostHandle") c->set_output(3, c->Scalar()); return Status::OK(); }); + + REGISTER_OP("NonZeroWithValueShape") + .Input("value: T") + .Input("index: int32") + .Input("count: int32") + .Output("out_value: T") + .Output("out_index: int32") + .Attr("T: {double, float, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool}") + + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->Vector(c->UnknownDim())); + c->set_output(1, c->Vector(c->UnknownDim())); + return Status::OK(); + }); } // namespace tensorflow diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index 9d4a7b9ef7eb01b09146e5c5e03b9544fdffc8be..874bdcd302e708bb9582ef2f32fa101f5e8f9515 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ 
-235,4 +235,16 @@ def resize_and_clip_polys(polys_data, polys_offset, polys_size, h_scale, w_scale w_scale=w_scale, img_h=img_h, img_w=img_w) + return result + +## Provides the NonZeroWithValueShape operator. +# @param value tensor of type double, float, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64 or bool +# @param index tensor of type int32 +# @param count tensor of type int32 +# @return out_value, out_index: out_value has the same type as value, out_index is int32 +def non_zero_with_value_shape(value, index, count): + result = gen_npu_cpu_ops.non_zero_with_value_shape( + value=value, + index=index, + count=count) return result \ No newline at end of file diff --git a/tf_adapter/tests/st/CMakeLists.txt b/tf_adapter/tests/st/CMakeLists.txt index eb15a09c31b335da7a904cfb9585f462fa9da532..40b45c3b723003789484111107ab8fad8697a064 100644 --- a/tf_adapter/tests/st/CMakeLists.txt +++ b/tf_adapter/tests/st/CMakeLists.txt @@ -23,9 +23,15 @@ file(GLOB_RECURSE ST_SOURCES ${TFADAPTER_DIR}/tf_adapter/optimizers/*.cc ${TFADAPTER_DIR}/tf_adapter/util/*.cc ${TFADAPTER_DIR}/tf_adapter/kernels/*.cc - "optimizers/testcase/*.cc" - "kernels/testcase/*.cc" + "optimizers/testcase/om_partition_subgraphs_pass_test.cc" + "optimizers/testcase/get_attr_optimize_pass_test.cc" + "kernels/testcase/ocr_ops_test.cc" + "kernels/testcase/non_zero_with_value_ops_test.cc" + "kernels/testcase/non_zero_with_value_shape_ops_test.cc" + "kernels/testcase/geop_npu_test.cc" "util/testcase/*.cc" + "kernels/testcase/layer_norm_ops_test.cc" + "kernels/testcase/layer_norm_grad_ops_test.cc" ) add_executable(tfadapter_stest diff --git a/tf_adapter/tests/st/kernels/testcase/non_zero_with_value_shape_ops_test.cc b/tf_adapter/tests/st/kernels/testcase/non_zero_with_value_shape_ops_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..24239045fd63d33ede9f9ad354c70317460e37e5 --- /dev/null +++ b/tf_adapter/tests/st/kernels/testcase/non_zero_with_value_shape_ops_test.cc @@ -0,0 +1,63 @@ 
+#include <memory> +#include "tf_adapter/kernels/npu_cpu_ops.cc" +#include "gtest/gtest.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +PartialTensorShape TShape(std::initializer_list<int64> dims) { + return PartialTensorShape(dims); +} + +FakeInputFunctor FakeInputStub(DataType dt) { + return [dt](const OpDef& op_def, int in_index, const NodeDef& node_def, + NodeDefBuilder* builder) { + char c = 'a' + (in_index % 26); + string in_node = string(&c, 1); + builder->Input(in_node, 0, dt); + return Status::OK(); + }; +} + + +TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShape) { + DataTypeSlice input_types({DT_INT32, DT_INT32, DT_INT32}); + MemoryTypeSlice input_memory_types; + DataTypeSlice output_types({DT_INT32, DT_INT32}); + MemoryTypeSlice output_memory_types; + DeviceBase *device = new DeviceBase(Env::Default()); + NodeDef *node_def = new NodeDef(); + OpDef *op_def = new OpDef(); + OpKernelConstruction *context = new OpKernelConstruction(DEVICE_CPU, device, nullptr, node_def, op_def, nullptr, + input_types, input_memory_types, output_types, output_memory_types, + 1, nullptr); + NonZeroWithValueShapeOp cache(context); + delete device; + delete node_def; + delete op_def; + delete context; +} + +TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShapeInfer) { + const OpRegistrationData* reg; + TF_CHECK_OK(OpRegistry::Global()->LookUp("NonZeroWithValueShape", &reg)); + OpDef op_def = reg->op_def; + NodeDef def; + TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) + .Input(FakeInputStub(DT_INT32)) + .Input(FakeInputStub(DT_INT32)) + .Input(FakeInputStub(DT_INT32)) + 
.Finalize(&def)); + shape_inference::InferenceContext c(0, &def, op_def,{TShape({})}, {}, {}, {}); + TF_CHECK_OK(reg->shape_inference_fn(&c)); +} +} // namespace +} // namespace tensorflow \ No newline at end of file diff --git a/tf_adapter/tests/ut/kernels/testcase/non_zero_with_value_shape_ops_test.cc b/tf_adapter/tests/ut/kernels/testcase/non_zero_with_value_shape_ops_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..03a21b11ebe20de1f15e4c29dd138fa0fb9338dc --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/non_zero_with_value_shape_ops_test.cc @@ -0,0 +1,62 @@ +#include <memory> +#include "tf_adapter/kernels/npu_cpu_ops.cc" +#include "gtest/gtest.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +PartialTensorShape TShape(std::initializer_list<int64> dims) { + return PartialTensorShape(dims); +} + +FakeInputFunctor FakeInputStub(DataType dt) { + return [dt](const OpDef& op_def, int in_index, const NodeDef& node_def, + NodeDefBuilder* builder) { + char c = 'a' + (in_index % 26); + string in_node = string(&c, 1); + builder->Input(in_node, 0, dt); + return Status::OK(); + }; +} + +TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShape) { + DataTypeSlice input_types({DT_INT32, DT_INT32, DT_INT32}); + MemoryTypeSlice input_memory_types; + DataTypeSlice output_types({DT_INT32, DT_INT32}); + MemoryTypeSlice output_memory_types; + DeviceBase *device = new DeviceBase(Env::Default()); + NodeDef *node_def = new NodeDef(); + OpDef *op_def = new OpDef(); + OpKernelConstruction *context = new OpKernelConstruction(DEVICE_CPU, device, nullptr, node_def, 
op_def, nullptr, + input_types, input_memory_types, output_types, output_memory_types, + 1, nullptr); + NonZeroWithValueShapeOp cache(context); + delete device; + delete node_def; + delete op_def; + delete context; +} + +TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShapeInfer) { + const OpRegistrationData* reg; + TF_CHECK_OK(OpRegistry::Global()->LookUp("NonZeroWithValueShape", &reg)); + OpDef op_def = reg->op_def; + NodeDef def; + TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) + .Input(FakeInputStub(DT_INT32)) + .Input(FakeInputStub(DT_INT32)) + .Input(FakeInputStub(DT_INT32)) + .Finalize(&def)); + shape_inference::InferenceContext c(0, &def, op_def, {TShape({})}, {}, {}, {}); + TF_CHECK_OK(reg->shape_inference_fn(&c)); +} +} // namespace +} // namespace tensorflow \ No newline at end of file