diff --git a/tf_adapter/kernels/lru_cache_v2_ops.cc b/tf_adapter/kernels/lru_cache_v2_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..016b33546b7c486ea81ce119d411893639be6654
--- /dev/null
+++ b/tf_adapter/kernels/lru_cache_v2_ops.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tf_adapter/common/adp_logger.h"
+
+namespace tensorflow {
+class LRUCacheV2Op : public OpKernel {
+public:
+  explicit LRUCacheV2Op(OpKernelConstruction *context) : OpKernel(context) {}
+  ~LRUCacheV2Op() override = default;
+  void Compute(OpKernelContext *context) override {
+    (void)context;
+    ADP_LOG(INFO) << "LRUCacheV2Op Compute running";
+  }
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LruCacheV2").Device(DEVICE_CPU), LRUCacheV2Op);
+} // namespace tensorflow
diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc
index 9734c7ba77302fe338e4488198c4814f6b6c278b..80e7f3a8c59d53e2838864782690eedb087a0864 100644
--- a/tf_adapter/ops/aicore/npu_aicore_ops.cc
+++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc
@@ -454,6 +454,32 @@ REGISTER_OP("DynamicRnnGrad")
     return Status::OK();
   });
 
+REGISTER_OP("LruCacheV2")
+    .Input("index_list: T")
+    .Input("data: Ref(dtype)")
+    .Input("cache: Ref(dtype)")
+    .Input("tag: Ref(T)")
+    .Input("is_last_call: T")
+    .Output("data1: Ref(dtype)")
+    .Output("cache1: Ref(dtype)")
+    .Output("tag1: Ref(T)")
+    .Output("index_offset_list: T")
+    .Output("not_in_cache_index_list: T")
+    .Output("not_in_cache_number: T")
+    .Attr("T: {int32, int64}")
+    .Attr("dtype: {float32}")
+    .Attr("pre_route_count: int")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext *c) {
+      c->set_output(0, c->input(1));
+      c->set_output(1, c->input(2));
+      c->set_output(2, c->input(3));
+      c->set_output(3, c->input(0));
+      c->set_output(4, c->input(0));
+      c->set_output(5, c->MakeShape({1}));
+      return Status::OK();
+    });
+
 REGISTER_OP("Centralization")
     .Input("x: T")
     .Output("y: T")
diff --git a/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py b/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py
index f15205ef99358b9e63c5de1c16aaa08dfed4356e..6288a218f9cf504b3c512a15185e4ecf9860af00 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py
@@ -125,6 +125,17 @@ def _DropOutDoMaskV3Grad(op, grad):
     return [result, None, None]
 
 
+def lru_cache_v2(index_list, data, cache, tag, is_last_call, pre_route_count, name=None):
+    """
+    LRUCacheV2 op: look index_list up in the LRU cache and return the updated
+    cache state together with the cache-miss information.
+    """
+    is_last_call = ops.convert_to_tensor(is_last_call, name="is_last_call")
+    data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number = \
+        npu_aicore_ops.lru_cache_v2(index_list, data, cache, tag, is_last_call, pre_route_count, name=name)
+    return [data, cache, tag,
+            index_offset_list, not_in_cache_index_list, not_in_cache_number]
+
 def nonzero(x, transpose=False, output_type=dtypes.int64, name=None):
     """
     nonzero op
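A minimal TF1-style sketch of how the wrapper above might be called. The shapes, table sizes, and `pre_route_count` value are illustrative assumptions, not requirements of the op; what the signature does require is that `data`/`cache` are float32, that `index_list`/`tag`/`is_last_call` share the integer type `T`, and that the state variables are non-resource so they bind to the `Ref(...)` inputs:

    import tensorflow as tf
    from npu_bridge.estimator import npu_aicore_ops as aicore

    # Ref-typed cache state: non-resource variables yield the Ref(dtype)/Ref(T)
    # tensors the op signature expects.
    data = tf.Variable(tf.zeros([1024, 16]), use_resource=False)   # full table
    cache = tf.Variable(tf.zeros([256, 16]), use_resource=False)   # cached rows
    tag = tf.Variable(tf.zeros([256], dtype=tf.int32), use_resource=False)  # keys of cached rows

    index_list = tf.constant([3, 7, 42], dtype=tf.int32)
    data1, cache1, tag1, index_offset_list, not_in_cache_index_list, not_in_cache_number = \
        aicore.lru_cache_v2(index_list, data, cache, tag,
                            is_last_call=0, pre_route_count=1)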
diff --git a/tf_adapter/tests/st/kernels/testcase/lru_cache_v2_ops_test.cc b/tf_adapter/tests/st/kernels/testcase/lru_cache_v2_ops_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c7859804c4cef88fee19de6989f3d002b6252fbc
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/lru_cache_v2_ops_test.cc
@@ -0,0 +1,38 @@
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/platform/test.h"
+#include "tf_adapter/kernels/lru_cache_v2_ops.cc"
+#include "gtest/gtest.h"
+#include <stdlib.h>
+
+namespace tensorflow {
+namespace {
+
+TEST(LruCacheV2Test, TestLruCacheV2) {
+  DataTypeSlice input_types({DT_INT32, DT_FLOAT_REF, DT_FLOAT_REF, DT_INT32_REF, DT_INT32});
+  MemoryTypeSlice input_memory_types;
+  DataTypeSlice output_types(
+      {DT_FLOAT_REF, DT_FLOAT_REF, DT_INT32_REF, DT_INT32, DT_INT32, DT_INT32});
+  MemoryTypeSlice output_memory_types;
+  DeviceBase *device = new DeviceBase(Env::Default());
+  NodeDef *node_def = new NodeDef();
+  OpDef *op_def = new OpDef();
+  OpKernelConstruction *context = new OpKernelConstruction(
+      DEVICE_CPU, device, nullptr, node_def, op_def, nullptr, input_types,
+      input_memory_types, output_types, output_memory_types, 1, nullptr);
+  LRUCacheV2Op lrucachev2op(context);
+  OpKernelContext *ctx = nullptr;
+  lrucachev2op.Compute(ctx);
+  lrucachev2op.IsExpensive();
+  delete device;
+  delete node_def;
+  delete op_def;
+  delete context;
+}
+}
+} // namespace tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/ut/kernels/testcase/lru_cache_v2_ops_test.cc b/tf_adapter/tests/ut/kernels/testcase/lru_cache_v2_ops_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..afc289660d3b4241939ac6d096b290e4f88283bc
--- /dev/null
+++ b/tf_adapter/tests/ut/kernels/testcase/lru_cache_v2_ops_test.cc
@@ -0,0 +1,72 @@
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tf_adapter/kernels/lru_cache_v2_ops.cc"
+#include "gtest/gtest.h"
+#include <stdlib.h>
+
+namespace tensorflow {
+namespace {
+PartialTensorShape TShape(std::initializer_list<int64> dims) {
+  return PartialTensorShape(dims);
+}
+
+FakeInputFunctor FakeInputStub(DataType dt) {
+  return [dt](const OpDef &op_def, int in_index, const NodeDef &node_def,
+              NodeDefBuilder *builder) {
+    char c = 'a' + (in_index % 26);
+    string in_node = string(&c, 1);
+    builder->Input(in_node, 0, dt);
+    return Status::OK();
+  };
+}
+
+TEST(LruCacheV2Test, TestLruCacheV2) {
+  DataTypeSlice input_types({DT_INT32, DT_FLOAT_REF, DT_FLOAT_REF, DT_INT32_REF, DT_INT32});
+  MemoryTypeSlice input_memory_types;
+  DataTypeSlice output_types(
+      {DT_FLOAT_REF, DT_FLOAT_REF, DT_INT32_REF, DT_INT32, DT_INT32, DT_INT32});
+  MemoryTypeSlice output_memory_types;
+  DeviceBase *device = new DeviceBase(Env::Default());
+  NodeDef *node_def = new NodeDef();
+  OpDef *op_def = new OpDef();
+  OpKernelConstruction *context = new OpKernelConstruction(
+      DEVICE_CPU, device, nullptr, node_def, op_def, nullptr, input_types,
+      input_memory_types, output_types, output_memory_types, 1, nullptr);
+  LRUCacheV2Op lrucachev2op(context);
+  OpKernelContext *ctx = nullptr;
+  lrucachev2op.Compute(ctx);
+  lrucachev2op.IsExpensive();
+
+  delete device;
+  delete node_def;
+  delete op_def;
+  delete context;
+
+  const OpRegistrationData *reg;
+  TF_CHECK_OK(OpRegistry::Global()->LookUp("LruCacheV2", &reg));
+  OpDef op_def1 = reg->op_def;
+  NodeDef def;
+  TF_CHECK_OK(NodeDefBuilder("dummy", &op_def1)
+                  .Attr("T", DT_INT32)
+                  .Attr("dtype", DT_FLOAT)
+                  .Attr("pre_route_count", 1)
+                  .Input(FakeInputStub(DT_INT32))
+                  .Input(FakeInputStub(DT_FLOAT_REF))
+                  .Input(FakeInputStub(DT_FLOAT_REF))
+                  .Input(FakeInputStub(DT_INT32_REF))
+                  .Input(FakeInputStub(DT_INT32))
+                  .Finalize(&def));
+  shape_inference::InferenceContext c(
+      0, &def, op_def1,
+      {TShape({3}), TShape({1024, 16}), TShape({256, 16}), TShape({256}), TShape({1})}, {}, {}, {});
+  TF_CHECK_OK(reg->shape_inference_fn(&c));
+
+}
+}
+} // namespace tensorflow
\ No newline at end of file
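The `InferenceContext` check above pins down the registered shape function with one explicit shape set. The same contract can be read off the wrapper's outputs as static shapes; this continues the illustrative sketch that follows the Python wrapper (same assumed shapes, nothing here is mandated by the op):

    # data1/cache1/tag1 mirror data/cache/tag, the two index outputs mirror
    # index_list, and not_in_cache_number is declared as a length-1 vector.
    expected = [(1024, 16), (256, 16), (256,), (3,), (3,), (1,)]
    outputs = [data1, cache1, tag1, index_offset_list,
               not_in_cache_index_list, not_in_cache_number]
    assert [tuple(t.shape.as_list()) for t in outputs] == expected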