From 4c60f354b6dd04e25c32b1ca95044c99d2491d27 Mon Sep 17 00:00:00 2001 From: guopeian Date: Mon, 9 Nov 2020 16:27:23 +0800 Subject: [PATCH 1/3] LruCache python api --- tf_adapter/kernels/npu_cpu_ops.cc | 29 +++++++++ tf_adapter/ops/npu_cpu_ops.cc | 60 +++++++++++++++---- .../python/npu_bridge/npu_cpu/lru_cache.py | 36 +++++++++++ tf_adapter/util/cache_interface.h | 58 ++++++++++++++++++ 4 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py create mode 100644 tf_adapter/util/cache_interface.h diff --git a/tf_adapter/kernels/npu_cpu_ops.cc b/tf_adapter/kernels/npu_cpu_ops.cc index 6ed325521..ec4f44a38 100644 --- a/tf_adapter/kernels/npu_cpu_ops.cc +++ b/tf_adapter/kernels/npu_cpu_ops.cc @@ -29,6 +29,8 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/configure/framework/resource_op_kernel.h" +#include "tf_adapter/util/cache_interface.h" namespace tensorflow { class EmbeddingRankIdOpKernel : public OpKernel { @@ -37,5 +39,32 @@ class EmbeddingRankIdOpKernel : public OpKernel { ~EmbeddingRankIdOpKernel() {} void Compute(OpKernelContext *context) override { LOG(INFO) << "EmbeddingRankIdOp Compute."; } }; + +class LruCacheOp : public ResourceOpKernel<CacheInterface> { + public: + explicit LruCacheOp(OpKernelConstruction* context) : ResourceOpKernel<CacheInterface>(context) {} + void Compute(OpKernelContext* context) override { LOG(INFO) << "LruCacheOp Compute"; } + private: + Status CreateResource(CacheInterface **resource) override + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return Status::OK(); + } +}; + +class CacheAddOp : public OpKernel { + public: + explicit CacheAddOp(OpKernelConstruction *context) : OpKernel(context) {} + void Compute(OpKernelContext *context) override { LOG(INFO) << "CacheAddOp Compute"; } +}; + +class CacheRemoteIndexToLocalOp : public OpKernel { + public: + 
explicit CacheRemoteIndexToLocalOp(OpKernelConstruction *context) : OpKernel(context) {} + void Compute(OpKernelContext *context) override { LOG(INFO) << "CacheRemoteIndexToLocalOp Compute"; } +}; + REGISTER_KERNEL_BUILDER(Name("EmbeddingRankId").Device(DEVICE_CPU), EmbeddingRankIdOpKernel); +REGISTER_KERNEL_BUILDER(Name("LruCache").Device(DEVICE_CPU), LruCacheOp); +REGISTER_KERNEL_BUILDER(Name("CacheAdd").Device(DEVICE_CPU), CacheAddOp); +REGISTER_KERNEL_BUILDER(Name("CacheRemoteIndexToLocal").Device(DEVICE_CPU), CacheRemoteIndexToLocalOp); } // namespace tensorflow \ No newline at end of file diff --git a/tf_adapter/ops/npu_cpu_ops.cc b/tf_adapter/ops/npu_cpu_ops.cc index 82f79a670..a4922c4b8 100644 --- a/tf_adapter/ops/npu_cpu_ops.cc +++ b/tf_adapter/ops/npu_cpu_ops.cc @@ -35,19 +35,19 @@ using shape_inference::InferenceContext; using shape_inference::ShapeHandle; REGISTER_OP("EmbeddingRankId") - .Input("addr_table: uint64") - .Input("index: T") - .Output("rank_id: uint64") - .Attr("T: {int64,int32,uint64}") - .Attr("row_memory: int = 320") - .Attr("mode: string = 'mod' ") - .SetAllowsUninitializedInput() - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto out_shape = c->MakeShape({c->Dim(c->input(1), 0), c->Dim(c->input(0), 1)}); - c->set_output(0, out_shape); - return Status::OK(); - }) - .Doc(R"doc( + .Input("addr_table: uint64") + .Input("index: T") + .Output("rank_id: uint64") + .Attr("T: {int64,int32,uint64}") + .Attr("row_memory: int = 320") + .Attr("mode: string = 'mod' ") + .SetAllowsUninitializedInput() + .SetShapeFn([](shape_inference::InferenceContext *c) { + auto out_shape = c->MakeShape({c->Dim(c->input(1), 0), c->Dim(c->input(0), 1)}); + c->set_output(0, out_shape); + return Status::OK(); + }) + .Doc(R"doc( Traverse the index calculation server and its position in the server. Arguments addr_table: Tensors of addr_table. @@ -55,4 +55,38 @@ REGISTER_OP("EmbeddingRankId") Output rank_id: Tensors with the same shape as index.dim(0)*3. 
)doc"); +// register lru cache op +REGISTER_OP("LruCache") + .Output("cache: resource") + .Attr("cache_size: int") + .Attr("container: string = ''") + .Attr("shared_name: string = 'LruCache'") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); +// register cache add op +REGISTER_OP("CacheAdd") + .Input("cache: resource") + .Input("ids: T") + .Output("swap_in_id: T") + .Output("swap_in_idx: int64") + .Output("swap_out_id: T") + .Output("swap_out_idx: int64") + .Attr("T: {int64, int32, uint64, uint32}") + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->Vector(c->UnknownDim())); + c->set_output(1, c->Vector(c->UnknownDim())); + c->set_output(2, c->Vector(c->UnknownDim())); + c->set_output(3, c->Vector(c->UnknownDim())); + return Status::OK(); + }); +// register cache remote index to local op +REGISTER_OP("CacheRemoteIndexToLocal") + .Input("cache: resource") + .Input("ids: T") + .Output("local_idx: int64") + .Attr("T: {int64, int32, uint32, uint64}") + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->Vector(c->Dim(c->input(1), 0))); + return Status::OK(); + }); } // namespace tensorflow diff --git a/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py new file mode 100644 index 000000000..504ccb4a5 --- /dev/null +++ b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py @@ -0,0 +1,36 @@ +from tensorflow.contrib.util import loader +from tensorflow.python.framework import load_library +from tensorflow.python.framework import ops +from tensorflow.python.platform import resource_loader +from npu_bridge.helper import helper + +gen_npu_cpu_ops = helper.get_gen_ops() + +class LruCache(object): + def __init(self, cache_size=100000, load_factor=1): + self._cache_size=cache_size + self._load_factor=load_factor + self._cache=gen_npu_cpu_ops.lru_cache( + cache_size=self._cache_size, + load_factor=self._load_factor + ) + + ##提供CacheAdd功能 + # @param cache resource类型,保存lruCache资源 + 
# @param ids int类型,输入索引 + # @return result 对ids执行完swap in/swap out操作后输出的索引张量 + def cache_add(self, ids): + return gen_npu_cpu_ops.cache_add( + cache=self._cache, + ids=ids + ) + + ##提供CacheRemoteIndexToLocal功能 + # @param cache resource类型,保存lruCache资源 + # @param ids int类型,输入索引 + # return result 输入ids对应cache中的索引 + def cache_remote_index_to_local(self, ids): + return gen_npu_cpu_ops.cache_remote_index_to_local( + cache=self._cache, + ids=ids + ) \ No newline at end of file diff --git a/tf_adapter/util/cache_interface.h b/tf_adapter/util/cache_interface.h new file mode 100644 index 000000000..0e10735b8 --- /dev/null +++ b/tf_adapter/util/cache_interface.h @@ -0,0 +1,58 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_FRAMEWORK_CACHE_INTERFACE_H_ +#define TENSORFLOW_CORE_FRAMEWORK_CACHE_INTERFACE_H_ + +#include <string> +#include <vector> +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" + +namespace tensorflow { +//All implementations must be thread-safe. +class CacheInterface : public ResourceBase { + public: + virtual void add(std::vector<int64> &ids_vec, + std::vector<int64> &swap_in_id_temp, + std::vector<int64> &swap_in_idx_temp, + std::vector<int64> &swap_out_id_temp, + std::vector<int64> &swap_out_idx_temp, + int64 &swap_in_num, + int64 &swap_out_num) = 0; + + virtual void remoteIndexToLocal(const std::vector<int64> &ids_vec, Tensor &local_idx) = 0; + //Return the num of elements in cache + virtual int64 size() const = 0; + //Return a debug string for *this + string DebugString() const override { return strings::StrCat("A Cache of size: ", size()); } + protected: + virtual ~CacheInterface() {} +}; +} //namespace tensorflow +#endif //TENSORFLOW_CORE_FRAMEWORK_CACHE_INTERFACE_H_ \ No newline at end of file -- Gitee From b14640d79b44f663315e9ecdb5809202d93ca7ac Mon Sep 17 00:00:00 2001 From: guopeian Date: Mon, 9 Nov 2020 16:40:12 +0800 Subject: [PATCH 2/3] LruCache python api --- tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py index 504ccb4a5..701c5cf54 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py @@ -7,7 +7,7 @@ from npu_bridge.helper import helper gen_npu_cpu_ops = helper.get_gen_ops() class LruCache(object): - def __init(self, cache_size=100000, load_factor=1): + def __init__(self, cache_size=100000, load_factor=1): self._cache_size=cache_size 
self._load_factor=load_factor self._cache=gen_npu_cpu_ops.lru_cache( -- Gitee From 5a105e094da2b5fa056081d938992d5111fca842 Mon Sep 17 00:00:00 2001 From: guopeian Date: Mon, 9 Nov 2020 17:13:24 +0800 Subject: [PATCH 3/3] LruCache python api --- tf_adapter/kernels/npu_cpu_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter/kernels/npu_cpu_ops.cc b/tf_adapter/kernels/npu_cpu_ops.cc index ec4f44a38..b79b9ee26 100644 --- a/tf_adapter/kernels/npu_cpu_ops.cc +++ b/tf_adapter/kernels/npu_cpu_ops.cc @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/configure/framework/resource_op_kernel.h" +#include "tensorflow/core/framework/resource_op_kernel.h" #include "tf_adapter/util/cache_interface.h" namespace tensorflow { -- Gitee