From 4c60f354b6dd04e25c32b1ca95044c99d2491d27 Mon Sep 17 00:00:00 2001 From: guopeian Date: Mon, 9 Nov 2020 16:27:23 +0800 Subject: [PATCH 1/3] LruCache python api --- tf_adapter/kernels/npu_cpu_ops.cc | 29 +++++++++ tf_adapter/ops/npu_cpu_ops.cc | 60 +++++++++++++++---- .../python/npu_bridge/npu_cpu/lru_cache.py | 36 +++++++++++ tf_adapter/util/cache_interface.h | 58 ++++++++++++++++++ 4 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py create mode 100644 tf_adapter/util/cache_interface.h diff --git a/tf_adapter/kernels/npu_cpu_ops.cc b/tf_adapter/kernels/npu_cpu_ops.cc index 6ed325521..ec4f44a38 100644 --- a/tf_adapter/kernels/npu_cpu_ops.cc +++ b/tf_adapter/kernels/npu_cpu_ops.cc @@ -29,6 +29,8 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/configure/framework/resource_op_kernel.h" +#include "tf_adapter/util/cache_interface.h" namespace tensorflow { class EmbeddingRankIdOpKernel : public OpKernel { @@ -37,5 +39,32 @@ class EmbeddingRankIdOpKernel : public OpKernel { ~EmbeddingRankIdOpKernel() {} void Compute(OpKernelContext *context) override { LOG(INFO) << "EmbeddingRankIdOp Compute."; } }; + +class LruCacheOp : public ResourceOpKernel<CacheInterface> { + public: + explicit LruCacheOp(OpKernelConstruction* context) : ResourceOpKernel<CacheInterface>(context) {} + void Compute(OpKernelContext* context) override { LOG(INFO) << "LruCacheOp Compute"; } + private: + Status CreateResource(CacheInterface **resource) override + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return Status::OK(); + } +}; + +class CacheAddOp : public OpKernel { + public: + explicit CacheAddOp(OpKernelConstruction *context) : OpKernel(context) {} + void Compute(OpKernelContext *context) override { LOG(INFO) << "CacheAddOp Compute"; } +}; + +class CacheRemoteIndexToLocalOp : public OpKernel { + public: + 
explicit CacheRemoteIndexToLocalOp(OpKernelConstruction *context) : OpKernel(context) {} + void Compute(OpKernelContext *context) override { LOG(INFO) << "CacheRemoteIndexToLocalOp Compute"; } +}; + REGISTER_KERNEL_BUILDER(Name("EmbeddingRankId").Device(DEVICE_CPU), EmbeddingRankIdOpKernel); +REGISTER_KERNEL_BUILDER(Name("LruCache").Device(DEVICE_CPU), LruCacheOp); +REGISTER_KERNEL_BUILDER(Name("CacheAdd").Device(DEVICE_CPU), CacheAddOp); +REGISTER_KERNEL_BUILDER(Name("CacheRemoteIndexToLocal").Device(DEVICE_CPU), CacheRemoteIndexToLocalOp); } // namespace tensorflow \ No newline at end of file diff --git a/tf_adapter/ops/npu_cpu_ops.cc b/tf_adapter/ops/npu_cpu_ops.cc index 82f79a670..a4922c4b8 100644 --- a/tf_adapter/ops/npu_cpu_ops.cc +++ b/tf_adapter/ops/npu_cpu_ops.cc @@ -35,19 +35,19 @@ using shape_inference::InferenceContext; using shape_inference::ShapeHandle; REGISTER_OP("EmbeddingRankId") - .Input("addr_table: uint64") - .Input("index: T") - .Output("rank_id: uint64") - .Attr("T: {int64,int32,uint64}") - .Attr("row_memory: int = 320") - .Attr("mode: string = 'mod' ") - .SetAllowsUninitializedInput() - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto out_shape = c->MakeShape({c->Dim(c->input(1), 0), c->Dim(c->input(0), 1)}); - c->set_output(0, out_shape); - return Status::OK(); - }) - .Doc(R"doc( + .Input("addr_table: uint64") + .Input("index: T") + .Output("rank_id: uint64") + .Attr("T: {int64,int32,uint64}") + .Attr("row_memory: int = 320") + .Attr("mode: string = 'mod' ") + .SetAllowsUninitializedInput() + .SetShapeFn([](shape_inference::InferenceContext *c) { + auto out_shape = c->MakeShape({c->Dim(c->input(1), 0), c->Dim(c->input(0), 1)}); + c->set_output(0, out_shape); + return Status::OK(); + }) + .Doc(R"doc( Traverse the index calculation server and its position in the server. Arguments addr_table: Tensors of addr_table. @@ -55,4 +55,38 @@ REGISTER_OP("EmbeddingRankId") Output rank_id: Tensors with the same shape as index.dim(0)*3. 
)doc"); +// register lru cache op +REGISTER_OP("LruCache") + .Output("cache: resource") + .Attr("cache_size: int") + .Attr("container: string = ''") + .Attr("shared_name: string = 'LruCache'") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); +// register cache add op +REGISTER_OP("CacheAdd") + .Input("cache: resource") + .Input("ids: T") + .Output("swap_in_id: T") + .Output("swap_in_idx: int64") + .Output("swap_out_id: T") + .Output("swap_out_idx: int64") + .Attr("T: {int64, int32, uint64, uint32}") + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->Vector(c->UnknownDim())); + c->set_output(1, c->Vector(c->UnknownDim())); + c->set_output(2, c->Vector(c->UnknownDim())); + c->set_output(3, c->Vector(c->UnknownDim())); + return Status::OK(); + }); +// register cache remote index to local op +REGISTER_OP("CacheRemoteIndexToLocal") + .Input("cache: resource") + .Input("ids: T") + .Output("local_idx: int64") + .Attr("T: {int64, int32, uint32, uint64}") + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->Vector(c->Dim(c->input(1), 0))); + return Status::OK(); + }); } // namespace tensorflow diff --git a/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py new file mode 100644 index 000000000..504ccb4a5 --- /dev/null +++ b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py @@ -0,0 +1,36 @@ +from tensorflow.contrib.util import loader +from tensorflow.python.framework import load_library +from tensorflow.python.framework import ops +from tensorflow.python.platform import resource_loader +from npu_bridge.helper import helper + +gen_npu_cpu_ops = helper.get_gen_ops() + +class LruCache(object): + def __init(self, cache_size=100000, load_factor=1): + self._cache_size=cache_size + self._load_factor=load_factor + self._cache=gen_npu_cpu_ops.lru_cache( + cache_size=self._cache_size, + load_factor=self._load_factor + ) + + ##提供CacheAdd功能 + # @param cache resource类型,保存lruCache资源 + 
# @param ids int类型,输入索引 + # @return result 对ids执行完swap in/swap out操作后输出的索引张量 + def cache_add(self, ids): + return gen_npu_cpu_ops.cache_add( + cache=self._cache, + ids=ids + ) + + ##提供CacheRemoteIndexToLocal功能 + # @param cache resource类型,保存lruCache资源 + # @param ids int类型,输入索引 + # return result 输入ids对应cache中的索引 + def cache_remote_index_to_local(self, ids): + return gen_npu_cpu_ops.cache_remote_index_to_local( + cache=self._cache, + ids=ids + ) \ No newline at end of file diff --git a/tf_adapter/util/cache_interface.h b/tf_adapter/util/cache_interface.h new file mode 100644 index 000000000..0e10735b8 --- /dev/null +++ b/tf_adapter/util/cache_interface.h @@ -0,0 +1,58 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_FRAMEWORK_CACHE_INTERFACE_H_ +#define TENSORFLOW_CORE_FRAMEWORK_CACHE_INTERFACE_H_ + +#include <string> +#include <vector> +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" + +namespace tensorflow { +//All implementations must be thread-safe. +class CacheInterface : public ResourceBase { + public: + virtual void add(std::vector<int64> &ids_vec, + std::vector<int64> &swap_in_id_temp, + std::vector<int64> &swap_in_idx_temp, + std::vector<int64> &swap_out_id_temp, + std::vector<int64> &swap_out_idx_temp, + int64 &swap_in_num, + int64 &swap_out_num) = 0; + + virtual void remoteIndexToLocal(const std::vector<int64> &ids_vec, Tensor &local_idx) = 0; + //Return the num of elements in cache + virtual int64 size() const = 0; + //Return a debug string for *this + string DebugString() const override { return strings::StrCat("A Cache of size: ", size()); } + protected: + virtual ~CacheInterface() {} +}; +} //namespace tensorflow +#endif //TENSORFLOW_CORE_FRAMEWORK_CACHE_INTERFACE_H_ \ No newline at end of file -- Gitee From b14640d79b44f663315e9ecdb5809202d93ca7ac Mon Sep 17 00:00:00 2001 From: guopeian Date: Mon, 9 Nov 2020 16:40:12 +0800 Subject: [PATCH 2/3] LruCache python api --- tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py index 504ccb4a5..701c5cf54 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/lru_cache.py @@ -7,7 +7,7 @@ from npu_bridge.helper import helper gen_npu_cpu_ops = helper.get_gen_ops() class LruCache(object): - def __init(self, cache_size=100000, load_factor=1): + def __init__(self, cache_size=100000, load_factor=1): self._cache_size=cache_size 
self._load_factor=load_factor self._cache=gen_npu_cpu_ops.lru_cache( -- Gitee From 5a105e094da2b5fa056081d938992d5111fca842 Mon Sep 17 00:00:00 2001 From: guopeian Date: Mon, 9 Nov 2020 17:13:24 +0800 Subject: [PATCH 3/3] LruCache python api --- tf_adapter/kernels/npu_cpu_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter/kernels/npu_cpu_ops.cc b/tf_adapter/kernels/npu_cpu_ops.cc index ec4f44a38..b79b9ee26 100644 --- a/tf_adapter/kernels/npu_cpu_ops.cc +++ b/tf_adapter/kernels/npu_cpu_ops.cc @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/configure/framework/resource_op_kernel.h" +#include "tensorflow/core/framework/resource_op_kernel.h" #include "tf_adapter/util/cache_interface.h" namespace tensorflow { -- Gitee