From 9cd975de1baff3d7088944faeeb6b728b172be3d Mon Sep 17 00:00:00 2001
From: caiguangxing
Date: Sat, 12 Nov 2022 15:48:28 +0800
Subject: [PATCH] cpu_allocator add only tf1

---
 CMakeLists.txt                    |   1 +
 inc/runtime/rt_mem_queue.h        |   4 +
 tf_adapter/kernels/geop_npu.cc    |   1 +
 tf_adapter/util/mbuf_allocator.cc | 191 ++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+)
 create mode 100644 tf_adapter/util/mbuf_allocator.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ef1abd0d7..0885892e7 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -162,6 +162,7 @@ else()
 
     target_compile_options(tf_adapter PUBLIC
         $<$<COMPILE_LANGUAGE:CXX>:-std=c++11>
+        -g
         -O2
         -DNDEBUG
         -ftrapv
diff --git a/inc/runtime/rt_mem_queue.h b/inc/runtime/rt_mem_queue.h
index 1d708e869..4eb1a0ee6 100644
--- a/inc/runtime/rt_mem_queue.h
+++ b/inc/runtime/rt_mem_queue.h
@@ -229,6 +229,10 @@ RTS_API rtError_t rtMemGrpCreate(const char *name, const rtMemGrpConfig_t *cfg)
 
 RTS_API rtError_t rtMemGrpAddProc(const char *name, int32_t pid, const rtMemGrpShareAttr_t *attr) WEAKFUC;
 
+RTS_API rtError_t rtBuffAlloc(uint64_t size, void **buff) WEAKFUC;
+
+RTS_API rtError_t rtMbufBuild(void *buff, uint64_t size, rtMbufPtr_t *mbuf) WEAKFUC;
+
 RTS_API rtError_t rtMemGrpAttach(const char *name, int32_t timeout) WEAKFUC;
 
 RTS_API rtError_t rtMemGrpQuery(int32_t cmd, const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output) WEAKFUC;
diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index 13d6f2719..65324e8a4 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -306,6 +306,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) {
   mutex_lock lock{mu_};
   int64 startTime = InferShapeUtil::GetCurrentTimestap();
   ADP_LOG(INFO) << "[GEOP] Begin GeOp initialize.";
+  ADP_LOG(INFO) << "[GEOP] GeOp::Initialize.";
   if (init_flag_) {
     ADP_LOG(WARNING) << "[GEOP] GEOP already Initialize.";
     return;
diff --git a/tf_adapter/util/mbuf_allocator.cc b/tf_adapter/util/mbuf_allocator.cc
new file mode 100644
index 000000000..dfd678da0
--- /dev/null
+++ b/tf_adapter/util/mbuf_allocator.cc
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifdef TF_VERSION_TF2
+
+#else
+
+#include <atomic>
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/allocator_registry.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/mem.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+#include "tf_adapter/common/adapter_logger.h"
+#include "inc/external/acl/error_codes/rt_error_codes.h"
+#include "runtime/rt_mem_queue.h"
+#include "graph/def_types.h"
+
+
+namespace tensorflow {
+// If true, cpu allocator collects more stats.
+static bool cpu_allocator_collect_stats = false;
+const uint64_t kRuntimeTensorDescSize = 1024UL;
+
+void EnableCPUAllocatorStats(bool enable) {
+  cpu_allocator_collect_stats = enable;
+}
+
+bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
+
+static const int kMaxTotalAllocationWarnings = 1;
+static const int kMaxSingleAllocationWarnings = 5;
+
+// If cpu_allocator_collect_stats is true, warn when the total allocated memory
+// exceeds this threshold.
+static const double kTotalAllocationWarningThreshold = 0.5;
+
+// Individual allocations larger than this amount will trigger a warning.
+static const double kLargeAllocationWarningThreshold = 0.1;
+
+// Cache first invocation to port::AvailableRam, as it can be expensive.
+static size_t LargeAllocationWarningBytes() {
+  static int64_t value = static_cast<int64_t>(port::AvailableRam() *
+                                              kLargeAllocationWarningThreshold);
+  return value;
+}
+
+static int64_t TotalAllocationWarningBytes() {
+  static int64_t value = static_cast<int64_t>(port::AvailableRam() *
+                                              kTotalAllocationWarningThreshold);
+  return value;
+}
+
+namespace {
+// A default Allocator for CPU devices. ProcessState::GetCPUAllocator() will
+// return a different version that may perform better, but may also lack the
+// optional stats triggered by the functions above. TODO(tucker): migrate all
+// uses of cpu_allocator() except tests to use ProcessState instead.
+class CPUAllocator : public Allocator {
+public:
+  CPUAllocator()
+      : single_allocation_warning_count_(0),
+        total_allocation_warning_count_(0) {
+    ADP_LOG(INFO) << "CPUAllocator constructed, MbufInit already executed.";
+  }
+
+  ~CPUAllocator() override {
+    ADP_LOG(INFO) << "CPUAllocator destroyed.";
+  }
+
+  string Name() override { return "mbufAllocator"; }
+
+  void *AllocateRaw(size_t alignment, size_t num_bytes) override {
+    ADP_LOG(INFO) << "CPUAllocator AllocateRaw called.";
+    if (num_bytes > LargeAllocationWarningBytes() &&
+        single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
+      ++single_allocation_warning_count_;
+      ADP_LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
+                       << 100 * kLargeAllocationWarningThreshold
+                       << "% of system memory.";
+    }
+
+    void *p = port::AlignedMalloc(num_bytes, alignment);
+    if (cpu_allocator_collect_stats) {
+      const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
+      mutex_lock l(mu_);
+      ++stats_.num_allocs;
+      stats_.bytes_in_use += alloc_size;
+      stats_.peak_bytes_in_use =
+          std::max(stats_.peak_bytes_in_use, stats_.bytes_in_use);
+      stats_.largest_alloc_size =
+          std::max<int64_t>(stats_.largest_alloc_size, alloc_size);
+
+      if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
+          total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
+        ++total_allocation_warning_count_;
+        ADP_LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
+                         << " exceeds " << 100 * kTotalAllocationWarningThreshold
+                         << "% of system memory.";
+      }
+    }
+    ADP_LOG(INFO) << "CPUAllocator AllocateRaw cpu_data=" << p;
+    return p;
+  }
+
+  void DeallocateRaw(void *ptr) override {
+    ADP_LOG(INFO) << "CPUAllocator DeallocateRaw called.";
+    if (cpu_allocator_collect_stats) {
+      const std::size_t alloc_size =
+          port::MallocExtension_GetAllocatedSize(ptr);
+      mutex_lock l(mu_);
+      stats_.bytes_in_use -= alloc_size;
+    }
+    port::AlignedFree(ptr);
+  }
+
+  absl::optional<AllocatorStats> GetStats() override {
+    mutex_lock l(mu_);
+    return stats_;
+  }
+
+  void ClearStats() override {
+    mutex_lock l(mu_);
+    stats_.num_allocs = 0;
+    stats_.peak_bytes_in_use = stats_.bytes_in_use;
+    stats_.largest_alloc_size = 0;
+  }
+
+  size_t AllocatedSizeSlow(const void *ptr) const override {
+    return port::MallocExtension_GetAllocatedSize(ptr);
+  }
+
+private:
+  mutex mu_;
+  AllocatorStats stats_ GUARDED_BY(mu_);
+
+  // Use for single allocations to avoid mutex contention when
+  // statistics are disabled.
+  std::atomic<int> single_allocation_warning_count_;
+  int total_allocation_warning_count_ GUARDED_BY(mu_);
+
+  TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
+};
+
+class CPUAllocatorFactory : public AllocatorFactory {
+public:
+  Allocator *CreateAllocator() override { return new CPUAllocator; }
+
+  SubAllocator *CreateSubAllocator(int numa_node) override {
+    return new CPUSubAllocator(new CPUAllocator);
+  }
+
+private:
+  class CPUSubAllocator : public SubAllocator {
+  public:
+    explicit CPUSubAllocator(CPUAllocator *cpu_allocator)
+        : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {
+    }
+
+    void *Alloc(size_t alignment, size_t num_bytes) override {
+      ADP_LOG(INFO) << "CPUSubAllocator Alloc called.";
+      return cpu_allocator_->AllocateRaw(alignment, num_bytes);
+    }
+
+    void Free(void *ptr, size_t num_bytes) override {
+      ADP_LOG(INFO) << "CPUSubAllocator Free called.";
+      cpu_allocator_->DeallocateRaw(ptr);
+    }
+
+  private:
+    CPUAllocator *cpu_allocator_;
+  };
+};
+
+REGISTER_MEM_ALLOCATOR("DefaultMBUFAllocator", 110, CPUAllocatorFactory);
+}  // namespace
+}  // namespace tensorflow
+#endif
--
Gitee
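
Reviewer note (not part of the patch): the rtBuffAlloc/rtMbufBuild declarations added to rt_mem_queue.h are not referenced yet; CPUAllocator::AllocateRaw still allocates through port::AlignedMalloc. The sketch below shows one way the registered allocator could eventually hand out mbuf-backed memory. It is a minimal illustration, not the author's implementation: the helper name AllocateMbufBacked is hypothetical, a return value of 0 is assumed to mean success, and the release path is omitted because no matching free interface is declared in this patch.

#include <cstddef>
#include <cstdint>
#include "runtime/rt_mem_queue.h"  // rtBuffAlloc, rtMbufBuild, rtMbufPtr_t (declared above)

// Hypothetical helper: allocate a runtime buffer and wrap it into an mbuf so
// the same memory can later be passed through the runtime memory queues
// without an extra copy.
static void *AllocateMbufBacked(size_t num_bytes, rtMbufPtr_t *out_mbuf) {
  void *buff = nullptr;
  if (rtBuffAlloc(static_cast<uint64_t>(num_bytes), &buff) != 0 || buff == nullptr) {
    return nullptr;  // 0 is assumed to be the runtime success code
  }
  if (rtMbufBuild(buff, static_cast<uint64_t>(num_bytes), out_mbuf) != 0) {
    return nullptr;  // NOTE: the raw buffer would still need to be released here
  }
  return buff;  // tensor data lives directly in the runtime-managed buffer
}

Wiring such a helper into AllocateRaw/DeallocateRaw, and keeping the rtMbufPtr_t handle so the buffer can later be enqueued and released, appears to be the follow-up step this patch prepares for.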