From 9450675b4f5aad84529c1dc62d25780c55ec7a92 Mon Sep 17 00:00:00 2001 From: guopeian Date: Tue, 2 Jul 2024 16:44:45 +0800 Subject: [PATCH 1/2] profilier --- tf_adapter/kernels/geop_npu.cc | 2 + .../python/npu_bridge/profiler/profiler.py | 44 +++++++ tf_adapter/swig/ge_plugin.i | 9 ++ .../depends/ascendcl/src/ascendcl_stub.cc | 25 ++++ .../depends/ascendcl/src/ascendcl_stub.h | 1 + .../st/kernels/testcase/geop_npu_test.cc | 11 ++ .../ut/kernels/testcase/geop_npu_test.cc | 11 ++ tf_adapter/util/npu_attrs.cc | 5 + tf_adapter/util/profiler.cc | 116 ++++++++++++++++++ tf_adapter/util/profiler.h | 48 ++++++++ tf_adapter/util/profiler_interface.cc | 28 +++++ tf_adapter/util/profiler_interface.h | 25 ++++ .../python/npu_device/configs/option_base.py | 2 - tf_adapter_2.x/tests/stub/acl_stub.cpp | 26 ++++ 14 files changed, 351 insertions(+), 2 deletions(-) create mode 100644 tf_adapter/python/npu_bridge/profiler/profiler.py create mode 100644 tf_adapter/util/profiler.cc create mode 100644 tf_adapter/util/profiler.h create mode 100644 tf_adapter/util/profiler_interface.cc create mode 100644 tf_adapter/util/profiler_interface.h diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 8e5b40376..2571608df 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -80,6 +80,7 @@ #include "tf_adapter_2.x/npu_device/core/npu_micros.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tf_adapter/util/profiler.h" namespace tensorflow { #ifdef TF_VERSION_TF2 @@ -1415,6 +1416,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { << ", kernel_name:" << ctx->op_kernel().name() << "[ " << (run_end_time - run_start_time) << "us]"; done(); }; + OP_REQUIRES_ASYNC(ctx, Profiler::GetInstance().Start(), errors::Internal("Start profiling failed"), done); OP_REQUIRES_OK_ASYNC(ctx, CompileAndRunGraph(ctx, input_vec, inputs, input_shapes, callback, done), done); 
int64_t end_time = InferShapeUtil::GetCurrentTimestap(); diff --git a/tf_adapter/python/npu_bridge/profiler/profiler.py b/tf_adapter/python/npu_bridge/profiler/profiler.py new file mode 100644 index 000000000..3fedaa7a0 --- /dev/null +++ b/tf_adapter/python/npu_bridge/profiler/profiler.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from npu_bridge import tf_adapter +class profiler(object): + def __init__( + self, + *, + task_time = True, + aic_metrics = "", + output_path = "" + ): + self._task_time = task_time + self._aic_metrics = aic_metrics + self._output_path = output_path + + def __enter__(self): + self.start() + return self + + def __exit__(self, exe_type, exe_val, exc_tb): + self.stop() + + def start(self): + if tf_adapter.ProfilerStart(self._task_time, self._aic_metrics, self._output_path) == False: + raise RuntimeError("Start profiler failed") + + def stop(self): + if tf_adapter.ProfilerStop() == False: + raise RuntimeError("Stop profiler failed") \ No newline at end of file diff --git a/tf_adapter/swig/ge_plugin.i b/tf_adapter/swig/ge_plugin.i index 8dbbb3949..e7390e6c8 100644 --- a/tf_adapter/swig/ge_plugin.i +++ b/tf_adapter/swig/ge_plugin.i @@ -43,6 +43,7 @@ namespace std{ %{ #include "tf_adapter/util/npu_plugin.h" +#include 
"tf_adapter/util/profiler_interface.h" extern int32_t InitRdmaPool(size_t size); @@ -57,6 +58,10 @@ extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_ extern int32_t SetDeviceSatMode(uint32_t mode); extern int32_t GetDeviceSatMode(); + +extern bool ProfilerStart(const bool &task_time, const std::string &aic_metrics, const std::string &output_path); + +extern bool ProfilerStop(); %} %template(var_info_vec) std::vector; @@ -130,3 +135,7 @@ extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_ extern int32_t SetDeviceSatMode(uint32_t mode); extern int32_t GetDeviceSatMode(); + +extern bool ProfilerStart(const bool &task_time, const std::string &aic_metrics, const std::string &output_path); + +extern bool ProfilerStop(); \ No newline at end of file diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc index 94c71d925..1a0102185 100644 --- a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc +++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc @@ -59,6 +59,31 @@ namespace acl { } } +aclError aclprofInit(const char *profilerResultPath, size_t length) { + return ACL_SUCCESS; +} + +aclError aclprofFinalize() { + return ACL_SUCCESS; +} + +aclError aclprofStart(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclError aclprofStop(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, + aclprofAicoreMetrics aicoreMetrics, const aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig) { + return nullptr; +} + aclError acltdtDestroyChannel(acltdtChannelHandle *handle) { if (handle == nullptr) { return ACL_ERROR_INVALID_PARAM; diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h 
b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h index acabc023c..7d10a08cc 100644 --- a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h +++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h @@ -21,6 +21,7 @@ #include "acl/acl_base.h" #include "acl/acl_tdt.h" #include "acl/acl_rt.h" +#include "acl/acl_prof.h" #include "graph/ascend_string.h" #include "graph/ge_error_codes.h" #include "graph/small_vector.h" diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc index 041a948d3..9cb6f6d95 100644 --- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc @@ -9,6 +9,7 @@ #include "gtest/gtest.h" #include "ge_stub.h" #include "callback_executor.h" +#include "tf_adapter/util/profiler_interface.h" #define private public #include "tf_adapter/kernels/geop_npu.h" #undef private @@ -488,6 +489,16 @@ TEST_F(GeOpTest, GeOpFuncSubGraphTest) { EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp12_0").ok()); } +TEST_F(GeOpTest, GeOpFuncTestWithProfiling) { + NpuClose(); + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(ProfilerStart(true, "PipeUtilization", "./")); + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); + EXPECT_TRUE(ProfilerStop()); +} + TEST_F(GeOpTest, GeOpDynamicDimsTest) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_dims.pbtxt"; diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index e856fb82e..3b491bb51 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -12,6 +12,7 @@ #include "tf_adapter/util/npu_plugin.h" #include "tf_adapter/util/util.h" #include "callback_executor.h" 
+#include "tf_adapter/util/profiler_interface.h" #define private public #include "tf_adapter/kernels/geop_npu.h" #undef private @@ -236,6 +237,16 @@ TEST_F(GeOpTest, GeOpFuncTest) { EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); } +TEST_F(GeOpTest, GeOpFuncTestWithProfiling) { + NpuClose(); + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(ProfilerStart(true, "PipeUtilization", "./")); + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); + EXPECT_TRUE(ProfilerStop()); +} + TEST_F(GeOpTest, GeOpFuncTest_NpuCompile) { NpuClose(); NodeDef node_def; diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index f970327d6..c22f8256b 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -31,6 +31,8 @@ #include "ge/ge_api.h" #include "ge/ge_api_types.h" #include "tf_adapter_2.x/npu_device/core/npu_micros.h" +#include "tf_adapter/util/profiler.h" + namespace tensorflow { namespace { bool kIsNewDataTransfer = true; @@ -1941,6 +1943,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options } if (params.count("profiling_mode") > 0) { profiling_mode = params.at("profiling_mode").b(); + if (profiling_mode && Profiler::GetInstance().IsEnableProfiler()) { + return errors::InvalidArgument("Option profiling_mode cannot set true when use tf_adapter.profiler"); + } } if (profiling_mode) { if (params.count("profiling_options") > 0 && params.at("profiling_options").s() != "") { diff --git a/tf_adapter/util/profiler.cc b/tf_adapter/util/profiler.cc new file mode 100644 index 000000000..a6fa6521a --- /dev/null +++ b/tf_adapter/util/profiler.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profiler.h" +#include "tf_adapter/common/adapter_logger.h" +#include "npu_attrs.h" + +namespace { +std::map kNpuMetricsMap = { + {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION}, + {"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION}, + {"MemoryBandwidth", ACL_AICORE_MEMORY_BANDWIDTH}, + {"L0bAndWidth", ACL_AICORE_L0B_AND_WIDTH}, + {"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO}, + {"MemoryUb", ACL_AICORE_MEMORY_UB}, + {"L2Cache", ACL_AICORE_L2_CACHE}, + {"", ACL_AICORE_NONE} +}; +} +namespace tensorflow { +Profiler &Profiler::GetInstance() { + static Profiler instance; + return instance; +} + +bool Profiler::EnableProfiler(const bool &task_time, + const std::string &aic_metrics, const std::string &output_path) { + mutex_lock lock{mu_}; + if (enable_flag_) { + ADP_LOG(WARNING) << "Profiling has been enable"; + } + ADP_LOG(INFO) << "Enable profiling"; + const auto iter = kNpuMetricsMap.find(aic_metrics); + if (iter != kNpuMetricsMap.cend()) { + aic_metrics_ = iter->second; + } else { + ADP_LOG(ERROR) << "Profiling options: aic_metrics cannot set to: " << aic_metrics; + return false; + } + task_time_ = task_time; + output_path_ = output_path; + enable_flag_ = true; + return true; +} + +bool Profiler::Start() { + mutex_lock lock{mu_}; + if (!enable_flag_) { + return true; + } + if (has_start_) { + ADP_LOG(WARNING) << "Profiling has been start"; + return true; + } + if 
(output_path_.empty()) { + output_path_ = "./"; + } + if (aclprofInit(output_path_.c_str(), output_path_.size()) != ACL_ERROR_NONE) { + return false; + } + uint32_t device_id = 0U; + if (!GetDeviceID(device_id).ok()) { + return false; + } + uint64_t datatype_config = 0UL; + if (task_time_) { + datatype_config |= ACL_PROF_TASK_TIME; + } + if (aic_metrics_ != ACL_AICORE_NONE) { + datatype_config |= ACL_PROF_AICORE_METRICS; + } + prof_config_ = aclprofCreateConfig(&device_id, 1U, aic_metrics_, nullptr, datatype_config); + if (prof_config_ == nullptr) { + return false; + } + // prof_config_ = aclprofCreateConfig(&device_id, 1U, aic_metrics, nullptr); + if (aclprofStart(prof_config_) != ACL_ERROR_NONE) { + return false; + } + has_start_ = true; + return true; +} + +bool Profiler::Stop() { + mutex_lock lock{mu_}; + if (!enable_flag_) { + return true; + } + if (aclprofStop(prof_config_) != ACL_ERROR_NONE) { + return false; + } + if (aclprofDestroyConfig(prof_config_) != ACL_ERROR_NONE) { + return false; + } + prof_config_ = nullptr; + if (aclprofFinalize() != ACL_ERROR_NONE) { + return false; + } + has_start_ = false; + enable_flag_ = false; + return true; +} +} \ No newline at end of file diff --git a/tf_adapter/util/profiler.h b/tf_adapter/util/profiler.h new file mode 100644 index 000000000..ff95bf477 --- /dev/null +++ b/tf_adapter/util/profiler.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TENSORFLOW_UTILS_PROFILER_H_ +#define TENSORFLOW_UTILS_PROFILER_H_ +#include "acl/acl_prof.h" +#include <string> +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + class Profiler { + public: + static Profiler &GetInstance(); + bool EnableProfiler(const bool &task_time, + const std::string &aic_metrics, const std::string &output_path); + bool Start(); + bool Stop(); + bool IsEnableProfiler() { mutex_lock lock{mu_}; return enable_flag_; } // enable_flag_ is written under mu_, so read it under mu_ too + private: + Profiler() = default; + explicit Profiler(const Profiler &obj) = delete; + Profiler& operator=(const Profiler &obj) = delete; + explicit Profiler(Profiler &&obj) = delete; + Profiler& operator=(Profiler &&obj) = delete; + aclprofConfig *prof_config_{nullptr}; + bool task_time_{false}; + aclprofAicoreMetrics aic_metrics_{ACL_AICORE_NONE}; + bool enable_flag_{false}; + bool has_start_{false}; + mutex mu_; + std::string output_path_; + }; +} + +#endif \ No newline at end of file diff --git a/tf_adapter/util/profiler_interface.cc b/tf_adapter/util/profiler_interface.cc new file mode 100644 index 000000000..3f3fc673a --- /dev/null +++ b/tf_adapter/util/profiler_interface.cc @@ -0,0 +1,28 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "profiler_interface.h" +#include "profiler.h" + +bool ProfilerStart(const bool &task_time, + const std::string &aic_metrics, + const std::string &output_path) { + return tensorflow::Profiler::GetInstance().EnableProfiler(task_time, aic_metrics, output_path); +} + +bool ProfilerStop() { + return tensorflow::Profiler::GetInstance().Stop(); +} \ No newline at end of file diff --git a/tf_adapter/util/profiler_interface.h b/tf_adapter/util/profiler_interface.h new file mode 100644 index 000000000..27aba7f63 --- /dev/null +++ b/tf_adapter/util/profiler_interface.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TENSORFLOW_UTILS_PROFILER_INTERFACE_H_ +#define TENSORFLOW_UTILS_PROFILER_INTERFACE_H_ +#include + +bool ProfilerStart(const bool &task_time, const std::string &aic_metrics, const std::string &output_path); + +bool ProfilerStop(); + +#endif \ No newline at end of file diff --git a/tf_adapter_2.x/python/npu_device/configs/option_base.py b/tf_adapter_2.x/python/npu_device/configs/option_base.py index 8fe4d2623..526aac6e4 100644 --- a/tf_adapter_2.x/python/npu_device/configs/option_base.py +++ b/tf_adapter_2.x/python/npu_device/configs/option_base.py @@ -15,8 +15,6 @@ # ============================================================================== """NPU basic configurations""" - - class OptionValue: """Options for setting npu basic configurations""" def __init__(self, default, optional): diff --git a/tf_adapter_2.x/tests/stub/acl_stub.cpp b/tf_adapter_2.x/tests/stub/acl_stub.cpp index 8d1c7393b..f01408cfd 100644 --- a/tf_adapter_2.x/tests/stub/acl_stub.cpp +++ b/tf_adapter_2.x/tests/stub/acl_stub.cpp @@ -22,6 +22,7 @@ limitations under the License. 
#include "acl/acl_op_compiler.h" #include "acl/acl_rt.h" #include "acl/acl_tdt.h" +#include "acl/acl_prof.h" namespace { const uint32_t kDeviceSatModeLimit = 2U; @@ -65,6 +66,31 @@ struct acltdtChannelHandle { extern "C" { #endif +aclError aclprofInit(const char *profilerResultPath, size_t length) { + return ACL_SUCCESS; +} + +aclError aclprofFinalize() { + return ACL_SUCCESS; +} + +aclError aclprofStart(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclError aclprofStop(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, + aclprofAicoreMetrics aicoreMetrics, const aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig) { + return nullptr; +} + aclError aclopCompileAndExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], -- Gitee From 53f8982c6c4c2a8c3cec625c9b16c66951b6a7af Mon Sep 17 00:00:00 2001 From: guopeian Date: Thu, 11 Jul 2024 21:06:14 +0800 Subject: [PATCH 2/2] Level --- tf_adapter/kernels/geop_npu.cc | 4 +- .../python/npu_bridge/profiler/profiler.py | 12 ++-- tf_adapter/swig/ge_plugin.i | 4 +- .../st/kernels/testcase/geop_npu_test.cc | 2 +- .../ut/kernels/testcase/geop_npu_test.cc | 2 +- tf_adapter/util/profiler.cc | 69 +++++++++++++------ tf_adapter/util/profiler.h | 6 +- tf_adapter/util/profiler_interface.cc | 4 +- tf_adapter/util/profiler_interface.h | 2 +- 9 files changed, 69 insertions(+), 36 deletions(-) diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 2571608df..591a1ec82 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -1133,6 +1133,9 @@ Status GeOp::RunGraph(OpKernelContext *ctx, const uint32_t &graph_id, const 
std::vector &inputs, ge::RunAsyncCallback callback) { // call ge session runGraphAsync api + if (!Profiler::GetInstance().Start()) { + return errors::Internal("Start profiling failed"); + } ADP_LOG(INFO) << "[GEOP] Call ge session RunGraphAsync, kernel_name: " << ctx->op_kernel().name() << ", tf session: " << tf_session_ << ", graph id: " << graph_id; @@ -1416,7 +1419,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { << ", kernel_name:" << ctx->op_kernel().name() << "[ " << (run_end_time - run_start_time) << "us]"; done(); }; - OP_REQUIRES_ASYNC(ctx, Profiler::GetInstance().Start(), errors::Internal("Start profiling failed"), done); OP_REQUIRES_OK_ASYNC(ctx, CompileAndRunGraph(ctx, input_vec, inputs, input_shapes, callback, done), done); int64_t end_time = InferShapeUtil::GetCurrentTimestap(); diff --git a/tf_adapter/python/npu_bridge/profiler/profiler.py b/tf_adapter/python/npu_bridge/profiler/profiler.py index 3fedaa7a0..6cd7f0ad9 100644 --- a/tf_adapter/python/npu_bridge/profiler/profiler.py +++ b/tf_adapter/python/npu_bridge/profiler/profiler.py @@ -20,11 +20,13 @@ class profiler(object): def __init__( self, *, - task_time = True, - aic_metrics = "", - output_path = "" + level: str = "L0", + aic_metrics: str = "", + output_path: str = "" ): - self._task_time = task_time + if not isinstance(level, str): + raise ValueError('{} should be str'.format(level)) + self._level = level self._aic_metrics = aic_metrics self._output_path = output_path @@ -36,7 +38,7 @@ class profiler(object): self.stop() def start(self): - if tf_adapter.ProfilerStart(self._task_time, self._aic_metrics, self._output_path) == False: + if tf_adapter.ProfilerStart(self._level, self._aic_metrics, self._output_path) == False: raise RuntimeError("Start profiler failed") def stop(self): diff --git a/tf_adapter/swig/ge_plugin.i b/tf_adapter/swig/ge_plugin.i index e7390e6c8..617048e60 100644 --- a/tf_adapter/swig/ge_plugin.i +++ b/tf_adapter/swig/ge_plugin.i @@ -59,7 +59,7 @@ 
extern int32_t SetDeviceSatMode(uint32_t mode); extern int32_t GetDeviceSatMode(); -extern bool ProfilerStart(const bool &task_time, const std::string &aic_metrics, const std::string &output_path); +extern bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path); extern bool ProfilerStop(); %} @@ -136,6 +136,6 @@ extern int32_t SetDeviceSatMode(uint32_t mode); extern int32_t GetDeviceSatMode(); -extern bool ProfilerStart(const bool &task_time, const std::string &aic_metrics, const std::string &output_path); +extern bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path); extern bool ProfilerStop(); \ No newline at end of file diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc index 9cb6f6d95..75ab23093 100644 --- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc @@ -494,7 +494,7 @@ TEST_F(GeOpTest, GeOpFuncTestWithProfiling) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt"; gtl::InlinedVector inputs; - EXPECT_TRUE(ProfilerStart(true, "PipeUtilization", "./")); + EXPECT_TRUE(ProfilerStart("L0", "PipeUtilization", "./")); EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); EXPECT_TRUE(ProfilerStop()); } diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 3b491bb51..1baa86155 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -242,7 +242,7 @@ TEST_F(GeOpTest, GeOpFuncTestWithProfiling) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt"; gtl::InlinedVector inputs; - EXPECT_TRUE(ProfilerStart(true, "PipeUtilization", "./")); + EXPECT_TRUE(ProfilerStart("L0", 
"PipeUtilization", "./")); EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); EXPECT_TRUE(ProfilerStop()); } diff --git a/tf_adapter/util/profiler.cc b/tf_adapter/util/profiler.cc index a6fa6521a..1ddf73177 100644 --- a/tf_adapter/util/profiler.cc +++ b/tf_adapter/util/profiler.cc @@ -19,6 +19,10 @@ #include "npu_attrs.h" namespace { +constexpr uint64_t Level_none = 0; +constexpr uint64_t Level0 = ACL_PROF_TASK_TIME_L0 | ACL_PROF_ACL_API; +constexpr uint64_t Level1 = ACL_PROF_TASK_TIME | ACL_PROF_ACL_API | ACL_PROF_HCCL_TRACE | ACL_PROF_AICORE_METRICS; +constexpr uint64_t Level2 = Level1 | ACL_PROF_RUNTIME_API | ACL_PROF_AICPU; std::map kNpuMetricsMap = { {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION}, {"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION}, @@ -26,8 +30,13 @@ std::map kNpuMetricsMap = { {"L0bAndWidth", ACL_AICORE_L0B_AND_WIDTH}, {"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO}, {"MemoryUb", ACL_AICORE_MEMORY_UB}, - {"L2Cache", ACL_AICORE_L2_CACHE}, - {"", ACL_AICORE_NONE} + {"L2Cache", ACL_AICORE_L2_CACHE} +}; +std::map kProfilerLevelMap = { + {"L0", Level0}, + {"L1", Level1}, + {"L2", Level2}, + {"None", Level_none} }; } namespace tensorflow { @@ -36,22 +45,50 @@ Profiler &Profiler::GetInstance() { return instance; } -bool Profiler::EnableProfiler(const bool &task_time, +bool Profiler::GetLevel(const std::string &level) { + if (level.empty()) { + level_ = Level0; + return true; + } + const auto level_iter = kProfilerLevelMap.find(level); + if (level_iter != kProfilerLevelMap.cend()) { + level_ = level_iter->second; + return true; + } + ADP_LOG(ERROR) << "Profiling options: level cannot set to: " << level; + LOG(ERROR) << "Profiling options: level cannot set to: " << level; + return false; +} + +bool Profiler::GetAicMetrics(const std::string &aic_metrics) { + if (aic_metrics.empty()) { + aic_metrics_ = ACL_AICORE_NONE; + return true; + } + const auto metrics_iter = 
kNpuMetricsMap.find(aic_metrics); + if (metrics_iter != kNpuMetricsMap.cend()) { + aic_metrics_ = metrics_iter->second; + level_ |= ACL_PROF_AICORE_METRICS; + return true; + } + ADP_LOG(ERROR) << "Profiling options: aic_metrics cannot set to: " << aic_metrics; + return false; +} + +bool Profiler::EnableProfiler(const std::string &level, const std::string &aic_metrics, const std::string &output_path) { mutex_lock lock{mu_}; if (enable_flag_) { ADP_LOG(WARNING) << "Profiling has been enable"; } ADP_LOG(INFO) << "Enable profiling"; - const auto iter = kNpuMetricsMap.find(aic_metrics); - if (iter != kNpuMetricsMap.cend()) { - aic_metrics_ = iter->second; - } else { - ADP_LOG(ERROR) << "Profiling options: aic_metrics cannot set to: " << aic_metrics; + if (!GetLevel(level)) { return false; } - task_time_ = task_time; - output_path_ = output_path; + if (!GetAicMetrics(aic_metrics)) { + return false; + } + output_path_ = output_path.empty() ? "./" : output_path; enable_flag_ = true; return true; } @@ -65,9 +102,6 @@ bool Profiler::Start() { ADP_LOG(WARNING) << "Profiling has been start"; return true; } - if (output_path_.empty()) { - output_path_ = "./"; - } if (aclprofInit(output_path_.c_str(), output_path_.size()) != ACL_ERROR_NONE) { return false; } @@ -75,19 +109,17 @@ bool Profiler::Start() { if (!GetDeviceID(device_id).ok()) { + // aclprofInit succeeded above; finalize it again before bailing out. + (void)aclprofFinalize(); return false; } - uint64_t datatype_config = 0UL; - if (task_time_) { - datatype_config |= ACL_PROF_TASK_TIME; - } - if (aic_metrics_ != ACL_AICORE_NONE) { - datatype_config |= ACL_PROF_AICORE_METRICS; - } - prof_config_ = aclprofCreateConfig(&device_id, 1U, aic_metrics_, nullptr, datatype_config); + prof_config_ = aclprofCreateConfig(&device_id, 1U, aic_metrics_, nullptr, level_); if (prof_config_ == nullptr) { + (void)aclprofFinalize(); return false; } - // prof_config_ = aclprofCreateConfig(&device_id, 1U, aic_metrics, nullptr); if (aclprofStart(prof_config_) != ACL_ERROR_NONE) { + (void)aclprofDestroyConfig(prof_config_); + prof_config_ = nullptr; + (void)aclprofFinalize(); return false; } has_start_ = true; @@ -99,6 +131,11 @@ bool Profiler::Stop() { if (!enable_flag_) { return true; } + if (!has_start_) { + // Enabled but never successfully started: just disarm, there is nothing to stop. + enable_flag_ = false; + return true; + } if (aclprofStop(prof_config_) != ACL_ERROR_NONE) { return false; } diff --git a/tf_adapter/util/profiler.h b/tf_adapter/util/profiler.h index ff95bf477..3c94e1964 100644 --- a/tf_adapter/util/profiler.h +++ b/tf_adapter/util/profiler.h @@ -24,7 +24,7 @@ namespace tensorflow { class 
Profiler { public: static Profiler &GetInstance(); - bool EnableProfiler(const bool &task_time, + bool EnableProfiler(const std::string &level, const std::string &aic_metrics, const std::string &output_path); bool Start(); bool Stop(); @@ -35,8 +35,10 @@ namespace tensorflow { Profiler& operator=(const Profiler &obj) = delete; explicit Profiler(Profiler &&obj) = delete; Profiler& operator=(Profiler &&obj) = delete; + bool GetLevel(const std::string &level); + bool GetAicMetrics(const std::string &aic_metrics); aclprofConfig *prof_config_{nullptr}; - bool task_time_{false}; + uint64_t level_{0UL}; aclprofAicoreMetrics aic_metrics_{ACL_AICORE_NONE}; bool enable_flag_{false}; bool has_start_{false}; diff --git a/tf_adapter/util/profiler_interface.cc b/tf_adapter/util/profiler_interface.cc index 3f3fc673a..29ec717a7 100644 --- a/tf_adapter/util/profiler_interface.cc +++ b/tf_adapter/util/profiler_interface.cc @@ -17,10 +17,10 @@ #include "profiler_interface.h" #include "profiler.h" -bool ProfilerStart(const bool &task_time, +bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path) { - return tensorflow::Profiler::GetInstance().EnableProfiler(task_time, aic_metrics, output_path); + return tensorflow::Profiler::GetInstance().EnableProfiler(level, aic_metrics, output_path); } bool ProfilerStop() { diff --git a/tf_adapter/util/profiler_interface.h b/tf_adapter/util/profiler_interface.h index 27aba7f63..be386a1d2 100644 --- a/tf_adapter/util/profiler_interface.h +++ b/tf_adapter/util/profiler_interface.h @@ -18,7 +18,7 @@ #define TENSORFLOW_UTILS_PROFILER_INTERFACE_H_ #include -bool ProfilerStart(const bool &task_time, const std::string &aic_metrics, const std::string &output_path); +bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path); bool ProfilerStop(); -- Gitee