diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 8e5b403763d79f0b4499848c3d5d367615a40f19..591a1ec82bb04e7944dd949f7c8f16fe8078ca2f 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -80,6 +80,7 @@ #include "tf_adapter_2.x/npu_device/core/npu_micros.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tf_adapter/util/profiler.h" namespace tensorflow { #ifdef TF_VERSION_TF2 @@ -1132,6 +1133,9 @@ Status GeOp::RunGraph(OpKernelContext *ctx, const uint32_t &graph_id, const std::vector &inputs, ge::RunAsyncCallback callback) { // call ge session runGraphAsync api + if (!Profiler::GetInstance().Start()) { + return errors::Internal("Start profiling failed"); + } ADP_LOG(INFO) << "[GEOP] Call ge session RunGraphAsync, kernel_name: " << ctx->op_kernel().name() << ", tf session: " << tf_session_ << ", graph id: " << graph_id; diff --git a/tf_adapter/python/npu_bridge/profiler/profiler.py b/tf_adapter/python/npu_bridge/profiler/profiler.py new file mode 100644 index 0000000000000000000000000000000000000000..6cd7f0ad99f828675531e6f8f6516d972ecc8b6a --- /dev/null +++ b/tf_adapter/python/npu_bridge/profiler/profiler.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+from npu_bridge import tf_adapter
+class profiler(object):
+    """Context manager: enables NPU profiling via tf_adapter.ProfilerStart, stops it via ProfilerStop."""
+    def __init__(self, *, level: str = "L0", aic_metrics: str = "", output_path: str = ""):
+        # Only level is type-checked here; all three options are handed to ProfilerStart by start().
+        if not isinstance(level, str):
+            raise ValueError('{} should be str'.format(level))
+        # No trailing comma: "level," would store a 1-tuple and break the string binding.
+        self._level = level
+        self._aic_metrics = aic_metrics
+        self._output_path = output_path
+
+    def __enter__(self):
+        self.start()
+        return self
+
+    def __exit__(self, exe_type, exe_val, exc_tb):
+        """Stop profiling on exit; returns None, so exceptions from the with-body propagate."""
+        self.stop()
+
+    def start(self):
+        """Enable profiling with the stored options; raise RuntimeError if the binding reports failure."""
+        if not tf_adapter.ProfilerStart(self._level, self._aic_metrics, self._output_path):
+            raise RuntimeError("Start profiler failed")
+
+    def stop(self):
+        """Stop profiling; raise RuntimeError if the binding reports failure."""
+        if not tf_adapter.ProfilerStop():
+            raise RuntimeError("Stop profiler failed")
\ No newline at end of file
diff --git a/tf_adapter/swig/ge_plugin.i b/tf_adapter/swig/ge_plugin.i
index 8dbbb3949276994d5b54803a3b777a70d57742b4..617048e601f3ff54b9a9423f67aed71bc66b36f6 100644
--- a/tf_adapter/swig/ge_plugin.i
+++ b/tf_adapter/swig/ge_plugin.i
@@ -43,6 +43,7 @@ namespace std{
 
 %{
 #include "tf_adapter/util/npu_plugin.h"
+#include "tf_adapter/util/profiler_interface.h"
 
 extern int32_t InitRdmaPool(size_t size);
 
@@ -57,6 +58,10 @@ extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_
 extern int32_t SetDeviceSatMode(uint32_t mode);
 
 extern int32_t GetDeviceSatMode();
+
+extern bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path);
+
+extern bool ProfilerStop();
 %}
 
 %template(var_info_vec) std::vector;
@@ -130,3 +135,7 @@ extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_
 extern int32_t SetDeviceSatMode(uint32_t mode);
 
 extern int32_t GetDeviceSatMode();
+
+extern bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path);
+
+extern bool ProfilerStop();
\ No newline at end of file
diff --git
a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc
index 94c71d9251154b35ed03204798660044798bc571..1a0102185afa71b4743d4c458bd141a54e12b20b 100644
--- a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc
+++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc
@@ -59,6 +59,35 @@ namespace acl {
 }
 }
 
+// aclprof* stubs for tests: every call reports success without touching a device.
+aclError aclprofInit(const char *profilerResultPath, size_t length) {
+  return ACL_SUCCESS;
+}
+
+aclError aclprofFinalize() {
+  return ACL_SUCCESS;
+}
+
+aclError aclprofStart(const aclprofConfig *profilerConfig) {
+  return ACL_SUCCESS;
+}
+
+aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig) {
+  return ACL_SUCCESS;
+}
+
+aclError aclprofStop(const aclprofConfig *profilerConfig) {
+  return ACL_SUCCESS;
+}
+
+aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
+    aclprofAicoreMetrics aicoreMetrics, const aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig) {
+  // Return a non-null opaque handle: Profiler::Start() treats nullptr as a failed config creation,
+  // so a null stub could only ever exercise the failure path. The other stubs never dereference it.
+  static uint64_t stub_config = 0U;
+  return reinterpret_cast<aclprofConfig *>(&stub_config);
+}
+
 aclError acltdtDestroyChannel(acltdtChannelHandle *handle) {
   if (handle == nullptr) {
     return ACL_ERROR_INVALID_PARAM;
diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h
index acabc023cf82c20f4a5ec757ab35d74073fa9c04..7d10a08cce807dd90e0a19ad287b3826160dfde3 100644
--- a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h
+++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h
@@ -21,6 +21,7 @@
 #include "acl/acl_base.h"
 #include "acl/acl_tdt.h"
 #include "acl/acl_rt.h"
+#include "acl/acl_prof.h"
 #include "graph/ascend_string.h"
 #include "graph/ge_error_codes.h"
 #include "graph/small_vector.h"
diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
index 041a948d32dd34d50de59ad4a80b7c7000b601be..75ab230933e54354111e7bb9c58d3f394957d7e4 100644
--- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
+++
b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
@@ -9,6 +9,7 @@
 #include "gtest/gtest.h"
 #include "ge_stub.h"
 #include "callback_executor.h"
+#include "tf_adapter/util/profiler_interface.h"
 #define private public
 #include "tf_adapter/kernels/geop_npu.h"
 #undef private
@@ -488,6 +489,17 @@ TEST_F(GeOpTest, GeOpFuncSubGraphTest) {
   EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp12_0").ok());
 }
 
+// Arms the profiler through the C interface, runs the basic GeOp graph, then stops profiling.
+TEST_F(GeOpTest, GeOpFuncTestWithProfiling) {
+  NpuClose();
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  EXPECT_TRUE(ProfilerStart("L0", "PipeUtilization", "./"));
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok());
+  EXPECT_TRUE(ProfilerStop());
+}
+
 TEST_F(GeOpTest, GeOpDynamicDimsTest) {
   NodeDef node_def;
   std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_dims.pbtxt";
diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
index e856fb82ed0db344abde85859bb9680664774ebc..1baa86155420431f064838750ef68ec85e67e5d4 100644
--- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
+++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc
@@ -12,6 +12,7 @@
 #include "tf_adapter/util/npu_plugin.h"
 #include "tf_adapter/util/util.h"
 #include "callback_executor.h"
+#include "tf_adapter/util/profiler_interface.h"
 #define private public
 #include "tf_adapter/kernels/geop_npu.h"
 #undef private
@@ -236,6 +237,17 @@ TEST_F(GeOpTest, GeOpFuncTest) {
   EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok());
 }
 
+// Arms the profiler through the C interface, runs the basic GeOp graph, then stops profiling.
+TEST_F(GeOpTest, GeOpFuncTestWithProfiling) {
+  NpuClose();
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  EXPECT_TRUE(ProfilerStart("L0", "PipeUtilization", "./"));
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok());
+  EXPECT_TRUE(ProfilerStop());
+}
+
 TEST_F(GeOpTest, GeOpFuncTest_NpuCompile) {
   NpuClose();
   NodeDef node_def;
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index f970327d645135c857dd4368697bfeca400e9792..c22f8256b98b7c911beb4da81008d7acd675cbc5 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -31,6 +31,8 @@
 #include "ge/ge_api.h"
 #include "ge/ge_api_types.h"
 #include "tf_adapter_2.x/npu_device/core/npu_micros.h"
+#include "tf_adapter/util/profiler.h"
+
 namespace tensorflow {
 namespace {
 bool kIsNewDataTransfer = true;
@@ -1941,6 +1943,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   }
   if (params.count("profiling_mode") > 0) {
     profiling_mode = params.at("profiling_mode").b();
+    if (profiling_mode && Profiler::GetInstance().IsEnableProfiler()) {
+      return errors::InvalidArgument("Option profiling_mode cannot set true when use tf_adapter.profiler");
+    }
   }
   if (profiling_mode) {
     if (params.count("profiling_options") > 0 && params.at("profiling_options").s() != "") {
diff --git a/tf_adapter/util/profiler.cc b/tf_adapter/util/profiler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1ddf7317708b4fa6ec841d7bc08db58d6ae97e5d
--- /dev/null
+++ b/tf_adapter/util/profiler.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiler.h"
+#include <map>
+#include <string>
+#include "tf_adapter/common/adapter_logger.h"
+#include "npu_attrs.h"
+
+namespace {
+// Bit masks passed to aclprofCreateConfig as dataTypeConfig, one per user-facing level.
+constexpr uint64_t Level_none = 0;
+constexpr uint64_t Level0 = ACL_PROF_TASK_TIME_L0 | ACL_PROF_ACL_API;
+constexpr uint64_t Level1 = ACL_PROF_TASK_TIME | ACL_PROF_ACL_API | ACL_PROF_HCCL_TRACE | ACL_PROF_AICORE_METRICS;
+constexpr uint64_t Level2 = Level1 | ACL_PROF_RUNTIME_API | ACL_PROF_AICPU;
+// Accepted values of the aic_metrics option.
+const std::map<std::string, aclprofAicoreMetrics> kNpuMetricsMap = {
+  {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
+  {"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
+  {"MemoryBandwidth", ACL_AICORE_MEMORY_BANDWIDTH},
+  {"L0bAndWidth", ACL_AICORE_L0B_AND_WIDTH},
+  {"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
+  {"MemoryUb", ACL_AICORE_MEMORY_UB},
+  {"L2Cache", ACL_AICORE_L2_CACHE}
+};
+// Accepted values of the level option.
+const std::map<std::string, uint64_t> kProfilerLevelMap = {
+  {"L0", Level0},
+  {"L1", Level1},
+  {"L2", Level2},
+  {"None", Level_none}
+};
+}
+namespace tensorflow {
+Profiler &Profiler::GetInstance() {
+  static Profiler instance;
+  return instance;
+}
+
+// Translates the level option into level_; an empty string selects Level0.
+// Returns false, leaving level_ untouched, when the name is unknown.
+bool Profiler::GetLevel(const std::string &level) {
+  if (level.empty()) {
+    level_ = Level0;
+    return true;
+  }
+  const auto level_iter = kProfilerLevelMap.find(level);
+  if (level_iter != kProfilerLevelMap.cend()) {
+    level_ = level_iter->second;
+    return true;
+  }
+  ADP_LOG(ERROR) << "Profiling options: level cannot set to: " << level;
+  LOG(ERROR) << "Profiling options: level cannot set to: " << level;
+  return false;
+}
+
+// Translates the aic_metrics option into aic_metrics_; an empty string selects ACL_AICORE_NONE.
+// A recognised metric also switches on ACL_PROF_AICORE_METRICS in level_, so GetLevel() must run first.
+bool Profiler::GetAicMetrics(const std::string &aic_metrics) {
+  if (aic_metrics.empty()) {
+    aic_metrics_ = ACL_AICORE_NONE;
+    return true;
+  }
+  const auto metrics_iter = kNpuMetricsMap.find(aic_metrics);
+  if (metrics_iter != kNpuMetricsMap.cend()) {
+    aic_metrics_ = metrics_iter->second;
+    level_ |= ACL_PROF_AICORE_METRICS;
+    return true;
+  }
+  ADP_LOG(ERROR) << "Profiling options: aic_metrics cannot set to: " << aic_metrics;
+  LOG(ERROR) << "Profiling options: aic_metrics cannot set to: " << aic_metrics;
+  return false;
+}
+
+// Validates and stores the options and arms the profiler; the ACL session itself is opened by Start().
+// An empty output_path falls back to "./".
+bool Profiler::EnableProfiler(const std::string &level,
+                              const std::string &aic_metrics, const std::string &output_path) {
+  mutex_lock lock{mu_};
+  if (enable_flag_) {
+    ADP_LOG(WARNING) << "Profiling has been enable";
+  }
+  ADP_LOG(INFO) << "Enable profiling";
+  if (!GetLevel(level)) {
+    return false;
+  }
+  if (!GetAicMetrics(aic_metrics)) {
+    return false;
+  }
+  output_path_ = output_path.empty() ? "./" : output_path;
+  enable_flag_ = true;
+  return true;
+}
+
+// Opens the ACL profiling session if the profiler is armed and not yet running.
+// Returns true when there is nothing to do or the session started, false on any ACL/device failure.
+bool Profiler::Start() {
+  mutex_lock lock{mu_};
+  if (!enable_flag_) {
+    return true;
+  }
+  if (has_start_) {
+    ADP_LOG(WARNING) << "Profiling has been start";
+    return true;
+  }
+  if (aclprofInit(output_path_.c_str(), output_path_.size()) != ACL_ERROR_NONE) {
+    return false;
+  }
+  uint32_t device_id = 0U;
+  bool started = false;
+  if (GetDeviceID(device_id).ok()) {
+    prof_config_ = aclprofCreateConfig(&device_id, 1U, aic_metrics_, nullptr, level_);
+    started = (prof_config_ != nullptr) && (aclprofStart(prof_config_) == ACL_ERROR_NONE);
+  }
+  if (!started) {
+    // Roll back the partial setup so a later Start() begins from a clean state
+    // instead of re-initialising on top of it and leaking the config.
+    if (prof_config_ != nullptr) {
+      static_cast<void>(aclprofDestroyConfig(prof_config_));
+      prof_config_ = nullptr;
+    }
+    static_cast<void>(aclprofFinalize());
+    return false;
+  }
+  has_start_ = true;
+  return true;
+}
+
+// Closes the ACL profiling session and disarms the profiler.
+bool Profiler::Stop() {
+  mutex_lock lock{mu_};
+  if (!has_start_) {
+    // Disabled, or enabled but no graph has run yet: there is no ACL session or
+    // config to tear down, so only disarm instead of passing a null config to ACL.
+    enable_flag_ = false;
+    return true;
+  }
+  if (aclprofStop(prof_config_) != ACL_ERROR_NONE) {
+    return false;
+  }
+  if (aclprofDestroyConfig(prof_config_) != ACL_ERROR_NONE) {
+    return false;
+  }
+  prof_config_ = nullptr;
+  if (aclprofFinalize() != ACL_ERROR_NONE) {
+    return false;
+  }
+  has_start_ = false;
+  enable_flag_ = false;
+  return true;
+}
+}
\ No newline at end of file
diff --git a/tf_adapter/util/profiler.h b/tf_adapter/util/profiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..3c94e1964f04356fe85f3d876e56b18602a1de49
--- /dev/null
+++ b/tf_adapter/util/profiler.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORFLOW_UTILS_PROFILER_H_
+#define TENSORFLOW_UTILS_PROFILER_H_
+#include "acl/acl_prof.h"
+#include <string>
+#include "tensorflow/core/platform/mutex.h"
+
+namespace tensorflow {
+// Process-wide singleton wrapping the ACL profiling session; its state is guarded by mu_.
+class Profiler {
+ public:
+  static Profiler &GetInstance();
+  bool EnableProfiler(const std::string &level, const std::string &aic_metrics, const std::string &output_path);
+  bool Start();
+  bool Stop();
+  bool IsEnableProfiler() { mutex_lock lock{mu_}; return enable_flag_; }  // read under mu_, like the writers
+ private:
+  Profiler() = default;
+  explicit Profiler(const Profiler &obj) = delete;
+  Profiler& operator=(const Profiler &obj) = delete;
+  explicit Profiler(Profiler &&obj) = delete;
+  Profiler& operator=(Profiler &&obj) = delete;
+  bool GetLevel(const std::string &level);
+  bool GetAicMetrics(const std::string &aic_metrics);
+  aclprofConfig *prof_config_{nullptr};
+  uint64_t level_{0UL};
+  aclprofAicoreMetrics aic_metrics_{ACL_AICORE_NONE};
+  bool enable_flag_{false};
+  bool has_start_{false};
+  mutex mu_;
+  std::string output_path_;
+};
+}
+
+#endif
\ No newline at end of file
diff --git a/tf_adapter/util/profiler_interface.cc b/tf_adapter/util/profiler_interface.cc
new file mode 100644
index 0000000000000000000000000000000000000000..29ec717a73aa113666845344bca7cf6969bad49d
--- /dev/null
+++ b/tf_adapter/util/profiler_interface.cc
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiler_interface.h"
+#include "profiler.h"
+
+// Forwards the three options to the Profiler singleton's EnableProfiler and returns its result.
+bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path) {
+  return tensorflow::Profiler::GetInstance().EnableProfiler(level, aic_metrics, output_path);
+}
+
+// Forwards to the Profiler singleton's Stop and returns its result.
+bool ProfilerStop() {
+  return tensorflow::Profiler::GetInstance().Stop();
+}
\ No newline at end of file
diff --git a/tf_adapter/util/profiler_interface.h b/tf_adapter/util/profiler_interface.h
new file mode 100644
index 0000000000000000000000000000000000000000..be386a1d213bd5fc733f527e7c9358ef3e590057
--- /dev/null
+++ b/tf_adapter/util/profiler_interface.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORFLOW_UTILS_PROFILER_INTERFACE_H_
+#define TENSORFLOW_UTILS_PROFILER_INTERFACE_H_
+#include <string>
+
+// Enables profiling with the given options; returns false on failure.
+bool ProfilerStart(const std::string &level, const std::string &aic_metrics, const std::string &output_path);
+// Stops profiling; returns false on failure.
+bool ProfilerStop();
+#endif
\ No newline at end of file
diff --git a/tf_adapter_2.x/python/npu_device/configs/option_base.py b/tf_adapter_2.x/python/npu_device/configs/option_base.py
index 8fe4d262325d165eb7cb21a94e38a93ecd4cbc45..526aac6e40c27050fe353717a8d0b2f7c5bd4c84 100644
--- a/tf_adapter_2.x/python/npu_device/configs/option_base.py
+++ b/tf_adapter_2.x/python/npu_device/configs/option_base.py
@@ -15,8 +15,6 @@
 # ==============================================================================
 
 """NPU basic configurations"""
-
-
 class OptionValue:
     """Options for setting npu basic configurations"""
     def __init__(self, default, optional):
diff --git a/tf_adapter_2.x/tests/stub/acl_stub.cpp b/tf_adapter_2.x/tests/stub/acl_stub.cpp
index 8d1c7393b2d3553f00d3476fc71487d6deab7216..f01408cfdc093c6a421ca5e80e18401dc686468a 100644
--- a/tf_adapter_2.x/tests/stub/acl_stub.cpp
+++ b/tf_adapter_2.x/tests/stub/acl_stub.cpp
@@ -22,6 +22,7 @@ limitations under the License.
#include "acl/acl_op_compiler.h" #include "acl/acl_rt.h" #include "acl/acl_tdt.h" +#include "acl/acl_prof.h" namespace { const uint32_t kDeviceSatModeLimit = 2U; @@ -65,6 +66,31 @@ struct acltdtChannelHandle { extern "C" { #endif +aclError aclprofInit(const char *profilerResultPath, size_t length) { + return ACL_SUCCESS; +} + +aclError aclprofFinalize() { + return ACL_SUCCESS; +} + +aclError aclprofStart(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclError aclprofStop(const aclprofConfig *profilerConfig) { + return ACL_SUCCESS; +} + +aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, + aclprofAicoreMetrics aicoreMetrics, const aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig) { + return nullptr; +} + aclError aclopCompileAndExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],