From fd44e5c0810eb0bba52a91cb65e113c792226ca6 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Tue, 25 May 2021 21:48:21 +0800 Subject: [PATCH 1/3] add tensorflow debug dump tools for gpu --- tf_adapter_2.x/CMakeLists.txt | 11 +- .../toolkits/tfdbg_ascend/CMakeLists.txt | 22 +++ .../toolkits/tfdbg_ascend/dump_op.cpp | 57 ++++++++ .../toolkits/tfdbg_ascend/dump_pass.cpp | 127 ++++++++++++++++++ .../toolkits/tfdbg_ascend/python/MANIFEST.in | 2 + .../toolkits/tfdbg_ascend/python/setup.py | 11 ++ .../python/tfdbg_ascend/__init__.py | 2 + .../python/tfdbg_ascend/tfdbg_ascend.py | 22 +++ 8 files changed, 250 insertions(+), 4 deletions(-) create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py diff --git a/tf_adapter_2.x/CMakeLists.txt b/tf_adapter_2.x/CMakeLists.txt index 985e56b85..aed583999 100644 --- a/tf_adapter_2.x/CMakeLists.txt +++ b/tf_adapter_2.x/CMakeLists.txt @@ -13,7 +13,7 @@ if (DEFINED ASCEND_CI_BUILD_DIR) set(CMAKE_C_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 ${CMAKE_CXX_FLAGS}") include_directories(${PYTHON_INCLUDE_DIR}) -else() +else () if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/TF_INSTALLED_PATH OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/ASCEND_INSTALLED_PATH OR NOT EXISTS @@ -36,14 +36,17 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/acl/module.cmake) include(${CMAKE_CURRENT_LIST_DIR}/cmake/tensorflow/module.cmake) 
include(${CMAKE_CURRENT_LIST_DIR}/cmake/graph_engine/module.cmake) -file(COPY ${CMAKE_CURRENT_LIST_DIR}/python DESTINATION ${CMAKE_BINARY_DIR}/dist) -set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/dist/python/npu_device) +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/dist/libs) add_subdirectory(npu_device) add_subdirectory(npu_ops) +add_subdirectory(toolkits/tfdbg_ascend) add_custom_target(ascend_adapter2 ALL - COMMAND cd ${CMAKE_BINARY_DIR}/dist/python/ && ${PYTHON_BIN_PATH} setup.py bdist_wheel + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_LIST_DIR}/python ${CMAKE_BINARY_DIR}/dist/ascend_adapter2 + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/dist/libs/_npu_device_backends.so ${CMAKE_BINARY_DIR}/dist/ascend_adapter2/npu_device + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/dist/libs/_npu_ops.so ${CMAKE_BINARY_DIR}/dist/ascend_adapter2/npu_device + COMMAND cd ${CMAKE_BINARY_DIR}/dist/ascend_adapter2 && ${PYTHON_BIN_PATH} setup.py bdist_wheel DEPENDS _npu_ops _npu_device_backends VERBATIM) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt b/tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt new file mode 100644 index 000000000..53af073ea --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.14) +project(TfdbgAscend) + +file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_LIST_DIR}/*.cpp) +foreach (CPP_SOURCE ${SOURCES}) + file(RELATIVE_PATH RELATIVE_CPP_SOURCE ${CMAKE_CURRENT_SOURCE_DIR} ${CPP_SOURCE}) + set_property(SOURCE ${CPP_SOURCE} PROPERTY COMPILE_DEFINITIONS __FILE__=\"${RELATIVE_CPP_SOURCE}\") +endforeach (CPP_SOURCE) + +add_library(_tfdbg_ascend SHARED EXCLUDE_FROM_ALL ${SOURCES}) +set_target_properties(_tfdbg_ascend PROPERTIES PREFIX "") + +target_link_libraries(_tfdbg_ascend PRIVATE + tensorflow_libs) + +add_custom_target(tfdbg_ascend + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_LIST_DIR}/python ${CMAKE_BINARY_DIR}/dist/tfdbg_ascend + 
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/dist/libs/_tfdbg_ascend.so ${CMAKE_BINARY_DIR}/dist/tfdbg_ascend/tfdbg_ascend + COMMAND cd ${CMAKE_BINARY_DIR}/dist/tfdbg_ascend && ${PYTHON_BIN_PATH} setup.py bdist_wheel + DEPENDS _tfdbg_ascend + VERBATIM + ) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp new file mode 100644 index 000000000..6cd6dee44 --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp @@ -0,0 +1,57 @@ +/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "absl/algorithm/container.h" +#include "absl/memory/memory.h" +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/c/eager/c_api_internal.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/util/env_var.h" + +namespace tensorflow { + +REGISTER_OP("AscendDump") + .Input("inputs: Tin") + .Attr("tensor_names: list(string)") + .Attr("Tin: list(type)") + .SetIsStateful(); + +class AscendDump : public OpKernel { + public: + explicit AscendDump(OpKernelConstruction *ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("tensor_names", &tensor_names_)); + } + + void Compute(OpKernelContext *ctx) override { + OpInputList inputs; + OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &inputs)); + for (int64 i = 0; i < inputs.size(); i++) { + std::cout << tensor_names_[i] << " " << inputs[i].DebugString(inputs[i].NumElements()) << std::endl; + } + } + + private: + std::vector tensor_names_; +}; + +REGISTER_KERNEL_BUILDER(Name("AscendDump").Device(DEVICE_CPU).Priority(999), AscendDump); + +} // namespace tensorflow \ No newline at end of file diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp new file mode 100644 index 000000000..8280084ba --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp @@ -0,0 +1,127 @@ +/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/c/tf_status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/platform/logging.h" + +#include "absl/algorithm/container.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/immediate_execution_operation.h" +#include "tensorflow/c/eager/tfe_context_internal.h" +#include "tensorflow/c/eager/tfe_op_internal.h" +#include "tensorflow/c/eager/tfe_tensorhandle_internal.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" + +namespace tensorflow { + +class DbgDumpPass : public GraphOptimizationPass { + public: + DbgDumpPass() = default; + ~DbgDumpPass() override = default; + Status Run(const GraphOptimizationPassOptions &options) override; + + private: + Status ProcessGraph(Graph *graph, FunctionLibraryDefinition *func_lib); +}; + +Status DbgDumpPass::Run(const GraphOptimizationPassOptions &options) { + if (options.graph == nullptr && options.partition_graphs == nullptr) { + return Status::OK(); + } + + FunctionLibraryDefinition *func_lib = options.flib_def; + Status status = Status::OK(); + if (options.graph != nullptr) { + std::unique_ptr *graph = options.graph; + TF_RETURN_IF_ERROR(ProcessGraph((*graph).get(), func_lib)); + } else if (options.partition_graphs != nullptr) { + for (auto &pg : *options.partition_graphs) { + 
TF_RETURN_IF_ERROR(ProcessGraph(pg.second.get(), func_lib)); + } + } + + return Status::OK(); +} + +Status DumpOutputs(Graph *graph, Node *node) { + if (node->num_outputs() == 0) { + return Status::OK(); + } + + std::vector tensor_names; + std::vector copyable_outputs; + std::unordered_set output_idxes; + std::unordered_set output_nodes; + for (int i = 0; i < node->num_outputs(); i++) { + if (DataTypeCanUseMemcpy(node->output_type(i))) { + tensor_names.emplace_back(node->name() + ":" + std::to_string(i)); + copyable_outputs.emplace_back(NodeBuilder::NodeOut(node, i)); + output_idxes.insert(i); + } + } + + if (copyable_outputs.empty()) { + return Status::OK(); + } + + for (auto edge : node->out_edges()) { + if (output_idxes.count(edge->src_output())) { + output_nodes.insert(edge->dst()); + } + } + + Node *dump_node = nullptr; + TF_RETURN_IF_ERROR(NodeBuilder(node->name() + "_dump_outputs", "AscendDump") + .Input(copyable_outputs) + .Attr("tensor_names", tensor_names) + .Finalize(graph, &dump_node)); + + for (auto n : output_nodes) { + graph->AddControlEdge(dump_node, n); + } + return Status::OK(); +} + +Status DbgDumpPass::ProcessGraph(Graph *graph, FunctionLibraryDefinition *func_lib) { + static std::atomic uuid{0}; + + if (graph == nullptr) { + return Status::OK(); + } + + std::string graph_key = std::to_string(uuid.fetch_add(1)) + "_" + std::to_string(graph->num_nodes()); + + WriteTextProto(Env::Default(), "Graph_" + graph_key + ".before.pbtxt", graph->ToGraphDefDebug()); + + int num_nodes = graph->num_node_ids(); + for (int i = 0; i < num_nodes; i++) { + Node *node = graph->FindNodeId(i); + if (node == nullptr || !node->IsOp()) { + continue; + } + DumpOutputs(graph, node); + } + + WriteTextProto(Env::Default(), "Graph_" + graph_key + ".after.pbtxt", graph->ToGraphDefDebug()); + + return Status::OK(); +} +// REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, DbgDumpPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, 
DbgDumpPass); +} // namespace tensorflow diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in new file mode 100644 index 000000000..41b6083f2 --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include * *.py +recursive-include * _tfdbg_ascend.so \ No newline at end of file diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py new file mode 100644 index 000000000..5bc2f030b --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup, Extension +from setuptools import find_packages + +setup(name='tfdbg_ascend', + version='0.1', + description='This is a demo package', + long_description='This is a demo package', + packages=find_packages(), + include_package_data=True, + ext_modules=[], + zip_safe=False) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py new file mode 100644 index 000000000..3b3173c1f --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py @@ -0,0 +1,2 @@ +from tfdbg_ascend.tfdbg_ascend import enable +from tfdbg_ascend.tfdbg_ascend import disable diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py new file mode 100644 index 000000000..af235f8e5 --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py @@ -0,0 +1,22 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
+# Description: Common depends and micro defines for and only for data preprocess module + +import os +import absl.logging as logging +import tensorflow as tf +import tfdbg_ascend + +try: + __handle = tf.load_op_library(os.path.dirname(tfdbg_ascend.__file__) + "/_tfdbg_ascend.so") +except Exception as e: + logging.error(e) + + +def enable(): + logging.info("tensorflow debug dump provide by npu enabled") + pass + + +def disable(): + logging.info("tensorflow debug dump provide by npu disabled") + pass -- Gitee From cfefc651270a7c34d0053191d703d0bca3c722de Mon Sep 17 00:00:00 2001 From: medivh-x Date: Wed, 26 May 2021 20:06:00 +0800 Subject: [PATCH 2/3] support dump npy --- .../toolkits/tfdbg_ascend/dump_op.cpp | 91 ++++++++++++++++++- .../toolkits/tfdbg_ascend/dump_pass.cpp | 7 ++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp index 6cd6dee44..8c5a94757 100644 --- a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp @@ -16,6 +16,7 @@ limitations under the License. #include "absl/algorithm/container.h" #include "absl/memory/memory.h" +#include "absl/strings/str_replace.h" #include "tensorflow/c/c_api.h" #include "tensorflow/c/c_api_internal.h" #include "tensorflow/c/eager/c_api_experimental.h" @@ -26,11 +27,92 @@ limitations under the License. #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/util/env_var.h" +namespace { +const static char kSysEndian = []() { + int x = 1; + return (((char *)&x)[0]) ? 
'<' : '>'; +}(); + +char TensorDtype2Np(tensorflow::DataType dtype) { + if (tensorflow::DataTypeIsFloating(dtype)) { + return 'f'; + } else if (tensorflow::DataTypeIsSigned(dtype)) { + return 'i'; + } else if (tensorflow::DataTypeIsUnsigned(dtype)) { + return 'u'; + } else if (tensorflow::DataTypeIsComplex(dtype)) { + return 'c'; + } else { + return 'b'; + } +} + +std::string TensorShape2Npy(tensorflow::TensorShape shape) { + auto num_dims = shape.dims(); + if (num_dims == 0) { + return "()"; + } else if (num_dims == 1) { + return "(" + std::to_string(shape.dim_size(0)) + ",)"; + } + std::string shape_string = "("; + for (int i = 0; i < num_dims - 1; i++) { + shape_string += std::to_string(shape.dim_size(i)); // fix: each interior dimension, not dim 0 repeated + shape_string += ", "; + } + return shape_string + std::to_string(shape.dim_size(num_dims - 1)) + ")"; +} + +std::string AssembleNpyHeader(tensorflow::Tensor tensor) { + std::string dict; + dict += "{'descr': '"; + dict += kSysEndian; + dict += TensorDtype2Np(tensor.dtype()); + dict += std::to_string(tensorflow::DataTypeSize(tensor.dtype())); + dict += "', 'fortran_order': False, 'shape': "; + dict += TensorShape2Npy(tensor.shape()); + dict += ", }"; + // pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes.
dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(), remainder, ' '); + dict.back() = '\n'; + + std::string header; + header += (char)0x93; + header += "NUMPY"; + header += (char)0x01; // major version of numpy format + header += (char)0x00; // minor version of numpy format + auto size = (uint16_t)dict.size(); + char *size_bits = (char *)(&size); + header += *size_bits; + header += *(size_bits + 1); + header.insert(header.end(), dict.begin(), dict.end()); + + return header; +} + +void WriteTensor2Npy(tensorflow::Tensor tensor, std::string fname) { + FILE *fp = NULL; + int64_t num_elements = tensor.NumElements(); + + fp = fopen(fname.c_str(), "wb"); + if (fp == NULL) { // guard: unwritable path must not crash the process via fseek/fwrite on NULL + return; + } + + std::string header = AssembleNpyHeader(tensor); + + fseek(fp, 0, SEEK_SET); + fwrite(&header[0], sizeof(char), header.size(), fp); + fseek(fp, 0, SEEK_END); + fwrite(tensor.tensor_data().data(), tensorflow::DataTypeSize(tensor.dtype()), num_elements, fp); + fclose(fp); +} +} // namespace + namespace tensorflow { REGISTER_OP("AscendDump") .Input("inputs: Tin") .Attr("tensor_names: list(string)") + .Attr("op_type: string") .Attr("Tin: list(type)") .SetIsStateful(); @@ -38,18 +120,25 @@ class AscendDump : public OpKernel { public: explicit AscendDump(OpKernelConstruction *ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("tensor_names", &tensor_names_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("op_type", &op_type_)); } void Compute(OpKernelContext *ctx) override { OpInputList inputs; OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &inputs)); + std::string nanos_uuid = std::to_string(Env::Default()->NowMicros()); for (int64 i = 0; i < inputs.size(); i++) { - std::cout << tensor_names_[i] << " " << inputs[i].DebugString(inputs[i].NumElements()) << std::endl; + std::string tensor_name = absl::StrReplaceAll(tensor_names_[i], {{"/", "."}, {":", "."}}); + std::string file_name = absl::StrCat(tensor_name, ".", op_type_, ".", nanos_uuid,
".npy"); + VLOG(1) << "Dump " << tensor_names_[i] << " to " << file_name; + WriteTensor2Npy(inputs[i], file_name); + VLOG(1) << tensor_names_[i] << " " << inputs[i].DebugString() << std::endl; } } private: std::vector tensor_names_; + std::string op_type_; }; REGISTER_KERNEL_BUILDER(Name("AscendDump").Device(DEVICE_CPU).Priority(999), AscendDump); diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp index 8280084ba..0ca370936 100644 --- a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp @@ -90,6 +90,7 @@ Status DumpOutputs(Graph *graph, Node *node) { TF_RETURN_IF_ERROR(NodeBuilder(node->name() + "_dump_outputs", "AscendDump") .Input(copyable_outputs) .Attr("tensor_names", tensor_names) + .Attr("op_type", node->type_string()) .Finalize(graph, &dump_node)); for (auto n : output_nodes) { @@ -105,6 +106,12 @@ Status DbgDumpPass::ProcessGraph(Graph *graph, FunctionLibraryDefinition *func_l return Status::OK(); } + for (auto node : graph->op_nodes()) { + if (node->type_string() == "AscendDump") { + return Status::OK(); + } + } + std::string graph_key = std::to_string(uuid.fetch_add(1)) + "_" + std::to_string(graph->num_nodes()); WriteTextProto(Env::Default(), "Graph_" + graph_key + ".before.pbtxt", graph->ToGraphDefDebug()); -- Gitee From 7ef68e0501b96fad833dca780e37e77ec4dd8324 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Fri, 28 May 2021 19:25:41 +0800 Subject: [PATCH 3/3] inlining nest tf function in dump mode --- build.sh | 2 +- tf_adapter_2.x/CI_Build | 4 +-- tf_adapter_2.x/README.md | 4 +-- .../python/tfdbg_ascend/tfdbg_ascend.py | 35 +++++++++++++++++++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 72e4536a9..bdab4aaa7 100644 --- a/build.sh +++ b/build.sh @@ -89,7 +89,7 @@ release_tfadapter() { logging "Create output directory" mk_dir "${RELEASE_PATH}" RELEASE_TARGET="tfadapter.tar" - cd 
${CMAKE_PATH}/dist/python/dist && mkdir -p tfplugin/bin && cp -r "${BASE_PATH}/script" tfplugin/ && mv npu_bridge-*.whl tfplugin/bin && mv "${BASE_PATH}/tf_adapter_2.x/build/dist/python/dist/npu_device-0.1-py3-none-any.whl" tfplugin/bin && tar cfz "${RELEASE_TARGET}" * && mv "${RELEASE_TARGET}" "${RELEASE_PATH}" + cd ${CMAKE_PATH}/dist/python/dist && mkdir -p tfplugin/bin && cp -r "${BASE_PATH}/script" tfplugin/ && mv npu_bridge-*.whl tfplugin/bin && mv "${BASE_PATH}/tf_adapter_2.x/build/dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl" tfplugin/bin && tar cfz "${RELEASE_TARGET}" * && mv "${RELEASE_TARGET}" "${RELEASE_PATH}" } main() { diff --git a/tf_adapter_2.x/CI_Build b/tf_adapter_2.x/CI_Build index 7dc668c3a..d09664044 100644 --- a/tf_adapter_2.x/CI_Build +++ b/tf_adapter_2.x/CI_Build @@ -7,8 +7,8 @@ CONFIGURE_DIR=$(dirname "$0") cd "${CONFIGURE_DIR}" if [ "$(arch)" != "x86_64" ];then - mkdir -p build/dist/python/dist/ - touch build/dist/python/dist/npu_device-0.1-py3-none-any.whl + mkdir -p build/dist/ascend_adapter2/dist/ + touch build/dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl exit 0 fi diff --git a/tf_adapter_2.x/README.md b/tf_adapter_2.x/README.md index 8e96832f0..b80057306 100644 --- a/tf_adapter_2.x/README.md +++ b/tf_adapter_2.x/README.md @@ -77,7 +77,7 @@ make -j8 编译结束后,安装包会生成在 ``` -./dist/python/dist/npu_device-0.1-py3-none-any.whl +./dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl ``` #### 安装 @@ -91,7 +91,7 @@ make install 将Ascend Adapter安装到配置时指定的 python 解释器包目录下,或者使用 pip3 安装 Ascend Adapter 到您期望的位置。 ``` -pip3 install ./dist/python/dist/npu_device-0.1-py3-none-any.whl --upgrade +pip3 install ./dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl --upgrade ``` ## 贡献 diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py index af235f8e5..7a22ed05b 100644 --- 
a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py @@ -5,12 +5,47 @@ import os import absl.logging as logging import tensorflow as tf import tfdbg_ascend +import threading try: __handle = tf.load_op_library(os.path.dirname(tfdbg_ascend.__file__) + "/_tfdbg_ascend.so") except Exception as e: logging.error(e) +_hacked_tensorflow_function = tf.function +_thread_local = threading.local() + + +def never_nested_function(func=None, *args, **kwargs): + def never_nested_decorator(f): + if kwargs.get('experimental_compile'): + logging.info("Skip xla compile tf function %s in debug dump mode", f.__name__) + kwargs['experimental_compile'] = False + tf_decorated_func = _hacked_tensorflow_function(*args, **kwargs)(f) + + def wrapper(*func_args, **func_kwargs): + if not hasattr(_thread_local, "entrance_function"): + _thread_local.entrance_function = None + if _thread_local.entrance_function is not None: + logging.info("Inlining nested tf function %s under %s in debug dump mode", f.__name__, + _thread_local.entrance_function) + return f(*func_args, **func_kwargs) + _thread_local.entrance_function = f.__name__ + try: + return tf_decorated_func(*func_args, **func_kwargs) + finally: + # fix: always clear the entrance marker; an exception in the traced function must not + # leave this thread permanently in "inlining" mode for every later tf.function call + _thread_local.entrance_function = None + + wrapper.__name__ = f.__name__ # We should never change origin function name in decorator + return wrapper + + if func is not None: + return never_nested_decorator(func) + else: + return never_nested_decorator + + +tf.function = never_nested_function + def enable(): logging.info("tensorflow debug dump provide by npu enabled") -- Gitee