From fd44e5c0810eb0bba52a91cb65e113c792226ca6 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Tue, 25 May 2021 21:48:21 +0800 Subject: [PATCH 1/3] add tensorflow debug dump tools for gpu --- tf_adapter_2.x/CMakeLists.txt | 11 +- .../toolkits/tfdbg_ascend/CMakeLists.txt | 22 +++ .../toolkits/tfdbg_ascend/dump_op.cpp | 57 ++++++++ .../toolkits/tfdbg_ascend/dump_pass.cpp | 127 ++++++++++++++++++ .../toolkits/tfdbg_ascend/python/MANIFEST.in | 2 + .../toolkits/tfdbg_ascend/python/setup.py | 11 ++ .../python/tfdbg_ascend/__init__.py | 2 + .../python/tfdbg_ascend/tfdbg_ascend.py | 22 +++ 8 files changed, 250 insertions(+), 4 deletions(-) create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py create mode 100644 tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py diff --git a/tf_adapter_2.x/CMakeLists.txt b/tf_adapter_2.x/CMakeLists.txt index 985e56b85..aed583999 100644 --- a/tf_adapter_2.x/CMakeLists.txt +++ b/tf_adapter_2.x/CMakeLists.txt @@ -13,7 +13,7 @@ if (DEFINED ASCEND_CI_BUILD_DIR) set(CMAKE_C_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 ${CMAKE_CXX_FLAGS}") include_directories(${PYTHON_INCLUDE_DIR}) -else() +else () if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/TF_INSTALLED_PATH OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/ASCEND_INSTALLED_PATH OR NOT EXISTS @@ -36,14 +36,17 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/acl/module.cmake) include(${CMAKE_CURRENT_LIST_DIR}/cmake/tensorflow/module.cmake) 
include(${CMAKE_CURRENT_LIST_DIR}/cmake/graph_engine/module.cmake) -file(COPY ${CMAKE_CURRENT_LIST_DIR}/python DESTINATION ${CMAKE_BINARY_DIR}/dist) -set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/dist/python/npu_device) +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/dist/libs) add_subdirectory(npu_device) add_subdirectory(npu_ops) +add_subdirectory(toolkits/tfdbg_ascend) add_custom_target(ascend_adapter2 ALL - COMMAND cd ${CMAKE_BINARY_DIR}/dist/python/ && ${PYTHON_BIN_PATH} setup.py bdist_wheel + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_LIST_DIR}/python ${CMAKE_BINARY_DIR}/dist/ascend_adapter2 + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/dist/libs/_npu_device_backends.so ${CMAKE_BINARY_DIR}/dist/ascend_adapter2/npu_device + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/dist/libs/_npu_ops.so ${CMAKE_BINARY_DIR}/dist/ascend_adapter2/npu_device + COMMAND cd ${CMAKE_BINARY_DIR}/dist/ascend_adapter2 && ${PYTHON_BIN_PATH} setup.py bdist_wheel DEPENDS _npu_ops _npu_device_backends VERBATIM) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt b/tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt new file mode 100644 index 000000000..53af073ea --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.14) +project(TfdbgAscend) + +file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_LIST_DIR}/*.cpp) +foreach (CPP_SOURCE ${SOURCES}) + file(RELATIVE_PATH RELATIVE_CPP_SOURCE ${CMAKE_CURRENT_SOURCE_DIR} ${CPP_SOURCE}) + set_property(SOURCE ${CPP_SOURCE} PROPERTY COMPILE_DEFINITIONS __FILE__=\"${RELATIVE_CPP_SOURCE}\") +endforeach (CPP_SOURCE) + +add_library(_tfdbg_ascend SHARED EXCLUDE_FROM_ALL ${SOURCES}) +set_target_properties(_tfdbg_ascend PROPERTIES PREFIX "") + +target_link_libraries(_tfdbg_ascend PRIVATE + tensorflow_libs) + +add_custom_target(tfdbg_ascend + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_LIST_DIR}/python ${CMAKE_BINARY_DIR}/dist/tfdbg_ascend + 
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/dist/libs/_tfdbg_ascend.so ${CMAKE_BINARY_DIR}/dist/tfdbg_ascend/tfdbg_ascend + COMMAND cd ${CMAKE_BINARY_DIR}/dist/tfdbg_ascend && ${PYTHON_BIN_PATH} setup.py bdist_wheel + DEPENDS _tfdbg_ascend + VERBATIM + ) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp new file mode 100644 index 000000000..6cd6dee44 --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp @@ -0,0 +1,57 @@ +/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "absl/algorithm/container.h" +#include "absl/memory/memory.h" +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/c/eager/c_api_internal.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/util/env_var.h" + +namespace tensorflow { + +REGISTER_OP("AscendDump") + .Input("inputs: Tin") + .Attr("tensor_names: list(string)") + .Attr("Tin: list(type)") + .SetIsStateful(); + +class AscendDump : public OpKernel { + public: + explicit AscendDump(OpKernelConstruction *ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("tensor_names", &tensor_names_)); + } + + void Compute(OpKernelContext *ctx) override { + OpInputList inputs; + OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &inputs)); + for (int64 i = 0; i < inputs.size(); i++) { + std::cout << tensor_names_[i] << " " << inputs[i].DebugString(inputs[i].NumElements()) << std::endl; + } + } + + private: + std::vector tensor_names_; +}; + +REGISTER_KERNEL_BUILDER(Name("AscendDump").Device(DEVICE_CPU).Priority(999), AscendDump); + +} // namespace tensorflow \ No newline at end of file diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp new file mode 100644 index 000000000..8280084ba --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp @@ -0,0 +1,127 @@ +/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/c/tf_status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/platform/logging.h" + +#include "absl/algorithm/container.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/immediate_execution_operation.h" +#include "tensorflow/c/eager/tfe_context_internal.h" +#include "tensorflow/c/eager/tfe_op_internal.h" +#include "tensorflow/c/eager/tfe_tensorhandle_internal.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" + +namespace tensorflow { + +class DbgDumpPass : public GraphOptimizationPass { + public: + DbgDumpPass() = default; + ~DbgDumpPass() override = default; + Status Run(const GraphOptimizationPassOptions &options) override; + + private: + Status ProcessGraph(Graph *graph, FunctionLibraryDefinition *func_lib); +}; + +Status DbgDumpPass::Run(const GraphOptimizationPassOptions &options) { + if (options.graph == nullptr && options.partition_graphs == nullptr) { + return Status::OK(); + } + + FunctionLibraryDefinition *func_lib = options.flib_def; + Status status = Status::OK(); + if (options.graph != nullptr) { + std::unique_ptr *graph = options.graph; + TF_RETURN_IF_ERROR(ProcessGraph((*graph).get(), func_lib)); + } else if (options.partition_graphs != nullptr) { + for (auto &pg : *options.partition_graphs) { + 
TF_RETURN_IF_ERROR(ProcessGraph(pg.second.get(), func_lib)); + } + } + + return Status::OK(); +} + +Status DumpOutputs(Graph *graph, Node *node) { + if (node->num_outputs() == 0) { + return Status::OK(); + } + + std::vector tensor_names; + std::vector copyable_outputs; + std::unordered_set output_idxes; + std::unordered_set output_nodes; + for (int i = 0; i < node->num_outputs(); i++) { + if (DataTypeCanUseMemcpy(node->output_type(i))) { + tensor_names.emplace_back(node->name() + ":" + std::to_string(i)); + copyable_outputs.emplace_back(NodeBuilder::NodeOut(node, i)); + output_idxes.insert(i); + } + } + + if (copyable_outputs.empty()) { + return Status::OK(); + } + + for (auto edge : node->out_edges()) { + if (output_idxes.count(edge->src_output())) { + output_nodes.insert(edge->dst()); + } + } + + Node *dump_node = nullptr; + TF_RETURN_IF_ERROR(NodeBuilder(node->name() + "_dump_outputs", "AscendDump") + .Input(copyable_outputs) + .Attr("tensor_names", tensor_names) + .Finalize(graph, &dump_node)); + + for (auto n : output_nodes) { + graph->AddControlEdge(dump_node, n); + } + return Status::OK(); +} + +Status DbgDumpPass::ProcessGraph(Graph *graph, FunctionLibraryDefinition *func_lib) { + static std::atomic uuid{0}; + + if (graph == nullptr) { + return Status::OK(); + } + + std::string graph_key = std::to_string(uuid.fetch_add(1)) + "_" + std::to_string(graph->num_nodes()); + + WriteTextProto(Env::Default(), "Graph_" + graph_key + ".before.pbtxt", graph->ToGraphDefDebug()); + + int num_nodes = graph->num_node_ids(); + for (int i = 0; i < num_nodes; i++) { + Node *node = graph->FindNodeId(i); + if (node == nullptr || !node->IsOp()) { + continue; + } + DumpOutputs(graph, node); + } + + WriteTextProto(Env::Default(), "Graph_" + graph_key + ".after.pbtxt", graph->ToGraphDefDebug()); + + return Status::OK(); +} +// REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, DbgDumpPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, 
DbgDumpPass); +} // namespace tensorflow diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in new file mode 100644 index 000000000..41b6083f2 --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include * *.py +recursive-include * _tfdbg_ascend.so \ No newline at end of file diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py new file mode 100644 index 000000000..5bc2f030b --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup, Extension +from setuptools import find_packages + +setup(name='tfdbg_ascend', + version='0.1', + description='This is a demo package', + long_description='This is a demo package', + packages=find_packages(), + include_package_data=True, + ext_modules=[], + zip_safe=False) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py new file mode 100644 index 000000000..3b3173c1f --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/__init__.py @@ -0,0 +1,2 @@ +from tfdbg_ascend.tfdbg_ascend import enable +from tfdbg_ascend.tfdbg_ascend import disable diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py new file mode 100644 index 000000000..af235f8e5 --- /dev/null +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py @@ -0,0 +1,22 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
+# Description: Common depends and micro defines for and only for data preprocess module + +import os +import absl.logging as logging +import tensorflow as tf +import tfdbg_ascend + +try: + __handle = tf.load_op_library(os.path.dirname(tfdbg_ascend.__file__) + "/_tfdbg_ascend.so") +except Exception as e: + logging.error(e) + + +def enable(): + logging.info("tensorflow debug dump provide by npu enabled") + pass + + +def disable(): + logging.info("tensorflow debug dump provide by npu disabled") + pass -- Gitee From cfefc651270a7c34d0053191d703d0bca3c722de Mon Sep 17 00:00:00 2001 From: medivh-x Date: Wed, 26 May 2021 20:06:00 +0800 Subject: [PATCH 2/3] support dump npy --- .../toolkits/tfdbg_ascend/dump_op.cpp | 91 ++++++++++++++++++- .../toolkits/tfdbg_ascend/dump_pass.cpp | 7 ++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp index 6cd6dee44..8c5a94757 100644 --- a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_op.cpp @@ -16,6 +16,7 @@ limitations under the License. #include "absl/algorithm/container.h" #include "absl/memory/memory.h" +#include "absl/strings/str_replace.h" #include "tensorflow/c/c_api.h" #include "tensorflow/c/c_api_internal.h" #include "tensorflow/c/eager/c_api_experimental.h" @@ -26,11 +27,92 @@ limitations under the License. #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/util/env_var.h" +namespace { +const static char kSysEndian = []() { + int x = 1; + return (((char *)&x)[0]) ? 
'<' : '>'; +}(); + +char TensorDtype2Np(tensorflow::DataType dtype) { + if (tensorflow::DataTypeIsFloating(dtype)) { + return 'f'; + } else if (tensorflow::DataTypeIsSigned(dtype)) { + return 'i'; + } else if (tensorflow::DataTypeIsUnsigned(dtype)) { + return 'u'; + } else if (tensorflow::DataTypeIsComplex(dtype)) { + return 'c'; + } else { + return 'b'; + } +} + +std::string TensorShape2Npy(tensorflow::TensorShape shape) { + auto num_dims = shape.dims(); + if (num_dims == 0) { + return "()"; + } else if (num_dims == 1) { + return "(" + std::to_string(shape.dim_size(0)) + ",)"; + } + std::string shape_string = "("; + for (int i = 0; i < num_dims - 1; i++) { + shape_string += std::to_string(shape.dim_size(i)); // fix: each interior dimension, not dim 0 repeated + shape_string += ", "; + } + return shape_string + std::to_string(shape.dim_size(num_dims - 1)) + ")"; +} + +std::string AssembleNpyHeader(tensorflow::Tensor tensor) { + std::string dict; + dict += "{'descr': '"; + dict += kSysEndian; + dict += TensorDtype2Np(tensor.dtype()); + dict += std::to_string(tensorflow::DataTypeSize(tensor.dtype())); + dict += "', 'fortran_order': False, 'shape': "; + dict += TensorShape2Npy(tensor.shape()); + dict += ", }"; + // pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes.
dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(), remainder, ' '); + dict.back() = '\n'; + + std::string header; + header += (char)0x93; + header += "NUMPY"; + header += (char)0x01; // major version of numpy format + header += (char)0x00; // minor version of numpy format + auto size = (uint16_t)dict.size(); + char *size_bits = (char *)(&size); + header += *size_bits; + header += *(size_bits + 1); + header.insert(header.end(), dict.begin(), dict.end()); + + return header; +} + +void WriteTensor2Npy(tensorflow::Tensor tensor, std::string fname) { + FILE *fp = NULL; + int64_t num_elements = tensor.NumElements(); + + fp = fopen(fname.c_str(), "wb"); + if (fp == NULL) { // guard: unwritable path must not crash the process via fseek/fwrite on NULL + return; + } + + std::string header = AssembleNpyHeader(tensor); + + fseek(fp, 0, SEEK_SET); + fwrite(&header[0], sizeof(char), header.size(), fp); + fseek(fp, 0, SEEK_END); + fwrite(tensor.tensor_data().data(), tensorflow::DataTypeSize(tensor.dtype()), num_elements, fp); + fclose(fp); +} +} // namespace + namespace tensorflow { REGISTER_OP("AscendDump") .Input("inputs: Tin") .Attr("tensor_names: list(string)") + .Attr("op_type: string") .Attr("Tin: list(type)") .SetIsStateful(); @@ -38,18 +120,25 @@ class AscendDump : public OpKernel { public: explicit AscendDump(OpKernelConstruction *ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("tensor_names", &tensor_names_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("op_type", &op_type_)); } void Compute(OpKernelContext *ctx) override { OpInputList inputs; OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &inputs)); + std::string nanos_uuid = std::to_string(Env::Default()->NowMicros()); for (int64 i = 0; i < inputs.size(); i++) { - std::cout << tensor_names_[i] << " " << inputs[i].DebugString(inputs[i].NumElements()) << std::endl; + std::string tensor_name = absl::StrReplaceAll(tensor_names_[i], {{"/", "."}, {":", "."}}); + std::string file_name = absl::StrCat(tensor_name, ".", op_type_, ".", nanos_uuid,
".npy"); + VLOG(1) << "Dump " << tensor_names_[i] << " to " << file_name; + WriteTensor2Npy(inputs[i], file_name); + VLOG(1) << tensor_names_[i] << " " << inputs[i].DebugString() << std::endl; } } private: std::vector tensor_names_; + std::string op_type_; }; REGISTER_KERNEL_BUILDER(Name("AscendDump").Device(DEVICE_CPU).Priority(999), AscendDump); diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp index 8280084ba..0ca370936 100644 --- a/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/dump_pass.cpp @@ -90,6 +90,7 @@ Status DumpOutputs(Graph *graph, Node *node) { TF_RETURN_IF_ERROR(NodeBuilder(node->name() + "_dump_outputs", "AscendDump") .Input(copyable_outputs) .Attr("tensor_names", tensor_names) + .Attr("op_type", node->type_string()) .Finalize(graph, &dump_node)); for (auto n : output_nodes) { @@ -105,6 +106,12 @@ Status DbgDumpPass::ProcessGraph(Graph *graph, FunctionLibraryDefinition *func_l return Status::OK(); } + for (auto node : graph->op_nodes()) { + if (node->type_string() == "AscendDump") { + return Status::OK(); + } + } + std::string graph_key = std::to_string(uuid.fetch_add(1)) + "_" + std::to_string(graph->num_nodes()); WriteTextProto(Env::Default(), "Graph_" + graph_key + ".before.pbtxt", graph->ToGraphDefDebug()); -- Gitee From 7ef68e0501b96fad833dca780e37e77ec4dd8324 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Fri, 28 May 2021 19:25:41 +0800 Subject: [PATCH 3/3] inlining nest tf function in dump mode --- build.sh | 2 +- tf_adapter_2.x/CI_Build | 4 +-- tf_adapter_2.x/README.md | 4 +-- .../python/tfdbg_ascend/tfdbg_ascend.py | 35 +++++++++++++++++++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 72e4536a9..bdab4aaa7 100644 --- a/build.sh +++ b/build.sh @@ -89,7 +89,7 @@ release_tfadapter() { logging "Create output directory" mk_dir "${RELEASE_PATH}" RELEASE_TARGET="tfadapter.tar" - cd 
${CMAKE_PATH}/dist/python/dist && mkdir -p tfplugin/bin && cp -r "${BASE_PATH}/script" tfplugin/ && mv npu_bridge-*.whl tfplugin/bin && mv "${BASE_PATH}/tf_adapter_2.x/build/dist/python/dist/npu_device-0.1-py3-none-any.whl" tfplugin/bin && tar cfz "${RELEASE_TARGET}" * && mv "${RELEASE_TARGET}" "${RELEASE_PATH}" + cd ${CMAKE_PATH}/dist/python/dist && mkdir -p tfplugin/bin && cp -r "${BASE_PATH}/script" tfplugin/ && mv npu_bridge-*.whl tfplugin/bin && mv "${BASE_PATH}/tf_adapter_2.x/build/dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl" tfplugin/bin && tar cfz "${RELEASE_TARGET}" * && mv "${RELEASE_TARGET}" "${RELEASE_PATH}" } main() { diff --git a/tf_adapter_2.x/CI_Build b/tf_adapter_2.x/CI_Build index 7dc668c3a..d09664044 100644 --- a/tf_adapter_2.x/CI_Build +++ b/tf_adapter_2.x/CI_Build @@ -7,8 +7,8 @@ CONFIGURE_DIR=$(dirname "$0") cd "${CONFIGURE_DIR}" if [ "$(arch)" != "x86_64" ];then - mkdir -p build/dist/python/dist/ - touch build/dist/python/dist/npu_device-0.1-py3-none-any.whl + mkdir -p build/dist/ascend_adapter2/dist/ + touch build/dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl exit 0 fi diff --git a/tf_adapter_2.x/README.md b/tf_adapter_2.x/README.md index 8e96832f0..b80057306 100644 --- a/tf_adapter_2.x/README.md +++ b/tf_adapter_2.x/README.md @@ -77,7 +77,7 @@ make -j8 编译结束后,安装包会生成在 ``` -./dist/python/dist/npu_device-0.1-py3-none-any.whl +./dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl ``` #### 安装 @@ -91,7 +91,7 @@ make install 将Ascend Adapter安装到配置时指定的 python 解释器包目录下,或者使用 pip3 安装 Ascend Adapter 到您期望的位置。 ``` -pip3 install ./dist/python/dist/npu_device-0.1-py3-none-any.whl --upgrade +pip3 install ./dist/ascend_adapter2/dist/npu_device-0.1-py3-none-any.whl --upgrade ``` ## 贡献 diff --git a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py index af235f8e5..7a22ed05b 100644 --- 
a/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py +++ b/tf_adapter_2.x/toolkits/tfdbg_ascend/python/tfdbg_ascend/tfdbg_ascend.py @@ -5,12 +5,47 @@ import os import absl.logging as logging import tensorflow as tf import tfdbg_ascend +import threading try: __handle = tf.load_op_library(os.path.dirname(tfdbg_ascend.__file__) + "/_tfdbg_ascend.so") except Exception as e: logging.error(e) +_hacked_tensorflow_function = tf.function +_thread_local = threading.local() + + +def never_nested_function(func=None, *args, **kwargs): + def never_nested_decorator(f): + if kwargs.get('experimental_compile'): + logging.info("Skip xla compile tf function %s in debug dump mode", f.__name__) + kwargs['experimental_compile'] = False + tf_decorated_func = _hacked_tensorflow_function(*args, **kwargs)(f) + + def wrapper(*func_args, **func_kwargs): + if not hasattr(_thread_local, "entrance_function"): + _thread_local.entrance_function = None + if _thread_local.entrance_function is not None: + logging.info("Inlining nested tf function %s under %s in debug dump mode", f.__name__, + _thread_local.entrance_function) + return f(*func_args, **func_kwargs) + _thread_local.entrance_function = f.__name__ + try: + return tf_decorated_func(*func_args, **func_kwargs) + finally: + # fix: always clear the entrance marker; an exception in the traced function must not + # leave this thread permanently in "inlining" mode for every later tf.function call + _thread_local.entrance_function = None + + wrapper.__name__ = f.__name__ # We should never change origin function name in decorator + return wrapper + + if func is not None: + return never_nested_decorator(func) + else: + return never_nested_decorator + + +tf.function = never_nested_function + def enable(): logging.info("tensorflow debug dump provide by npu enabled") -- Gitee