From 80784a65df9734be295ff81f886d9d0b9c84f779 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:41:13 +0000 Subject: [PATCH 01/38] add TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh. Signed-off-by: huangju1993 --- .../DeepCTR_Series_for_TensorFlow/test/set_conda.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh new file mode 100644 index 000000000..febb0fa34 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh @@ -0,0 +1,2 @@ +export PATH=/home/anaconda3/bin:$PATH +export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH \ No newline at end of file -- Gitee From 50abd2b337537d9efcc282fc4c69cae3aec82737 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:41:57 +0000 Subject: [PATCH 02/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20precision=5Ftool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 341cf40c5858a70bb230cb1243e23a6d4744bed3 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:42:21 +0000 Subject: [PATCH 03/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20lib?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../examples/precision_tool/lib/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/.keep diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/.keep b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 118c35275bbde82f554014655d197e02056da7a8 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:42:49 +0000 Subject: [PATCH 04/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/adapter/fusion.py | 112 ++++++++++ .../lib/adapter/msquickcmp_adapter.py | 87 ++++++++ .../lib/adapter/offline_om_adapter.py | 23 ++ .../precision_tool/lib/adapter/overflow.py | 194 +++++++++++++++++ .../precision_tool/lib/adapter/tf_adapter.py | 200 ++++++++++++++++++ 5 files changed, 616 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py 
create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py new file mode 100644 index 000000000..b440b1055 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py @@ -0,0 +1,112 @@ +# coding=utf-8 +import json +import os +import shutil +from ..util.util import util +from ..config import config as cfg + + +FUSION_RESULT_FILE_NAME = 'fusion_result.json' +EFFECT_TIMES_KEY = 'effect_times' +GRAPH_FUSION_KEY = 'graph_fusion' +UB_FUSION_KEY = 'ub_fusion' +GRAPH_ID_KEYS = ['graphId', 'session_and_graph_id'] + + +class FusionResult(object): + def __init__(self, fusion_json): + self.fusion_json = fusion_json + + def get_effect_graph_fusion(self): + """Get effect graph fusion rule""" + if GRAPH_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[GRAPH_FUSION_KEY]) + return {} + + def get_effect_ub_fusion(self): + """Get effect UB fusion rule""" + if UB_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[UB_FUSION_KEY]) + return {} + + def graph_id(self): + """Get graph id""" + for key in GRAPH_ID_KEYS: + if key in self.fusion_json: + return self.fusion_json[key] + return "NONE" + + @staticmethod + def _get_effect_fusion(fusion): + res = {} + for fusion_name in fusion: + effect_times = int(fusion[fusion_name][EFFECT_TIMES_KEY]) + if effect_times > 0: + res[fusion_name] = effect_times + return res + + +class Fusion(object): + def __init__(self): + self.fusion_result = [] + self.log = util.get_log() + + def prepare(self, json_path='./'): + """Prepare fusion rule manager + :param json_path: path to fusion_result.json + :return: None + """ + util.create_dir(cfg.FUSION_DIR) + file_path = os.path.join(json_path, FUSION_RESULT_FILE_NAME) + file_path_local = os.path.join(cfg.FUSION_DIR, FUSION_RESULT_FILE_NAME) + if not os.path.isfile(file_path): + if not os.path.isfile(file_path_local): + self.log.debug("Can not find fusion result json.") + return + else: + shutil.copy(file_path, cfg.FUSION_DIR) + fe_jsons = self._get_result_jsons(file_path_local) + for fe_json in fe_jsons: + self.fusion_result.append(FusionResult(fe_json)) + + def check(self): + """Check fusion rules + :return: None + """ + self.log.info("Check effect fusion rule list.") + for fusion in self.fusion_result: + graph_fusion_table = self._build_table('Graph Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_graph_fusion()) + ub_fusion_table = self._build_table('UB Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_ub_fusion()) + util.print_panel(util.create_columns([graph_fusion_table, ub_fusion_table]), + title='GraphID:' + fusion.graph_id(), fit=True) + + @staticmethod + def _get_result_jsons(file_name): + result_jsons = [] + with open(file_name, 'r') as f: + txt = f.read() + try: + result_jsons = json.loads(txt) + if isinstance(result_jsons, dict): + result_jsons = [result_jsons] + except ValueError: + sk = [] + start = -1 + for i in range(len(txt)): + if txt[i] == '{': + sk.append('{') + if txt[i] == '}': + sk.pop() + if len(sk) == 0: + result_jsons.append(json.loads(txt[start+1: i+1])) + start = i + return result_jsons + + @staticmethod + def _build_table(title, fusion): + 
table = util.create_table(title, ['Fusion Name', 'Effect times']) + for f in fusion: + table.add_row(f, str(fusion[f])) + return table diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py new file mode 100644 index 000000000..525dc8ee5 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py @@ -0,0 +1,87 @@ +# coding=utf-8 +import os +import time +import pathlib +import shutil +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException + + +class MsQuickCmpAdapter(object): + def __init__(self, output_path): + self.output_path = output_path + self.log = util.get_log() + + def run(self): + if self.output_path is None or not os.path.isdir(self.output_path): + raise PrecisionToolException("Invalid output path.") + if os.path.exists(cfg.DATA_ROOT_DIR): + raise PrecisionToolException("Precision data dir exist, can not adapt msquickcmp result.") + + for dir_path, dir_names, file_names in os.walk(self.output_path, followlinks=True): + if 'model' in dir_names: + self._adapt_model(os.path.join(dir_path, 'model')) + if 'dump_data' in dir_names: + self._adapt_dump(os.path.join(dir_path, 'dump_data')) + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.CSV): + self._adapt_vector_compare_result(os.path.join(dir_path, file_name)) + + def _adapt_model(self, path): + file_names = os.listdir(path) + graph_id = 0 + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.JSON): + self.log.info("Find msquickcmp model json: %s", file_name) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + graph_file_name = 'ge_proto_%d_%s.txt' % (graph_id, cfg.BUILD_JSON_GRAPH_NAME) + graph_json_file_name = graph_file_name + Constant.Suffix.JSON + pathlib.Path(os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_file_name)).touch() + src_path = os.path.join(path, file_name) + dst_path = os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_json_file_name) + self.log.info("Copy graph file: %s->%s", src_path, dst_path) + shutil.copy(src_path, dst_path) + time.sleep(3) + pathlib.Path(dst_path).touch() + if not util.empty_dir(cfg.DEFAULT_NPU_GRAPH_DIR): + self.log.info("Adapt model success.") + + def _adapt_dump(self, path): + dir_names = os.listdir(path) + if 'tf' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'tf')) + if 'onnx' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'onnx')) + if 'npu' in dir_names: + self._adapt_npu_dump(os.path.join(path, 'npu')) + + def _adapt_tf_dump(self, path): + if util.empty_dir(path): + return + src_path = os.path.abspath(path) + util.create_dir(cfg.TF_DIR) + dst_path = cfg.TF_DUMP_DIR + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + os.symlink(src_path, dst_path) + self.log.info("Adapt tf dump success.") + + def _adapt_npu_dump(self, path): + sub_dirs = os.listdir(path) + self.log.info("Find npu dump dir:%s", sub_dirs) + sub_dirs = filter(lambda x: str(x).isdigit(), sub_dirs) + for sub_dir in sub_dirs: + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + src_path = os.path.abspath(os.path.join(path, sub_dir)) + dst_path = os.path.join(cfg.DEFAULT_NPU_DUMP_DIR, sub_dir) + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + 
os.symlink(src_path, dst_path)
+            self.log.info("Adapt npu dump success.")
+
+    def _adapt_vector_compare_result(self, path):
+        target_path = os.path.join(cfg.VECTOR_COMPARE_PATH, '0')
+        util.create_dir(target_path)
+        dst_path = os.path.join(target_path, os.path.basename(path))
+        shutil.copy(path, dst_path)
+        self.log.info("Adapt vector compare result.")
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py
new file mode 100644
index 000000000..a6cc8a5fb
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py
@@ -0,0 +1,23 @@
+# coding=utf-8
+import os
+import time
+import pathlib
+import shutil
+from ..util.util import util
+from ..util.constant import Constant
+from ..config import config as cfg
+from ..util.precision_tool_exception import PrecisionToolException
+
+
+class OfflineOmAdapter(object):
+    """Automatically parse an om file into a GE graph"""
+    def __init__(self, file_name):
+        self.file_name = file_name
+        self.log = util.get_log()
+
+    @staticmethod
+    def validate(file_name):
+        return os.path.isfile(file_name) and str(file_name).endswith(Constant.Suffix.OM)
+
+    def run(self):
+        # self.log is a logger object and is not callable; use its info method
+        self.log.info("To impl")
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py
new file mode 100644
index 000000000..7908c18be
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py
@@ -0,0 +1,194 @@
+# coding=utf-8
+import json
+import os
+
+from ..util.util import util
+from ..util.precision_tool_exception import PrecisionToolException
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.constant import Constant
+from ..config import config as cfg
+
+
+AI_CORE_OVERFLOW_STATUS = {
+    '0x8': 'Overflow when negating (NEG) the sign bit of the minimum signed integer',
+    '0x10': 'Overflow in integer add, subtract, multiply or multiply-add',
+    '0x20': 'Overflow in floating-point computation',
+    '0x80': 'Negative input to a float-to-unsigned conversion',
+    '0x100': 'Overflow in FP32-to-FP16 or signed INT32-to-FP16 conversion',
+    '0x400': 'Overflow in CUBE accumulation'
+}
+DHA_ATOMIC_ADD_STATUS = {
+    '0x9': '[atomic overflow] overflow',
+    '0xA': '[atomic underflow] underflow',
+    '0xB': '[atomic src nan] source operand is invalid',
+    '0xC': '[atomic dst nan] destination operand is invalid',
+    '0xD': '[atomic both nan] both source and destination operands are invalid'
+}
+L2_ATOMIC_ADD_STATUS = {
+    '000': '[atomic no error] no error',
+    '001': '[atomic overflow] overflow',
+    '010': '[atomic underflow] underflow',
+    '011': '[atomic src nan] source operand is invalid',
+    '100': '[atomic dst nan] destination operand is invalid',
+    '101': '[atomic both nan] both source and destination operands are invalid'
+}
+
+
+class Overflow(object):
+    def __init__(self):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_files = None
+
+    @catch_tool_exception
+    def prepare(self):
+        """Prepare"""
+        # find right path in DUMP_FILES_NPU_ALL
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        sub_dir = util.get_newest_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        overflow_dump_files = util.list_npu_dump_files(os.path.join(cfg.NPU_OVERFLOW_DUMP_DIR, sub_dir))
+        self.debug_files = [item for item in overflow_dump_files.values() if item.op_type == 'Opdebug']
+        # sort by timestamp
+        self.debug_files = sorted(self.debug_files, key=lambda x: x.timestamp)
+        self.log.info("Find [%d] debug files in overflow dir.", len(self.debug_files))
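+
+    # Usage sketch (illustrative, not part of the original tool flow): callers are
+    # expected to collect the Opdebug files with prepare() before calling check():
+    #   overflow = Overflow()
+    #   overflow.prepare()
+    #   overflow.check(max_num=3)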
+
+    def check(self, max_num=3):
+        """Check overflow info"""
+        if len(self.debug_files) == 0:
+            self.log.info("[Overflow] Check finished. Found [0] overflow nodes!")
+            return
+        self.log.info("[Overflow] Find [%d] overflow debug files. Will show top %d ops.",
+                      len(self.debug_files), max_num)
+        for i, debug_file in enumerate(self.debug_files):
+            debug_decode_files = self._decode_file(debug_file, True)
+            with open(debug_decode_files[0].path, 'r') as f:
+                overflow_json = json.load(f)
+            util.print_panel(self._json_summary(overflow_json, debug_file))
+            if i + 1 >= max_num:
+                break
+
+    def _json_summary(self, json_txt, debug_file):
+        res = []
+        detail = {'task_id': -1, 'stream_id': -1}
+        if 'magic' in json_txt:
+            # version 2
+            detail = json_txt['acc_list']['data']
+            res.append(' - [AccType:%s][Status:%s][TaskId:%s]' % (
+                json_txt['acc_list'], detail['status'], detail['task_id']))
+        if 'AI Core' in json_txt and json_txt['AI Core']['status'] > 0:
+            detail = json_txt['AI Core']
+            res.append(' - [AI Core][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_ai_core_status(detail['status'])))
+        if 'DHA Atomic Add' in json_txt and json_txt['DHA Atomic Add']['status'] > 0:
+            detail = json_txt['DHA Atomic Add']
+            res.append(' - [DHA Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_dha_atomic_add_status(detail['status'])))
+        if 'L2 Atomic Add' in json_txt and json_txt['L2 Atomic Add']['status'] > 0:
+            detail = json_txt['L2 Atomic Add']
+            res.append(' - [L2 Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_l2_atomic_add_status(detail['status'])))
+        # fall back to the ids parsed from the debug file name if the json has none
+        if str(detail.get('task_id', -1)) == '-1':
+            detail['task_id'] = debug_file.task_id
+        if str(detail.get('stream_id', -1)) == '-1':
+            detail['stream_id'] = debug_file.stream_id
+        dump_file_info = self._find_dump_files_by_task_id(detail['task_id'], detail['stream_id'],
+                                                          debug_file.dir_path)
+        res.append(' - First overflow file timestamp [%s] -' % debug_file.timestamp)
+        if dump_file_info is None:
+            self.log.warning("Can not find any dump file for debug file: %s, op task id: %s",
+                             debug_file.file_name, detail['task_id'])
+        else:
+            dump_decode_files = self._decode_file(dump_file_info)
+            # sort by input/output index; sorted() returns a new list, so assign it
+            dump_decode_files = sorted(dump_decode_files, key=lambda x: x.idx)
+            for anchor_type in ['input', 'output']:
+                for dump_decode_file in dump_decode_files:
+                    if dump_decode_file.type != anchor_type:
+                        continue
+                    res.append('   ├─ %s' % dump_decode_file.file_name)
+                    res.append('   └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(dump_decode_file.path))
+            res.insert(0, '[green][%s][%s][/green] %s' % (dump_file_info.op_type, dump_file_info.task_id,
+                                                          dump_file_info.op_name))
+        return Constant.NEW_LINE.join(res)
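+
+    # Worked example (illustrative value): for status 0x30000 the bits [16..18]
+    # are '011', so _decode_l2_atomic_add_status(0x30000) resolves to
+    # L2_ATOMIC_ADD_STATUS['011'], i.e. '[atomic src nan] source operand is invalid'.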
+
+    @staticmethod
+    def _decode_file(file_info, debug=False):
+        file_name = file_info.file_name
+        if debug:
+            decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        else:
+            decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        if len(decode_files) == 0:
+            # decode info file
+            util.convert_dump_to_npy(file_info.path, cfg.OVERFLOW_DECODE_DIR)
+            if debug:
+                decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+            else:
+                decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+            if len(decode_files) == 0:
+                raise PrecisionToolException("Decode overflow debug file: %s failed." % file_name)
+        decode_files = sorted(decode_files.values(), key=lambda x: x.timestamp)
+        return decode_files
+
+    @staticmethod
+    def _find_dump_files_by_task_id(task_id, stream_id, search_dir):
+        dump_files = util.list_npu_dump_files(search_dir)
+        dump_file_list = [item for item in dump_files.values() if item.op_type != 'Opdebug']
+        dump_file_list = sorted(dump_file_list, key=lambda x: x.timestamp)
+        for dump_file in dump_file_list:
+            if dump_file.task_id == int(task_id) and dump_file.stream_id == int(stream_id):
+                return dump_file
+        return None
+
+    def _decode_ai_core_status(self, status):
+        error_code = []
+        if type(status) is not int:
+            return error_code
+        bin_status = ''.join(reversed(bin(status)))
+        prefix = ''
+        self.log.debug('Decode AI Core Overflow status:[%s]', hex(status))
+        for i in range(len(bin_status)):
+            if bin_status[i] == '1':
+                key = hex(int('1' + prefix, 2))
+                if key not in AI_CORE_OVERFLOW_STATUS:
+                    self.log.warning("Unknown AI Core overflow status: [%s]", key)
+                else:
+                    error_code.append(AI_CORE_OVERFLOW_STATUS[key])
+            # prefix must grow on every bit, otherwise later codes are misaligned
+            prefix += '0'
+        return error_code
+
+    def _decode_l2_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        code, _ = self._sub_bin_code(status, 16, 18)
+        if code in L2_ATOMIC_ADD_STATUS:
+            return L2_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    def _decode_dha_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        _, code = self._sub_bin_code(status, 8, 15)
+        if code in DHA_ATOMIC_ADD_STATUS:
+            # index with the extracted code, not the raw status value
+            return DHA_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    @staticmethod
+    def _sub_bin_code(status, start, end):
+        """ Get specific bit code from status in bin format
+        :param status: status num
+        :param start: start bit
+        :param end: end bit
+        :return: result in bin format and hex format
+        """
+        bin_code = bin(status).replace('0b', '')
+        append_num = end + 1 - len(bin_code)
+        if append_num > 0:
+            bin_list = ['0'] * append_num
+            bin_list.append(bin_code)
+            bin_code = ''.join(bin_list)
+        bin_start = len(bin_code) - end - 1
+        bin_end = len(bin_code) - start
+        bin_start = max(0, bin_start)
+        bin_code = bin_code[bin_start: bin_end]
+        return bin_code, hex(int(bin_code, 2))
+
+
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py
new file mode 100644
index 000000000..980b9fe33
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+from ..util.util import util
+from ..config import config as cfg
+
+FLAG_DUMP_GE_GRAPH = 'DUMP_GE_GRAPH'
+FLAG_DUMP_GRAPH_LEVEL = 'DUMP_GRAPH_LEVEL'
+FLAG_DUMP_GRAPH_PATH = 'DUMP_GRAPH_PATH'
+FLAG_NPU_DUMP_GRAPH = 'NPU_DUMP_GRAPH'
+FUSION_SWITCH_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_switch.cfg')
+FUSION_OFF_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_off.cfg')
+
+
+class TfAdapter(object):
+    def __init__(self):
+        self.log = util.get_log()
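+
+    # The `action` strings recognized by the _is_* helpers at the bottom of this
+    # class are: 'dump', 'dump_stats', 'overflow', 'fusion_off', 'fusion_switch'
+    # and 'prof'; the methods below accept them through their `action` parameter.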
+
+    def sess_dump(self, sess):
+        """Wrap a session with the dumping debug wrapper.
+        In session run mode, use sess = sess_dump(sess).
+        :param sess: origin session
+        :return: Session
+        """
+        from tensorflow.python import debug as tf_debug
+        self._init()
+        return tf_debug.DumpingDebugWrapperSession(sess, cfg.TF_DEBUG_DUMP_DIR)
+
+    def estimator_dump(self):
+        """In estimator mode: estim_spec = tf.estimator.EstimatorSpec(training_hooks=[estimator_dump()])
+        :return: DumpingDebugHook
+        """
+        from tensorflow.python import debug as tf_debug
+        self._init()
+        return tf_debug.DumpingDebugHook(cfg.TF_DEBUG_DUMP_DIR)
+
+    def session_dump_config(self, session_config=None, action=None, dump_layer=None):
+        """
+        In TF session mode, set the dump config in session_config, e.g.:
+            config = session_dump_config()
+            config.[set your own configs]
+            with tf.Session(config=config) as sess:
+                sess.run(_)
+                tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline")
+        :param session_config: original session config
+        :param action: if action is set, there is no need to start the app with a cli wrapper
+        :param dump_layer: op names passed to the dump_layer option
+        :return: config_pb2.ConfigProto
+        """
+        from tensorflow.core.protobuf import config_pb2
+        from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+        if ((not isinstance(session_config, config_pb2.ConfigProto)) and
+                (not issubclass(type(session_config), config_pb2.ConfigProto))):
+            session_config = config_pb2.ConfigProto()
+        custom_op = None
+        for existed_custom_op in session_config.graph_options.rewrite_options.custom_optimizers:
+            if existed_custom_op.name == 'NpuOptimizer':
+                custom_op = existed_custom_op
+        if custom_op is None:
+            custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
+            custom_op.name = 'NpuOptimizer'
+            custom_op.parameter_map['use_off_line'].b = True
+        self.update_custom_op(custom_op, action, dump_layer)
+        session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+        return session_config
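+
+    # Minimal wiring sketch (assumption: running where npu_bridge is available),
+    # combining the two config helpers in an Estimator run:
+    #   adapter = TfAdapter()
+    #   run_config = NPURunConfig(dump_config=adapter.estimator_dump_config('dump'),
+    #                             session_config=adapter.session_dump_config(action='dump'))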
+
+    def estimator_dump_config(self, action=None):
+        """Return DumpConfig.
+        In estimator mode, set dump_config in NPURunConfig(), e.g.:
+            config = NPURunConfig(dump_config=estimator_dump_config(), session_config=session_config)
+        :return: DumpConfig
+        """
+        from npu_bridge.npu_init import DumpConfig
+        self._init()
+        if self._is_overflow(action):
+            config = DumpConfig(enable_dump_debug=True, dump_path=cfg.NPU_OVERFLOW_DUMP_DIR, dump_mode="all")
+        elif self._is_dump(action):
+            config = DumpConfig(enable_dump=True, dump_path=cfg.DEFAULT_NPU_DUMP_DIR, dump_step=cfg.TF_DUMP_STEP,
+                                dump_mode="all")
+        else:
+            config = DumpConfig()
+        return config
+
+    def npu_device_dump_config(self, npu_device, action):
+        """For tf2.x
+        :param npu_device: npu_device
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :return: npu_device
+        """
+        self._init()
+        if self._is_overflow(action):
+            npu_device.global_options().dump_config.enable_dump_debug = True
+            npu_device.global_options().dump_config.dump_path = cfg.NPU_OVERFLOW_DUMP_DIR
+            npu_device.global_options().dump_config.dump_debug_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+        if self._is_dump(action):
+            npu_device.global_options().dump_config.enable_dump = True
+            npu_device.global_options().dump_config.dump_path = cfg.DEFAULT_NPU_DUMP_DIR
+            npu_device.global_options().dump_config.dump_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+            npu_device.global_options().dump_config.dump_step = cfg.TF_DUMP_STEP
+        if self._is_dump_stats(action):
+            npu_device.global_options().dump_config.dump_data = "stats"
+        if self._is_fusion_off(action):
+            npu_device.global_options().fusion_switch_file = FUSION_OFF_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        if self._is_fusion_switch(action):
+            npu_device.global_options().fusion_switch_file = FUSION_SWITCH_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        return npu_device
+
+    def update_custom_op(self, custom_op, action=None, dump_layer=None):
+        """Update custom_op
+        :param custom_op: origin custom op
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :return:
+        """
+        import tensorflow as tf
+        self._init()
+        custom_op.parameter_map['debug_dir'].s = tf.compat.as_bytes(cfg.DEFAULT_OP_DEBUG_DIR)
+        if self._is_overflow(action):
+            custom_op.parameter_map['enable_dump_debug'].b = True
+            custom_op.parameter_map['dump_debug_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.NPU_OVERFLOW_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+        elif self._is_dump(action):
+            custom_op.parameter_map['enable_dump'].b = True
+            custom_op.parameter_map['dump_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.DEFAULT_NPU_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+            custom_op.parameter_map['dump_step'].s = tf.compat.as_bytes(cfg.TF_DUMP_STEP)
+        if self._is_dump_stats(action):
+            custom_op.parameter_map['dump_data'].s = tf.compat.as_bytes("stats")
+        if dump_layer is not None:
+            custom_op.parameter_map['dump_layer'].s = tf.compat.as_bytes(dump_layer)
+        if self._is_fusion_off(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_OFF_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        elif self._is_fusion_switch(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_SWITCH_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        if self._is_prof(action):
+
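+            # profiling_options below is a JSON string consumed by the NPU profiler;
+            # its output directory cfg.PROFILING_DIR is created in _init()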
custom_op.parameter_map["profiling_mode"].b = True + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes('{"output":"%s",\ + "storage_limit": "1000MB","training_trace":"on","l2":"on","hccl":"on","task_trace":"on",\ + "aicpu":"on","fp_point":"", "bp_point":"","aic_metrics":"PipeUtilization","msproftx":"on"}' % + cfg.PROFILING_DIR) + return custom_op + + def _init(self): + util.create_dir(cfg.DEFAULT_OP_DEBUG_DIR) + util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + util.create_dir(cfg.PROFILING_DIR) + self._set_dump_graph_flags() + + @staticmethod + def _set_dump_graph_flags(): + os.environ[FLAG_DUMP_GE_GRAPH] = str(cfg.DUMP_GE_GRAPH_VALUE) + os.environ[FLAG_DUMP_GRAPH_LEVEL] = str(cfg.DUMP_GRAPH_LEVEL_VALUE) + os.environ[FLAG_DUMP_GRAPH_PATH] = cfg.DEFAULT_NPU_GRAPH_DIR + os.environ[FLAG_NPU_DUMP_GRAPH] = 'true' + + @staticmethod + def _is_dump(action): + if action is not None: + return 'dump' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_dump_stats(action): + if action is not None: + return 'dump_stats' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_overflow(action): + if action is not None: + return 'overflow' in action + if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] == 'True': + print("[PrecisionTool] enable npu overflow >======") + return True + return False + + @staticmethod + def _is_fusion_off(action): + return 'fusion_off' in action if action is not None else False + + @staticmethod + def _is_fusion_switch(action): + return ('fusion_switch' in action) if action is not None else False + + @staticmethod + def _is_prof(action): + return ('prof' in action) if action is not None else False + -- Gitee From 9cbb9d1596337655f4503dc28375900248610b99 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:43:15 +0000 Subject: [PATCH 05/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/compare/compare.py | 215 ++++++++++++++++++ .../lib/compare/compare_result.py | 143 ++++++++++++ 2 files changed, 358 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py new file mode 100644 index 000000000..e166f5042 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py @@ -0,0 +1,215 @@ +# coding=utf-8 +""" +Compare +""" +import json +import os +import numpy as np +from .compare_result import CompareResult +from ..util.constant import Constant +from ..util.util import util +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException +from ..util.precision_tool_exception import catch_tool_exception + + +class 
Compare(object):
+    def __init__(self):
+        """Init"""
+        super(Compare, self).__init__()
+        self.log = util.get_log()
+        self.vector_compare_results = {}
+
+    @catch_tool_exception
+    def prepare(self):
+        util.create_dir(cfg.VECTOR_COMPARE_PATH)
+
+    def npu_tf_vector_compare(self, graphs, npu_root_dir, tf_root_dir, result_dir):
+        """Compare npu dump data with tf dump data
+        :param graphs: npu ge graph json file list
+        :param npu_root_dir: npu dump root dir
+        :param tf_root_dir: tf dump root dir
+        :param result_dir: result dir
+        :return:
+        """
+        for graph_file in graphs:
+            self.log.info("Compare npu tf with graph %s", graph_file)
+            sub_graphs = self._get_sub_graphs(graph_file)
+            if sub_graphs is None:
+                continue
+            for sub_graph in sub_graphs:
+                npu_dir = self._get_sub_dir_by_sub_graph_name(sub_graph, npu_root_dir)
+                if npu_dir is None:
+                    self.log.warning("Can not find any sub graph dir named %s", sub_graph)
+                    # for some infer case, sub_graph name may not match sub dir name.
+                    npu_dir_0 = self._get_sub_dir_by_sub_graph_name(sub_graph + '_0', npu_root_dir)
+                    if npu_dir_0 is None:
+                        self.log.warning("Can not find any sub graph dir named %s", sub_graph + '_0')
+                        continue
+                    npu_dir = npu_dir_0
+                self.vector_compare(npu_dir, tf_root_dir, result_dir, graph_file)
+
+    @catch_tool_exception
+    def _get_sub_dir_by_sub_graph_name(self, sub_graph, npu_root_dir):
+        sub_graph_dirs = []
+        for dir_path, dir_names, file_names in os.walk(npu_root_dir, followlinks=True):
+            if sub_graph in dir_names:
+                # walk sub graph dir
+                for sub_dir_path, sub_dir_names, sub_file_names in os.walk(os.path.join(dir_path, sub_graph),
+                                                                           followlinks=True):
+                    if len(sub_dir_names) == 0:
+                        sub_graph_dirs.append(sub_dir_path)
+        if len(sub_graph_dirs) == 0:
+            return None
+        self.log.warning("Find [%d] dirs in sub graph dir [%s], %s, will compare the first one.",
+                         len(sub_graph_dirs), sub_graph, sub_graph_dirs)
+        return sub_graph_dirs[0]
+
+    @catch_tool_exception
+    def _get_sub_graphs(self, graph_file):
+        with open(graph_file, 'r') as f:
+            graph_json = json.load(f)
+        if 'graph' not in graph_json:
+            raise PrecisionToolException("No graph in file: %s" % graph_file)
+        sub_graphs = []
+        for graph in graph_json['graph']:
+            sub_graphs.append(graph['name'])
+        return sub_graphs
+
+    '''
+    @staticmethod
+    def _get_ge_default_dirs(self, root_dir):
+        for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True):
+            for dir_name in dir_names:
+    '''
+
+    def npu_vector_compare(self, debug_0_root_dir, debug_1_root_dir):
+        """Compare two npu dump data
+        :param debug_0_root_dir: first npu debug dir
+        :param debug_1_root_dir: second npu debug dir
+        :return:
+        """
+        # debug_0_sub_dirs = self._get_ge_default_dirs(debug_0_root_dir)
+        # debug_1_sub_dirs = self._get_ge_default_dirs(debug_1_root_dir)
+
+    def vector_compare(self, lh_path, rh_path, result_dir, graph_json=None):
+        """Compare all ops"""
+        if lh_path is None or util.empty_dir(lh_path):
+            raise PrecisionToolException("No valid dump file in %s" % lh_path)
+        if rh_path is None or util.empty_dir(rh_path):
+            raise PrecisionToolException("No valid dump file in %s" % rh_path)
+        self.log.info("Start vector compare process...")
+        util.compare_vector(lh_path, rh_path, graph_json, result_dir)
+        self.log.info("Vector compare process finish.")
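+
+    # Usage sketch (illustrative paths): compare one npu dump dir against a tf dump
+    # dir and write result csv files under cfg.VECTOR_COMPARE_PATH:
+    #   cmp = Compare()
+    #   cmp.prepare()
+    #   cmp.vector_compare('precision_data/npu/debug_0/dump', 'precision_data/tf/dump',
+    #                      cfg.VECTOR_COMPARE_PATH)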
+
+    def _get_compare_result_by_file_name(self, file_name):
+        results = []
+        if file_name is None:
+            sub_dir = util.get_newest_dir(cfg.VECTOR_COMPARE_PATH)
+            if sub_dir == '':
+                raise PrecisionToolException("Empty vector compare path:%s" % cfg.VECTOR_COMPARE_PATH)
+            file_name = os.path.join(cfg.VECTOR_COMPARE_PATH, sub_dir)
+        if os.path.isfile(file_name):
+            results.append(CompareResult(file_name))
+        if os.path.isdir(file_name):
+            vector_compare_result_files = util.list_vector_compare_result_files(file_name)
+            if vector_compare_result_files is None or len(vector_compare_result_files) == 0:
+                raise PrecisionToolException("Can not find any vector compare result in dir:%s" % file_name)
+            file_list = sorted(vector_compare_result_files.values(), key=lambda x: x.timestamp)
+            file_names = [x.file_name for x in file_list]
+            self.log.debug("Find %s result files in dir %s", file_names, file_name)
+            for file in file_list:
+                results.append(CompareResult(file.path))
+        return results
+
+    @catch_tool_exception
+    def vector_summary(self, file_name=None, cos_sim_threshold=0.98, limit=1):
+        """Print the ops whose cosine similarity falls below the threshold in a vector compare result"""
+        compare_results = self._get_compare_result_by_file_name(file_name)
+        error_ops = []
+        for compare_result in compare_results:
+            err_ops = compare_result.get_op_by_cosine_sim_threshold(cos_sim_threshold, limit)
+            self.log.info("Find %d ops less than %s in %s", len(err_ops), cos_sim_threshold,
+                          compare_result.file_path)
+            error_ops.extend(err_ops)
+        if len(error_ops) == 0:
+            self.log.info("Can not find any op with cosine similarity below threshold: %s", cos_sim_threshold)
+        else:
+            for i, error_op in enumerate(error_ops):
+                if i < limit:
+                    error_op.summary(cos_sim_threshold)
+        return error_ops
+
+    def compare_data(self, left, right, save_txt=False, rl=0.001, al=0.001, diff_count=20):
+        """Compare data"""
+        left = self._detect_file(left)
+        right = self._detect_file(right)
+        if left is None or right is None:
+            raise PrecisionToolException("invalid input or output")
+        # save to txt
+        if save_txt:
+            util.save_npy_to_txt(left)
+            util.save_npy_to_txt(right)
+        # compare data
+        total_cnt, all_close, cos_sim, err_percent = self._do_compare_data(left, right, rl, al, diff_count)
+        content = ['Left:', ' ├─ NpyFile: %s' % left]
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % left)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(left))
+        content.append('Right:')
+        content.append(' ├─ NpyFile: %s' % right)
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % right)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(right))
+        content.append('NumCnt: %s' % total_cnt)
+        content.append('AllClose: %s' % all_close)
+        content.append('CosSim: %s' % cos_sim)
+        content.append('ErrorPer: %s (rl= %s, al= %s)' % (err_percent, rl, al))
+        util.print_panel(Constant.NEW_LINE.join(content))
+
+    def _do_compare_data(self, left, right, rl=0.001, al=0.001, diff_count=20):
+        data_left = np.load(left).astype(np.float32)
+        data_right = np.load(right).astype(np.float32)
+        shape_left = data_left.shape
+        shape_right = data_right.shape
+        if shape_left != shape_right:
+            self.log.warning("Data shape not equal: %s vs %s", data_left.shape, data_right.shape)
+        data_left = data_left.reshape(-1)
+        data_right = data_right.reshape(-1)
+        if data_left.shape[0] != data_right.shape[0]:
+            self.log.warning("Data size not equal: %s vs %s", data_left.shape, data_right.shape)
+            # zero-pad the shorter array so the element-wise comparison is defined
+            if data_left.shape[0] < data_right.shape[0]:
+                data_left = np.pad(data_left, (0, data_right.shape[0] - data_left.shape[0]), 'constant')
+            else:
+                data_right = np.pad(data_right, (0, data_left.shape[0] - data_right.shape[0]), 'constant')
+        all_close = np.allclose(data_left, data_right, atol=al, rtol=rl)
+        # cos_sim = 1 - spatial.distance.cosine(data_left, data_right)
+        cos_sim = np.dot(data_left, data_right) / (
+
np.sqrt(np.dot(data_left, data_left)) * np.sqrt(np.dot(data_right, data_right))) + err_cnt = 0 + total_cnt = data_left.shape[0] + diff_table_columns = ['Index', 'Left', 'Right', 'Diff'] + err_table = util.create_table("Error Item Table", diff_table_columns) + top_table = util.create_table("Top Item Table", diff_table_columns) + for i in range(total_cnt): + abs_diff = abs(data_left[i] - data_right[i]) + if i < diff_count: + top_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff)) + if abs_diff > (al + rl * abs(data_right[i])): + if err_cnt < diff_count: + err_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff)) + err_cnt += 1 + err_percent = float(err_cnt / total_cnt) + util.print(util.create_columns([err_table, top_table])) + return total_cnt, all_close, cos_sim, err_percent + + def _detect_file(self, file_name): + """Find files in npu/overflow/cpu dump dir""" + if os.path.isfile(file_name): + return file_name + for parent_dir in [cfg.TMP_DIR, cfg.TF_DUMP_DIR]: + file_infos = util.list_numpy_files(parent_dir, file_name) + if len(file_infos) > 0: + self.log.info("Find %s, choose first one.", list(file_infos.keys())) + return list(file_infos.values())[0].path + return None diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py new file mode 100644 index 000000000..5e67dd24d --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py @@ -0,0 +1,143 @@ +import collections +import os +import numpy as np +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import PrecisionToolException +from ..util.precision_tool_exception import catch_tool_exception + + +class RowMap(object): + """ + 'Index': 0, + 'LeftOp': 1, + 'RightOp': 2, + 'TensorIdx': 3, # TensorIndex + 'CosSim': 4, # CosineSimilarity + 'MaxAbs': 5, # MaxAbsoluteError + 'ARE': 6, # AccumulatedRelativeError + 'RED': 7, # RelativeEuclideanDistance + 'KLD': 8, # KullbackLeiblerDivergence + 'StandardDeviation': 9 # StandardDeviation + """ + def __init__(self, item=None): + self.index = 0 + self.left = 1 + self.right = 2 + self.tensor_index = 3 + self.cosine_similarity = 4 + self.max_abs = 5 + if item is not None: + self.update(item) + + def update(self, item): + for i, value in enumerate(item): + self.left = i if value == 'LeftOp' else self.left + self.right = i if value == 'RightOp' else self.right + self.tensor_index = i if value == 'TensorIndex' else self.tensor_index + self.cosine_similarity = i if value == 'CosineSimilarity' else self.cosine_similarity + self.max_abs = i if value == 'MaxAbsoluteError' else self.max_abs + + +class CompareItem(object): + def __init__(self, op_name, item, row_map): + self.row_map = row_map + self.index = int(item[self.row_map.index]) + self.op_name = op_name + self.left = item[self.row_map.left].split(" ") + self.right = item[self.row_map.right].split(" ") + self.input = [] + self.output = [] + + def update(self, item): + tensor_index = item[self.row_map.tensor_index] + if tensor_index not in ['NaN', '*']: + item_detail = tensor_index.split(':') + if len(item_detail) != 3: + raise PrecisionToolException("item:%d tensor index invalid. 
[%s]" % ( + item[self.row_map.index], tensor_index)) + if item_detail[1] == 'input': + self.input.insert(int(item_detail[2]), item) + else: + self.output.insert(int(item_detail[2]), item) + + def is_cosine_sim_over_threshold(self, threshold): + for item in self.output: + if item[self.row_map.cosine_similarity] == 'NaN': + continue + if float(item[self.row_map.cosine_similarity]) <= threshold: + return True + return False + + @staticmethod + def _color_data(data, threshold): + try: + data = float(data) + if np.isnan(data): + raise ValueError + elif data <= threshold: + return "[red]%s[/red]" % data + else: + return "[green]%s[/green]" % data + except ValueError: + return "[yellow]%s[/yellow]" % data + + def summary(self, threshold): + content = ["Left: %s" % self.left, "Right: %s" % self.right, "Input: "] + input_txt = [] + for i, item in enumerate(self.input): + input_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.extend([Constant.TAB_LINE.join(input_txt), "Output:"]) + output_txt = [] + for i, item in enumerate(self.output): + output_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.append(Constant.TAB_LINE.join(output_txt)) + title = "[%d] %s" % (self.index, self.op_name) + util.print_panel(Constant.NEW_LINE.join(content), title=title) + + +class CompareResult(object): + def __init__(self, file_path): + self.file_path = file_path + self.ops = None + self.prepare() + + @catch_tool_exception + def prepare(self): + if not str(self.file_path).endswith(Constant.Suffix.CSV): + raise PrecisionToolException("Compare result file %s not a csv file." % self.file_path) + if not os.path.isfile(self.file_path): + raise PrecisionToolException("Compare result file %s not exist." 
% self.file_path) + items = util.read_csv(self.file_path) + self.ops = collections.OrderedDict() + row_map = RowMap() + for item in items: + if item[row_map.index] == 'Index': + row_map.update(item) + continue + if item[row_map.tensor_index] in ['NaN', '*']: + continue + tensor_index = item[row_map.tensor_index] + op_name = tensor_index.split(":")[0] + if op_name not in self.ops: + self.ops[op_name] = CompareItem(op_name, item, row_map) + op = self.ops[op_name] + op.update(item) + + def get_compare_item_by_op(self, op_name): + if self.ops is None: + self.prepare() + if self.ops is None: + raise PrecisionToolException("Invalid compare result file: %s" % self.file_path) + if op_name in self.ops: + return self.ops[op_name] + return None + + def get_op_by_cosine_sim_threshold(self, threshold, limit=-1): + result = [] + for compare_item in self.ops.values(): + if compare_item.is_cosine_sim_over_threshold(threshold): + result.append(compare_item) + if len(result) == limit: + break + return result -- Gitee From aac2114b7a85409e20b497843443e1beb3c1f7a8 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:43:28 +0000 Subject: [PATCH 06/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/config/config.py | 114 ++++++++++++++++++ .../precision_tool/lib/config/fusion_off.cfg | 10 ++ .../lib/config/fusion_switch.cfg | 6 + 3 files changed, 130 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py new file mode 100644 index 000000000..f9568666c --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py @@ -0,0 +1,114 @@ +# coding=utf-8 +import os + +# Dump config '0|5|10' +TF_DUMP_STEP = '0' + +# path to run package operator cmp compare +# default may be /usr/local/Ascend/ +CMD_ROOT_PATH = '/usr/local/Ascend/' +ASCEND_SET_ENV = os.path.join(CMD_ROOT_PATH, 'bin/setenv.bash') + + +# ASCEND Log Path +ASCEND_LOG_PATH = '/root/ascend/log/plog/' + +# TOOL CONFIG +LOG_LEVEL = "NOTSET" +ROOT_DIR = '' + +# [train/infer] if adapt from msquickcmp result, set net type to infer +NET_TYPE = 'infer' + +''' +precision_data/ +├── npu +│ ├── debug_0 +| | ├── dump +| | | └── 20210510101133 +| │ └── graph +| | └── ge_proto_00000179_PreRunAfterBuild.txt +│ └── debug_1 +├── tf +| ├── tf_debug +| └── dump +├── overflow +├── fusion +└── temp + ├── op_graph + ├── decode + | ├── dump_decode + | ├── overflow_decode + | └── dump_convert + └── vector_compare + ├── 20210510101133 + | ├── result_123456.csv + | └── result_123455.csv + └── 20210510101134 + └── result_123458.csv +''' + +# Static dirs, do not change +DATA_ROOT_DIR = os.path.join(ROOT_DIR, 'precision_data') + +# fusion +FUSION_DIR = os.path.join(DATA_ROOT_DIR, 'fusion') + +# npu dump/graph parent dir +NPU_DIR = os.path.join(DATA_ROOT_DIR, 'npu') +DEFAULT_NPU_DIR = os.path.join(NPU_DIR, 'debug_0') +DEFAULT_NPU_DUMP_DIR = os.path.join(DEFAULT_NPU_DIR, 'dump') +DEFAULT_NPU_GRAPH_DIR = 
os.path.join(DEFAULT_NPU_DIR, 'graph') +PROFILING_DIR = os.path.join(DEFAULT_NPU_DIR, 'prof') +DEFAULT_OP_DEBUG_DIR = DEFAULT_NPU_DIR + +# npu overflow dir +OVERFLOW_DIR = os.path.join(DATA_ROOT_DIR, 'overflow') +NPU_OVERFLOW_DUMP_DIR = os.path.join(OVERFLOW_DIR, 'dump') + +# tf dirs +TF_DIR = os.path.join(DATA_ROOT_DIR, 'tf') +TF_DEBUG_DUMP_DIR = os.path.join(TF_DIR, 'tf_debug') +TF_DUMP_DIR = os.path.join(TF_DIR, 'dump') +TF_GRAPH_DIR = os.path.join(TF_DIR, 'graph') +# tf checkpoints +TF_CKPT_ROOT = os.path.join(TF_DIR, 'checkpoints') +TF_CKPT_FILE = os.path.join(TF_CKPT_ROOT, 'ckpt') +TF_CKPT_INPUT_DIR = os.path.join(TF_CKPT_ROOT, 'input') + +# pytroch dirs +PT_DIR = os.path.join(DATA_ROOT_DIR, 'pt') +PT_NPU_DIR = os.path.join(PT_DIR, 'npu') +PT_GPU_DIR = os.path.join(PT_DIR, 'gpu') + +# tmp dirs +TMP_DIR = os.path.join(DATA_ROOT_DIR, 'temp') +OP_GRAPH_DIR = os.path.join(TMP_DIR, 'op_graph') + +DECODE_DIR = os.path.join(TMP_DIR, 'decode') +OVERFLOW_DECODE_DIR = os.path.join(DECODE_DIR, 'overflow_decode') +DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'dump_decode') +PT_DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'pt') +DUMP_CONVERT_DIR = os.path.join(DECODE_DIR, 'dump_convert') + +VECTOR_COMPARE_PATH = os.path.join(TMP_DIR, 'vector_compare') +TF_TENSOR_NAMES = os.path.join(TMP_DIR, 'tf_tensor_names.txt') +TF_TENSOR_DUMP_CMD = os.path.join(TMP_DIR, 'tf_tensor_cmd.txt') + +# FLAG +PRECISION_TOOL_OVERFLOW_FLAG = 'PRECISION_TOOL_OVERFLOW' +PRECISION_TOOL_DUMP_FLAG = 'PRECISION_TOOL_DUMP' + +# for previous version, set 0 +OP_DEBUG_LEVEL = 4 +# DUMP CONFIG +DUMP_GE_GRAPH_VALUE = 2 +DUMP_GRAPH_LEVEL_VALUE = 3 +DUMP_SEED = 2022 + +# TF_DEBUG +TF_DEBUG_TIMEOUT = 360 + +# MSACCUCMP +MS_ACCU_CMP = r'msaccucmp.py[c]?' +BUILD_JSON_GRAPH_NAME = 'Build' diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg new file mode 100644 index 000000000..823672b74 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg @@ -0,0 +1,10 @@ +{ + "Switch": { + "GraphFusion": { + "ALL": "off" + }, + "UBFusion": { + "ALL": "off" + } + } +} \ No newline at end of file diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg new file mode 100644 index 000000000..572ad4271 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg @@ -0,0 +1,6 @@ +{ +"Switch": { + "GraphFusion": {}, + "UBFusion": {} +} +} \ No newline at end of file -- Gitee From 6e4ed2805780821e34bb528d3f300faf8904d4f8 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:43:47 +0000 Subject: [PATCH 07/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/dump/dump_manager.py | 89 ++++++++ .../precision_tool/lib/dump/npu_dump.py | 200 ++++++++++++++++++ .../precision_tool/lib/dump/pt_dump.py | 65 ++++++ .../precision_tool/lib/dump/tf_dump.py | 141 ++++++++++++ 4 files changed, 495 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py create mode 100644 
TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py
new file mode 100644
index 000000000..939f4fa47
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py
@@ -0,0 +1,89 @@
+# coding=utf-8
+import os
+import collections
+from ..util.util import util
+from ..util.constant import Constant
+from .npu_dump import NpuDump
+from .tf_dump import TfDump
+from .pt_dump import PtDump
+from ..config import config as cfg
+
+
+class DumpManager(object):
+    def __init__(self):
+        self.npu_dumps = collections.OrderedDict()
+        self.pt_dump = PtDump(cfg.PT_DIR)
+        self.tf_dump = TfDump(cfg.TF_DUMP_DIR)
+        self._init_dirs()
+
+    def prepare(self):
+        # 1. prepare npu dump
+        sub_dirs = os.listdir(cfg.NPU_DIR)
+        if len(sub_dirs) == 0:
+            # create default
+            sub_dirs = [Constant.DEFAULT_DEBUG_ID]
+        # sorted() returns a new list; assign it so debug ids are processed in order
+        sub_dirs = sorted(sub_dirs)
+        for sub_dir in sub_dirs:
+            npu_dump = NpuDump(sub_dir)
+            npu_dump.prepare()
+            self.npu_dumps[sub_dir] = npu_dump
+        # 2. prepare tf dump
+        self.tf_dump.prepare()
+        # 3. prepare pt dump
+        self.pt_dump.prepare()
+
+    def get_dump_root_dir(self, debug_id):
+        if debug_id in self.npu_dumps:
+            return self.npu_dumps[debug_id].dump_root
+        return None
+
+    def op_dump_summary(self, ops):
+        npu_result = collections.OrderedDict()
+        for debug_id, op in ops.items():
+            if debug_id in self.npu_dumps:
+                npu_result[debug_id] = collections.OrderedDict()
+                for op_detail in op:
+                    npu_result[debug_id][op_detail.graph_name] = self.npu_dumps[debug_id].op_dump_summary(op_detail)
+        tf_result = None
+        if self.tf_dump is not None and len(ops[Constant.DEFAULT_DEBUG_ID]) != 0:
+            tf_result = self.tf_dump.op_dump_summary(ops[Constant.DEFAULT_DEBUG_ID][0])
+        return npu_result, tf_result
+
+    def pt_dump_summary(self, ir_name):
+        """Pytorch dump summary"""
+        return self.pt_dump.op_dump_summary(ir_name)
+
+    def convert_npu_dump(self, name, data_format=None, dst_path=None):
+        for _, npu_dump in enumerate(self.npu_dumps.values()):
+            npu_dump.convert_npu_dump(name, data_format, dst_path)
+
+    def print_tensor(self, file_name, is_convert):
+        """Print numpy data file"""
+        if os.path.isfile(file_name):
+            return util.print_npy_summary(os.path.dirname(file_name), os.path.basename(file_name), is_convert)
+        # file_name = file_name.replace('/', '_')
+        # npu decode file
+        npu_convert_files = self.npu_dumps[Constant.DEFAULT_DEBUG_ID].get_npu_dump_decode_files_by_name(file_name)
+        self._print_tensors(npu_convert_files, is_convert)
+        # util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name)
+        # tf decode file
+        tf_decode_files = self.tf_dump.get_dump_files_by_name(file_name, True)
+        self._print_tensors(tf_decode_files, is_convert)
+        # pt decode file
+        pt_decode_files = self.pt_dump.get_dump_files_by_name(file_name)
+        self._print_tensors(pt_decode_files, is_convert)
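+
+    # Usage sketch (the file name is illustrative): after a run has produced dump
+    # files under precision_data/, a caller would typically do:
+    #   manager = DumpManager()
+    #   manager.prepare()
+    #   manager.print_tensor('Add.add_1.output.0', is_convert=True)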
+
+    @staticmethod
+    def _print_tensors(file_infos, is_convert):
+        if file_infos is not None:
+            for file_info in file_infos.values():
+                util.print_npy_summary(file_info.dir_path, file_info.file_name, is_convert)
+
+    @staticmethod
+    def _init_dirs():
+        """Create dump file dirs"""
+        util.create_dir(cfg.DUMP_DECODE_DIR)
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        util.create_dir(cfg.OVERFLOW_DECODE_DIR)
+        util.create_dir(cfg.TF_DUMP_DIR)
+        util.create_dir(cfg.PT_DIR)
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py
new file mode 100644
index 000000000..7e3e4a9d8
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+import re
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.precision_tool_exception import PrecisionToolException
+from ..config import config as cfg
+
+
+class NpuDumpDecodeFile(object):
+    def __init__(self):
+        self.log = util.get_log()
+        self.input_files = {}
+        self.output_files = {}
+        self.timestamp = -1
+        self.op_name = ''
+        self.op_type = ''
+        self.task_id = -1
+        # self.stream_id = -1
+
+    def update(self, file_info):
+        """Prepare op npu decode file map."""
+        if not self._check(file_info):
+            self.log.warning('Invalid NpuDumpDecodeFile: %s', file_info)
+            return
+        if file_info.type == 'input':
+            self.input_files[file_info.idx] = file_info
+        else:
+            self.output_files[file_info.idx] = file_info
+
+    def summary(self):
+        txt = ['[yellow][%s][TaskID: %d][/yellow][green][%s][/green] %s' % (
+            self.timestamp, self.task_id, self.op_type, self.op_name)]
+        if len(self.input_files) > 0:
+            info = self.input_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Input:  [green][0][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.input_files)):
+                info = self.input_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('           [green][%d][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        if len(self.output_files) > 0:
+            info = self.output_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Output: [green][0][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.output_files)):
+                info = self.output_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('           [green][%d][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        return Constant.NEW_LINE.join(txt)
+
+    def _check(self, file_info):
+        if self.timestamp == -1:
+            self.timestamp = file_info.timestamp
+            self.op_name = file_info.op_name
+            self.op_type = file_info.op_type
+            self.task_id = file_info.task_id
+            # self.stream_id = file_info.stream_id
+            return True
+        # file_info is an object, not a dict; compare the timestamp attribute
+        return self.timestamp == file_info.timestamp
+
+
+class NpuDump(object):
+    def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_id = debug_id
+        npu_root = os.path.join(cfg.NPU_DIR, debug_id)
+        self.dump_root = os.path.join(npu_root, Constant.DUMP)
+
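+        # decode_dir holds the npy files converted from this debug_id's raw dump data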
self.decode_dir = os.path.join(cfg.DUMP_DECODE_DIR, debug_id) + self.dump_files = None + self._init_dirs() + + def prepare(self): + """Prepare npu/cpu dump files""" + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get npu dump files by Op""" + npu_files = {} + op_name = op.name().replace('/', '_').replace('.', '_') + match_name = op.type() + '.' + op_name + '\\.' + for f in self.dump_files: + # match op name and graph name, infer dump directory may not has graph + if re.match(match_name, f) and (op.graph_name in self.dump_files[f].path or cfg.NET_TYPE == 'infer'): + npu_files[f] = self.dump_files[f] + return npu_files + + @catch_tool_exception + def op_dump_summary(self, op): + """ print op dump info""" + if op is None: + raise PrecisionToolException("Get None operator") + # search npu dump file by op name + npu_dump_files = self.get_npu_dump_decode_files_by_op(op) + npu_dump_files = sorted(npu_dump_files.values(), key=lambda x: (x.idx, x.timestamp)) + input_txt = ['NpuDumpInput:'] + output_txt = ['NpuDumpOutput:'] + for npu_dump_file in npu_dump_files: + if str(npu_dump_file.file_name).endswith(Constant.Suffix.CSV): + detail = util.read_csv(npu_dump_file.path) + input_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + for item in detail: + item_txt = '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (item[5], item[3], item[6], item[7], item[8]) + if item[0] == 'Input': + input_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + elif item[0] == 'Output': + output_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + continue + if npu_dump_file.type == 'input': + input_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + input_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + else: + output_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + input_txt.extend(output_txt) + return Constant.NEW_LINE.join(input_txt) + + def _init_dirs(self): + util.create_dir(self.dump_root) + util.create_dir(self.decode_dir) + + @catch_tool_exception + def _parse_dump_files(self): + """prepare npu dump, support soft link""" + sub_dir = util.get_newest_dir(self.dump_root) + sub_dir = os.path.join(self.dump_root, sub_dir) if sub_dir != '' else self.dump_root + self.dump_files = util.list_npu_dump_files(sub_dir) + + def list_dump(self, dir_path, file_name): + """list dump""" + + @staticmethod + def get_npu_dump_decode_files_by_name(file_name): + file_name = file_name.replace('/', '_') + return util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name) + + def get_npu_dump_decode_files_by_op(self, op): + """Get npu dump decode files by op""" + dump_files = self.get_dump_files_by_op(op) + result = {} + for dump_file_key in dump_files.keys(): + dump_file = dump_files[dump_file_key] + if str(dump_file.file_name).endswith(Constant.Suffix.CSV): + result.update({dump_file_key: dump_file}) + continue + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + if len(dump_decode_files) == 0: + util.convert_dump_to_npy(dump_file.path, self.decode_dir) + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + result.update(dump_decode_files) + return result + + def 
convert_npu_dump(self, name, data_format=None, dst_path=None): + """Convert npu dump to npy of data_format""" + if os.path.isfile(name): + # absolute path to file + self.log.info("Decode file: %s", name) + file_name = os.path.basename(name) + file_path = name + elif os.path.isdir(name): + # decode all files in path + self.log.info("Decode all files in path: %s", name) + file_name = '' + file_path = name + elif self.dump_files is not None and name in self.dump_files: + self.log.info("Decode npu dump file: %s in default dump path", name) + file_info = self.dump_files[name] + file_name = file_info.file_name + file_path = file_info.path + else: + # maybe op name + file_info = self._get_file_by_op_name(name) + if file_info is None: + raise PrecisionToolException("Can not find any op/dump file named %s" % name) + file_name = file_info.file_name + file_path = file_info.path + dst_path = cfg.DUMP_CONVERT_DIR if dst_path is None else dst_path + util.convert_dump_to_npy(file_path, dst_path, data_format) + dump_convert_files = util.list_npu_dump_convert_files(dst_path, file_name) + # print result info + + summary_txt = ['SrcFile: %s' % name] + for convert_file in dump_convert_files.values(): + summary_txt.append(' - %s' % convert_file.file_name) + util.print_panel(Constant.NEW_LINE.join(summary_txt)) + + def _get_file_by_op_name(self, op_name): + """Get dump file info by op name""" + op_name = op_name.replace('/', '_') + for file_info in self.dump_files.values(): + if file_info.op_name == op_name: + return file_info + return None diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py new file mode 100644 index 000000000..27fd006dc --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py @@ -0,0 +1,65 @@ +# coding=utf-8 +from ..util.util import util +from ..util.h5_util import H5Util +from ..util.h5_util import gen_h5_data_name +from ..config import config as cfg +from ..util.constant import Constant + + +class PtDump(object): + def __init__(self, data_dir): + self.log = util.get_log() + self.npu = None + self.gpu = None + self.data_dir = data_dir + + def prepare(self): + util.create_dir(cfg.PT_NPU_DIR) + util.create_dir(cfg.PT_GPU_DIR) + util.create_dir(cfg.PT_DUMP_DECODE_DIR) + if not util.empty_dir(cfg.PT_NPU_DIR): + npu_h5_files = util.list_h5_files(cfg.PT_NPU_DIR) + if len(npu_h5_files) != 0: + file_list = sorted(npu_h5_files.values(), key=lambda x: x.timestamp) + self.npu = H5Util(file_list[0].path, prefix='npu') + if not util.empty_dir(cfg.PT_GPU_DIR): + gpu_h5_files = util.list_h5_files(cfg.PT_GPU_DIR) + if len(gpu_h5_files) != 0: + file_list = sorted(gpu_h5_files.values(), key=lambda x: x.timestamp) + self.gpu = H5Util(file_list[0].path, prefix='gpu') + + @staticmethod + def get_dump_files_by_name(file_name): + """Get dump files by name""" + npu_pattern = gen_h5_data_name(file_name, 'npu') if '/' in file_name else file_name + gpu_pattern = gen_h5_data_name(file_name, 'gpu') if '/' in file_name else file_name + files = util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=npu_pattern) + files.update(util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=gpu_pattern)) + return files + + def op_dump_summary(self, ir_name): + summary_list = [] + op_id = self._parse_op_id(ir_name) + if self.npu is not None: + h5_op = self.npu.get_op(op_id) 
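+            # get_op returns None when op_id was not recorded in the H5 dump,
+            # so only append a summary for ops that actually exist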
+ if h5_op is not None: + summary_list.append('NPU:') + summary_list.append(h5_op.summary()) + if self.gpu is not None: + h5_op = self.gpu.get_op(op_id) + if h5_op is not None: + summary_list.append('GPU:') + summary_list.append(h5_op.summary()) + return Constant.NEW_LINE.join(summary_list) + + @staticmethod + def _parse_op_id(ir_name): + op_id = str(ir_name) + if op_id.isnumeric(): + op_id = ir_name + else: + for name in op_id.split('/'): + if name.isnumeric(): + op_id = name + break + return op_id diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py new file mode 100644 index 000000000..bb4230f54 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py @@ -0,0 +1,141 @@ +# coding=utf-8 +import os +import re +import time +import sys +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class TfDump(object): + def __init__(self, dump_root=cfg.TF_DUMP_DIR): + self.log = util.get_log() + self.dump_root = dump_root + self.dump_files = None + + def prepare(self): + if not os.path.exists(self.dump_root): + util.create_dir(self.dump_root) + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get cpu dump files by op""" + tf_files = {} + for output in op.outputs(): + if output.data_dump_origin_name() != '': + tf_files.update(self.get_dump_files_by_name(output.data_dump_origin_name())) + if len(tf_files) == 0: + tf_files.update(self.get_dump_files_by_name(op.name())) + return tf_files + + def get_dump_files_by_name(self, name, likely=False): + match_name = name.replace('/', '_') + if not likely: + match_name = match_name.replace('.', '_') + '\\.' + tf_files = {} + for f in self.dump_files: + if re.match(match_name, f): + tf_files[f] = self.dump_files[f] + return tf_files + + @catch_tool_exception + def op_dump_summary(self, op): + # cpu dump info + if op is None: + return '' + cpu_dump_txt = ['TfDumpOutput:'] + cpu_dump_files = self.get_dump_files_by_op(op) + cpu_dump_file_list = sorted(cpu_dump_files.values(), key=lambda x: x.timestamp) + for cpu_dump_file in cpu_dump_file_list: + cpu_dump_txt.append(' -[green][%s][/green] %s' % (cpu_dump_file.idx, cpu_dump_file.file_name)) + cpu_dump_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(cpu_dump_file.path)) + return Constant.NEW_LINE.join(cpu_dump_txt) + + def _parse_dump_files(self): + self.dump_files = util.list_cpu_dump_decode_files(self.dump_root) + + def run_tf_dbg_dump(self, cmd_line=None): + """Run tf train script to get dump data.""" + if os.path.exists(cfg.TF_DEBUG_DUMP_DIR) and len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) != 0: + self.log.info("TF offline debug path [%s] is not empty, will analyze it directly." % cfg.TF_DEBUG_DUMP_DIR) + elif cmd_line is not None: + self.log.info("Run command: %s" % cmd_line) + util.execute_command(cmd_line) + self.log.info("Run finish, start analyze TF dump.") + if not os.path.exists(cfg.TF_DEBUG_DUMP_DIR) or len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) == 0: + raise PrecisionToolException("Empty tf debug dir. 
%s" % cfg.TF_DEBUG_DUMP_DIR)
+        run_dirs = os.listdir(cfg.TF_DEBUG_DUMP_DIR)
+        run_dirs.sort()
+        # create dirs
+        util.create_dir(cfg.TF_DUMP_DIR)
+        util.create_dir(cfg.TMP_DIR)
+        # analyze each run dir in timestamp order (the newest is last)
+        for run_dir in run_dirs:
+            time.sleep(1)
+            command = "%s -m tensorflow.python.debug.cli.offline_analyzer --ui_type readline --dump_dir %s" % (
+                util.python, os.path.join(cfg.TF_DEBUG_DUMP_DIR, run_dir))
+            self._do_run_tf_dbg_dump(command, 0)
+
+    @staticmethod
+    def _make_pt_commands(tensor_name_path):
+        pt_command_list = []
+        tensor_count = {}
+        with open(tensor_name_path) as tensor_name_file:
+            # skip the 3 header lines of the tensor name file
+            next(tensor_name_file)
+            next(tensor_name_file)
+            next(tensor_name_file)
+            # start to convert each tensor into a pt command
+            for line in tensor_name_file:
+                new_line = line.strip()
+                tensor_name = new_line[new_line.rfind(' ') + 1:]
+                if tensor_name not in tensor_count:
+                    tensor_count[tensor_name] = 0
+                else:
+                    tensor_count[tensor_name] += 1
+
+                npy_file_name = "%s.%s.npy" % (tensor_name.replace("/", "_").replace(":", "."),
+                                               str(round(time.time() * 1000000)))
+                pt_command_list.append("pt %s -n %d -w %s" %
+                                       (tensor_name, tensor_count[tensor_name],
+                                        os.path.join(cfg.TF_DUMP_DIR, npy_file_name)))
+        return pt_command_list
+
+    def _do_run_tf_dbg_dump(self, cmd_line, run_times=2):
+        """Run tf debug with pexpect, should set tf debug ui_type='readline'"""
+        try:
+            import pexpect
+            import readline
+        except ImportError as import_err:
+            self.log.error("Import failed with err:%s. You can run "
+                           "'pip3 install pexpect gnureadline pyreadline' to fix it.",
+                           import_err)
+            raise PrecisionToolException("Import module error.")
+        self.log.info("======< Auto run tf train process to dump data >======")
+        self.log.info("Send run times: %d", run_times)
+        tf_dbg = pexpect.spawn(cmd_line)
+        # tf_dbg.logfile = open(cfg.DUMP_FILES_CPU_LOG, 'wb')
+        tf_dbg.logfile = sys.stdout.buffer
+        for i in range(run_times):
+            tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT)
+            self.log.info("Process %d tf_debug.run", i + 1)
+            tf_dbg.sendline('run')
+        self.log.info("Generate tensor name file.")
+        tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT)
+        tf_dbg.sendline('lt > %s' % cfg.TF_TENSOR_NAMES)
+        tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT)
+        if not os.path.exists(cfg.TF_TENSOR_NAMES):
+            self.log.error("Failed to get tensor name in tf_debug.")
+            raise PrecisionToolException("Get tensor name in tf_debug failed.")
+        self.log.info("Saved tensor names successfully. Generating tf dump commands from file: %s", cfg.TF_TENSOR_NAMES)
+        pt_commands = self._make_pt_commands(cfg.TF_TENSOR_NAMES)
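+        # each command follows the "pt <tensor_name> -n <hit_index> -w <output.npy>" form
+        # built in _make_pt_commands, e.g. (illustrative tensor name):
+        #   pt dense/kernel/read:0 -n 0 -w <TF_DUMP_DIR>/dense_kernel_read.0.1721192000000000.npy
+        self.log.info("Pt %d tensors." 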
% len(pt_commands)) + for cmd in pt_commands: + self.log.debug(cmd.strip()) + tf_dbg.sendline(cmd.strip()) + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + tf_dbg.sendline('exit') + self.log.info('Finish dump tf data') -- Gitee From 33c590f7622b8efe669dfcb3dc774253772a5a3d Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:44:01 +0000 Subject: [PATCH 08/38] 1 Signed-off-by: huangju1993 --- .../examples/precision_tool/lib/graph/desc.py | 154 +++++++++ .../precision_tool/lib/graph/graph_manager.py | 111 ++++++ .../precision_tool/lib/graph/npu_graph.py | 316 ++++++++++++++++++ .../examples/precision_tool/lib/graph/op.py | 255 ++++++++++++++ .../precision_tool/lib/graph/tf_graph.py | 45 +++ 5 files changed, 881 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py new file mode 100644 index 000000000..5ac35dbfd --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py @@ -0,0 +1,154 @@ +# coding=utf-8 +from ..util.util import util + +ATTR = 'attr' +ATTR_KEY = 'key' +ATTR_VALUE = 'value' +DATA_DUMP_ORIGIN_OUTPUT_INDEX = '_datadump_origin_output_index' +FUSION_ORIGIN_OUTPUT_INDEX = '_fusion_origin_output_index' +DATA_DUMP_ORIGIN_NAME = '_datadump_origin_name' +ORIGIN_FORMAT = 'origin_format' +ORIGIN_SHAPE = 'origin_shape' +VALUE_RANGE = 'value_range' +SHAPE_RANGE = 'shape_range' +DT_STRING = 's' +DT_INT = 'i' +DT_LIST_LIST_INT = 'list_list_int' +DT_LIST_LIST_I = 'list_list_i' +DT_LIST = 'list' +DT_LIST_INT = 'list_i' +DATA_TYPE_DEFAULT_VALUE = { + 'i': 0, + 's': '' +} + + +class Desc(object): + """ Op desc + shape: data shape + dtype: data type + format: data format + npu_file: npu file name/path + cpu_file: cpu file name/path + idx: input idx + """ + def __init__(self, desc_json, index): + self.desc_json = desc_json + self.index = index + self.log = util.get_log() + + def idx(self): + return self.index + + def shape(self): + return self.desc_json['shape']['dim'] if 'shape' in self.desc_json else [] + + def dtype(self): + return self.desc_json['dtype'] if 'dtype' in self.desc_json else '' + + def format(self): + return self.desc_json['layout'] if 'layout' in self.desc_json else [] + + def origin_shape(self): + return self._get_attr_list(ORIGIN_SHAPE, DT_INT) + + def origin_format(self): + return self._get_attr(ORIGIN_FORMAT, DT_STRING) + + def value_range(self): + return self._get_attr_list_list(VALUE_RANGE, DT_LIST_INT) + + def shape_range(self): + return self._get_attr_list_list(SHAPE_RANGE, DT_LIST_INT) + + def _get_attr_list_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST_LIST_INT) + if val is None or DT_LIST_LIST_I not in val: + return [] + res = [] + for 
item in val[DT_LIST_LIST_I]: + if data_type in item: + res.append(item[data_type]) + return res + + def _get_attr_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST) + return val[data_type] if val is not None and data_type in val else [] + + def _get_attr(self, key, data_type): + val = self._get_attr_base(key, data_type) + return val if val is not None else DATA_TYPE_DEFAULT_VALUE[data_type] + + def _get_attr_base(self, key, data_type): + if ATTR in self.desc_json: + for attr in self.desc_json[ATTR]: + if attr[ATTR_KEY] == key: + if attr[ATTR_VALUE] is not None and data_type in attr[ATTR_VALUE]: + return attr[ATTR_VALUE][data_type] + return None + + def compare(self, right_desc): + if self.dtype() == right_desc.dtype() and self.format() == right_desc.format(): + return "[green][%d] [%s][%s] %s[/green]" % (self.idx(), self.dtype(), self.format(), self.shape()), True + else: + return "[yellow][%d] [%s][%s] %s | [%s][%s] %s[/yellow]" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + right_desc.dtype(), right_desc.format(), right_desc.shape()), False + + def data_dump_origin_name(self): + return '' + + +class InputDesc(Desc): + def __init__(self, name, desc_json, index): + super(InputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid input name.') + name_info = name.split(':') + self.op_name = name + self.peer_index = -2 + if len(name_info) == 2: + self.op_name = name_info[0] + self.peer_index = int(name_info[1]) + + def name(self): + return self.op_name + + def peer_idx(self): + return self.peer_index + + def is_control(self): + return self.peer_index == -1 + + def summary(self, origin_txt=False): + """idx | dtype | format | shape | [blue]value_range | shape_range| origin_shape[/blue] | op_name | peer_idx""" + if origin_txt: + return "[%d][%s][%s]%s %s:%d" % (self.idx(), self.dtype(), self.format(), + self.shape(), self.name(), self.peer_idx()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s:%d" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.name(), self.peer_idx()) + + +class OutputDesc(Desc): + def __init__(self, name, desc_json, index): + super(OutputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid output name.') + self.op_names = name.split(':') + + def names(self): + return self.op_names + + def summary(self, origin_txt=False): + if origin_txt: + return "[%d][%s][%s]%s %s" % (self.idx(), self.dtype(), self.format(), self.shape(), self.names()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.names()) + + def data_dump_origin_name(self): + return self._get_attr(DATA_DUMP_ORIGIN_NAME, DT_STRING) + + def data_dump_origin_output_index(self): + return self._get_attr(DATA_DUMP_ORIGIN_OUTPUT_INDEX, DT_INT) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py new file mode 100644 index 000000000..cef861741 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py @@ -0,0 +1,111 @@ +# coding=utf-8 +""" +Graph Manager +""" +import os +import collections +from 
..util.constant import Constant +from .npu_graph import NpuGraph +from .tf_graph import TfGraph +from ..util.util import util +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class GraphManager(object): + def __init__(self): + self.log = util.get_log() + self.npu_graphs = collections.OrderedDict() + self.tf_graph = None + + def prepare(self): + # prepare npu graphs + if not os.path.exists(cfg.NPU_DIR): + util.create_dir(cfg.NPU_DIR) + sub_dirs = os.listdir(cfg.NPU_DIR) + if len(sub_dirs) == 0: + # create default dir + sub_dirs = [Constant.DEFAULT_DEBUG_ID] + for sub_dir in sub_dirs: + npu_graph = NpuGraph(sub_dir) + npu_graph.prepare() + self.npu_graphs[sub_dir] = npu_graph + # prepare cpu graph + self.tf_graph = TfGraph(cfg.TF_GRAPH_DIR) + + def check_cast(self): + for graph in self.npu_graphs.values(): + graph.check_cast() + + def check_dtype(self): + for graph in self.npu_graphs.values(): + graph.check_dtype() + + def check_similarity(self): + self._check_npu_graph_similarity() + + def _check_npu_graph_similarity(self): + """Check npu graph similarity""" + if len(self.npu_graphs) < 2: + self.log.debug("Only one npu debug, no need to check npu graph similarity.") + return + left_graphs = self.npu_graphs[Constant.DEFAULT_DEBUG_ID].sub_graphs + right_graphs = self.npu_graphs[Constant.NPU_DEBUG_ID_1].sub_graphs + for left_graph in left_graphs.values(): + for right_graph in right_graphs.values(): + if left_graph.graph_id != right_graph.graph_id: + continue + left_graph.compare(right_graph) + + def get_graphs(self, debug_id): + if debug_id not in self.npu_graphs: + raise PrecisionToolException("Get graphs failed with no debug_id:%s" % debug_id) + return self.npu_graphs[debug_id].build_json_files + + def get_ops(self, op_name, graph_name=None): + """ Get npu/tf ops by op_name + :param op_name: op name + :param graph_name: graph name + :return: npu op dict: debug_id->Op, tf op + """ + npu_ops = collections.OrderedDict() + for debug_id, npu_graph in self.npu_graphs.items(): + npu_ops[debug_id] = npu_graph.get_op(op_name, graph_name) + # tf graph op + return npu_ops, None + + def print_op_list(self, op_type='', op_name='', pass_name='', kernel_name=''): + if op_type == '' and op_name == '' and pass_name == '' and kernel_name == '': + table_list = [] + for debug_id, graph in self.npu_graphs.items(): + table = util.create_table(debug_id, ["OpType", "Count"]) + op_type_counter = collections.Counter() + for op in graph.ops_list: + op_type_counter[op.type()] += 1 + for op_types, count in op_type_counter.items(): + table.add_row(op_types, str(count)) + table_list.append(table) + util.render(util.create_columns(table_list)) + + else: + for debug_id, graph in self.npu_graphs.items(): + ops = graph.list_ops(op_type, op_name, pass_name, kernel_name) + ops_txt = ['[green][%s][/green][yellow][%s][/yellow] %s' % ( + op.type(), op.pass_name(), op.name()) for op in ops] + util.print_panel(Constant.NEW_LINE.join(ops_txt), debug_id) + + @staticmethod + def op_graph_summary(ops, attr_detail=False): + npu_summary = collections.OrderedDict() + for debug_id, op in ops.items(): + npu_summary[debug_id] = collections.OrderedDict() + for op_detail in op: + npu_summary[debug_id][op_detail.graph_name] = op_detail.summary(attr_detail=attr_detail) + return npu_summary, None + + def save_sub_graph(self, ops, deep): + for debug_id, op in ops.items(): + if debug_id in self.npu_graphs: + for op_detail in 
op: + self.npu_graphs[debug_id].save_sub_graph(op_detail, deep) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py new file mode 100644 index 000000000..5a93b9718 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py @@ -0,0 +1,316 @@ +# coding=utf-8 +""" +Graph Manager +""" +import json +import os +import collections +import time +from .op import Op +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + +DANGEROUS_CAST = { + 'DT_FLOAT': ['DT_INT32'] +} + +NO_DIG_OPS = ['AtomicAddrClean', 'NetOutput'] +CKPT_META_SHUFFIX='.meta' + +OP_CAST = 'Cast' + + +class NpuSubGraph(object): + def __init__(self, graph_json, build_file, npu_graph): + self.log = util.get_log() + self.graph_name = graph_json['name'] + self.npu_graph = npu_graph + self.graph = graph_json + self.build_file = build_file + self.ops_list = collections.OrderedDict() + self.ops_type_list = {} + self._prepare() + self.graph_id = self._get_graph_id() + + def _prepare(self): + self.log.debug("Graph %s operator count: %d" % (self.graph_name, len(self.graph['op']))) + for op_json in self.graph['op']: + op_name = op_json['name'] + op_type = op_json['type'] + if op_name not in self.ops_list: + self.ops_list[op_name] = [] + op = Op(op_json, self.ops_list, self.graph['name'], self.npu_graph, self) + if op_type not in self.ops_type_list: + self.ops_type_list[op_type] = {} + self.ops_list[op_name] = op + self.ops_type_list[op_type][op_name] = op + + def _get_graph_id(self): + if 'attr' in self.graph: + for item in self.graph['attr']: + if item['key'] == '_session_graph_id': + return item['value']['s'] + self.log.warning("Unknown sub graph id.") + return "UNKNOWN" + + def compare(self, sub_graph): + """compare with another sub graph""" + if not isinstance(sub_graph, NpuSubGraph): + raise PrecisionToolException("Should compare with another subgraph.") + right_ops_list = sub_graph.ops_list + ignore_ops = ["TransData", "Cast", "Recv", "Send", "Variable", "NetOutput", "NoOp", "Assign", "Constant", + "StreamActive"] + similar_count = 0 + for op_name in self.ops_list: + if self.ops_list[op_name].type() in ignore_ops: + continue + if op_name not in right_ops_list: + self.log.warning("Can not Find [%s] %s in right subgraph.", self.ops_list[op_name].type(), op_name) + continue + result, similar = self.ops_list[op_name].compare(right_ops_list[op_name]) + if not similar: + util.print_panel(result, title=op_name) + else: + similar_count += 1 + for op_name in right_ops_list: + if right_ops_list[op_name].type() in ignore_ops: + continue + if op_name not in self.ops_list: + self.log.warning("Can not Find [%s] %s in left subgraph.", right_ops_list[op_name].type(), op_name) + self.log.info("Compare [%s] [%s], similarity is [%s / %s]", + self.graph_name, sub_graph.graph_name, similar_count, len(self.ops_list)) + + def get_op(self, name): + if name in self.ops_list: + return [self.ops_list[name]] + guess_op_list = [] + for op_detail in self.ops_list.values(): + if name in op_detail.name() or name == str(op_detail.name()).replace('/', '_'): + guess_op_list.append(op_detail) + return guess_op_list + + def 
get_parent_node_by_subgraph_name(self, graph_name): + ops = [] + for op_detail in self.ops_list.values(): + if graph_name in op_detail.subgraph_names(): + ops.append(op_detail) + return ops + + def get_op_by_type(self, op_type): + ops = [] + for op_detail in self.ops_list.values(): + if op_type == op_detail.type(): + ops.append(op_detail) + return ops + + def check_cast(self): + cast_list = {} + danger_cast_list = {} + if OP_CAST in self.ops_type_list: + cast_ops = self.ops_type_list[OP_CAST] + for op in cast_ops.values(): + input_type = '' + output_type = '' + for input_desc in op.inputs(): + input_type = input_desc.dtype() if input_desc.dtype() != '' else input_type + for output_desc in op.outputs(): + output_type = output_desc.dtype() if output_desc.dtype() != '' else output_type + cast_type = "%s -> %s" % (input_type, output_type) + if cast_type not in cast_list: + cast_list[cast_type] = [] + cast_list[cast_type].append(op.name()) + for cast_type in cast_list: + if self._is_dangerous_cast(cast_type): + summary_txt = "[green][Cast][/green][red][%s][/red] %s" % (cast_type, cast_list[cast_type]) + util.print(summary_txt) + + @staticmethod + def _is_dangerous_cast(cast_type): + """Check if cast """ + cast_info = cast_type.split(" -> ") + input_dtype = cast_info[0] + output_dtype = cast_info[1] + if input_dtype in DANGEROUS_CAST: + if output_dtype in DANGEROUS_CAST[input_dtype]: + return True + return False + + +class NpuGraph(object): + def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID): + self.log = util.get_log() + self.build_files = None + self.build_json_files = [] + self.debug_id = debug_id + self.npu_root = os.path.join(cfg.NPU_DIR, debug_id) + self.graph_root = os.path.join(self.npu_root, Constant.GRAPH) + self.sub_graphs = collections.OrderedDict() + self.ops_list = [] + util.create_dir(self.graph_root) + + @catch_tool_exception + def prepare(self): + """prepare""" + self._prepare_npu_graphs() + if self.build_files is not None: + for build_file in self.build_files: + self._parse_ops(build_file) + + def check_cast(self): + """Check cast op type""" + for sub_graph in self.sub_graphs.values(): + sub_graph.check_cast() + + def check_dtype(self): + """Check op input/output dtype""" + for op in self.ops_list: + input_dtype = '' + for input_desc in op.inputs(): + input_dtype += ' ' + input_desc.dtype() + output_dtype = '' + for output_desc in op.outputs(): + output_dtype += ' ' + output_desc.dtype() + util.print('[green][%s][/green] %s\n - Input: %s\n - Output: %s' % ( + op.type(), op.name(), input_dtype, output_dtype)) + + def check_similarity(self): + """Check graph similarity.""" + + @catch_tool_exception + def save_sub_graph(self, op, deep=0, dump_manager=None, compare_manager=None): + """Save sub graph""" + if op is None: + raise PrecisionToolException("Save sub graph failed as root operator is None.") + try: + from graphviz import Digraph + file_name_list = [self.debug_id, op.graph_name, op.type(), op.name().replace('/', '_').replace('.', '_'), + str(deep), 'gv'] + file_name = '.'.join(file_name_list) + path = os.path.join(cfg.OP_GRAPH_DIR, file_name) + dot = Digraph(file_name, filename=path, node_attr={'shape': 'Mrecord'}, format='svg') + dot_list = [] + edge_list = [] + self._gen_sub_graph(dot, op, deep, dot_list, edge_list, 'red', direction='all', + dump_manager=dump_manager, compare_manager=compare_manager) + dot.format = 'svg' + dot.save(path) + self.log.info("Sub graph saved to %s" % os.path.abspath(cfg.OP_GRAPH_DIR)) + try: + dot.view(path) + time.sleep(1) + except 
Exception as err:
+                raise PrecisionToolException(
+                    "graphviz is not installed; install it with [yum/apt-get] install graphviz xdg-utils. %s" % err)
+        except ImportError as err:
+            raise PrecisionToolException("Save sub graph failed as import graphviz module failed. %s" % err)
+
+    def _gen_sub_graph(self, dot, op, deep, dot_list, edge_list, color='black', direction='all',
+                       dump_manager=None, compare_manager=None):
+        """Gen sub graph"""
+        if deep == 0 or op.type() in NO_DIG_OPS:
+            return
+        if op.name() not in dot_list:
+            dot.node(op.name(), self._gen_sub_graph_label(op), color=color, tooltip=op.summary(True))
+            dot_list.append(op.name())
+        # add input and output
+        for desc in op.inputs():
+            sub_op = self.get_op(desc.name(), op.graph_name)
+            if len(sub_op) != 0:
+                sub_op = sub_op[0]
+                if direction in ['all', 'input']:
+                    self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='input')
+                if sub_op.name() in dot_list:
+                    src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx())
+                else:
+                    # the tooltip should describe the node being added, i.e. sub_op
+                    dot.node(sub_op.name(), self._gen_sub_graph_label(sub_op), color=color, tooltip=sub_op.summary(True))
+                    src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx())
+                dst_edge = '%s:i%d' % (op.name(), desc.idx())
+                if src_edge + dst_edge not in edge_list:
+                    dot.edge(src_edge, dst_edge)
+                    edge_list.append(src_edge + dst_edge)
+        # add output
+        for desc in op.outputs():
+            for out_node_name in desc.names():
+                sub_op = self.get_op(out_node_name, op.graph_name)
+                if len(sub_op) != 0 and direction in ['all', 'output']:
+                    sub_op = sub_op[0]
+                    self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='output')
+
+    def _gen_sub_graph_label(self, op):
+        input_labels = []
+        for desc in op.inputs():
+            input_labels.append(self._gen_sub_graph_desc(desc, 'i'))
+        output_labels = []
+        for desc in op.outputs():
+            output_labels.append(self._gen_sub_graph_desc(desc, 'o'))
+        str_cell = '|'
+        return '{{ %s } | [%s] %s | { %s }}' % (str_cell.join(input_labels), op.type(), op.name(),
+                                                str_cell.join(output_labels))
+
+    @staticmethod
+    def _gen_sub_graph_desc(desc, id_prefix):
+        desc_str = r'<%s%d> [%d]' % (id_prefix, desc.idx(), desc.idx())
+        desc_str = r'%s [%s]' % (desc_str, desc.dtype()) if desc.dtype() != '' else desc_str
+        desc_str = r'%s\n%s' % (desc_str, desc.shape()) if len(desc.shape()) != 0 else desc_str
+        return desc_str
+
+    def list_ops(self, op_type='', op_name='', pass_name='', kernel_name=''):
+        """list ops in graph"""
+        return filter(lambda op: op_type in op.type() and op_name in op.name() and (
+                pass_name == '' or pass_name in op.pass_name()) and kernel_name in op.kernel_name(), self.ops_list)
+
+    def get_op(self, name, graph_name=None):
+        """get op by name"""
+        # get op in specific sub graph
+        if graph_name is not None and graph_name in self.sub_graphs:
+            return self.sub_graphs[graph_name].get_op(name)
+        ops = []
+        for sub_graph in self.sub_graphs.values():
+            ops.extend(sub_graph.get_op(name))
+        # check if there is an exact match operation
+        match_ops = list(filter(lambda x: x.name() == name, ops))
+        if len(match_ops) != 0:
+            return match_ops
+        # return guess operations by name
+        self.log.info("Cannot find an operator named %s. 
You may mean one of the operators below.", name)
+        guess_op_name_list = ['[green][%s][/green] %s' % (x.type(), x.name()) for x in ops]
+        util.print_panel(Constant.NEW_LINE.join(guess_op_name_list), title='Possible Operators')
+        return ops
+
+    def get_parent_node_by_subgraph_name(self, graph_name):
+        ops = []
+        for sub_graph in self.sub_graphs.values():
+            ops.extend(sub_graph.get_parent_node_by_subgraph_name(graph_name))
+        return ops
+
+    def _prepare_npu_graphs(self):
+        """prepare ge graphs """
+        # move graphs to precision data dir
+        graph_files = util.list_ge_graph_files(self.graph_root)
+        self.build_files = sorted(filter(lambda x: x.graph_name == cfg.BUILD_JSON_GRAPH_NAME, graph_files.values()),
+                                  key=lambda x: x.graph_id)
+        if len(self.build_files) == 0:
+            self.log.warning("Can not find any build files in dir: %s", self.graph_root)
+        self.log.info("Find [%d] GE build files.", len(self.build_files))
+
+    @catch_tool_exception
+    def _parse_ops(self, build_file):
+        """Parse *_Build.txt.json to op objects."""
+        build_file_json = build_file.path + '.json'
+        build_file_json = util.convert_proto_to_json(build_file.path, build_file_json)
+        if build_file_json is not None:
+            self.build_json_files.append(build_file_json)
+            with open(build_file_json, 'r') as f:
+                graph_json = json.load(f)
+                if 'graph' not in graph_json:
+                    raise PrecisionToolException("No graph in file: %s" % build_file.file_name)
+                if len(graph_json['graph']) != 1:
+                    self.log.warning("There is more than one graph in the GE build file, found %d" % len(
+                        graph_json['graph']))
+                # sub_graphs = []
+                for graph in graph_json['graph']:
+                    npu_sub_graph = NpuSubGraph(graph, build_file, self)
+                    self.sub_graphs[graph['name']] = npu_sub_graph
+                    self.ops_list.extend(npu_sub_graph.ops_list.values())
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py
new file mode 100644
index 000000000..2dbd5a31b
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py
@@ -0,0 +1,255 @@
+# coding=utf-8
+import json
+import re
+from typing import List
+from .desc import InputDesc
+from .desc import OutputDesc
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import PrecisionToolException
+
+NO_INPUT_NODES = ['Data', 'AtomicAddrClean', 'Recv', 'Constant']
+NO_OUTPUT_NODES = ['Send', 'Recv', 'NetOutput', 'PartitionedCall']
+
+JSON_KEY_NAME = 'name'
+JSON_KEY_ID = 'id'
+JSON_KEY_TYPE = 'type'
+JSON_KEY_ATTR = 'attr'
+JSON_KEY = 'key'
+JSON_VALUE = 'value'
+JSON_KEY_LIST = 'list'
+JSON_KEY_STR = 's'
+JSON_KEY_INT = 'i'
+JSON_KEY_INPUT_I = 'input_i'
+JSON_KEY_OUTPUT_I = 'output_i'
+JSON_KEY_PASS_NAME = 'pass_name'
+JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES = '_datadump_original_op_names'
+JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME = "_ge_attr_op_kernel_lib_name"
+JSON_KEY_PARENT_NODE_INDEX = "_parent_node_index"
+JSON_KEY_SUBGRAPH_NAME = "subgraph_name"
+
+KERNEL_NAME_SHUFFIX = '_kernelname'
+
+
+class Op(object):
+    """ Op class.
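+    Wraps one operator node parsed from a GE build-graph JSON. A minimal
+    op_json sketch, reconstructed from the keys this class reads (real
+    graphs carry more attributes):
+        {"name": "add_1", "type": "Add", "id": 3,
+         "input": ["x:0", "y:0"], "input_desc": [...],
+         "dst_name": ["relu_1"], "dst_index": [0], "output_desc": [...]}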
+ name: op name + type: op type + inputs: list of input descs + outputs: list of output descs + """ + def __init__(self, op_json, op_list, graph_name, npu_graph, sub_graph): + """Init""" + self.op_json = op_json + self.op_list = op_list + self.graph_name = graph_name + self.npu_graph = npu_graph + self.sub_graph = sub_graph + self.input_list = None + self.output_list = None + self.log = util.get_log() + + def name(self): + """Get op name""" + return self.op_json[JSON_KEY_NAME] + + def id(self): + """Get op id""" + return self.op_json[JSON_KEY_ID] if JSON_KEY_ID in self.op_json else '' + + def json(self): + return json.dumps(self.op_json, indent=2) + + def type(self): + """Get op type""" + return self.op_json[JSON_KEY_TYPE] + + def subgraph_names(self): + return self.op_json[JSON_KEY_SUBGRAPH_NAME] if JSON_KEY_SUBGRAPH_NAME in self.op_json else [] + + def inputs(self): + """Get the input list""" + if self.input_list is None: + self._parse_inputs() + if len(self.input_list) == 0 and self.type() == 'Data': + # Looking for Real Data + self._looking_for_real_inputs() + return self.input_list + + def input_addr(self): + return self.op_json[JSON_KEY_INPUT_I] + + def outputs(self): + """Get output list""" + if self.output_list is None: + self._parse_outputs() + if len(self.output_list) == 0 and self.type() == 'PartitionedCall': + self._looking_for_real_outputs() + return self.output_list + + def output_addr(self): + return self.op_json[JSON_KEY_OUTPUT_I] + + def pass_name(self): + return self._attr(JSON_KEY_PASS_NAME) + + def kernel_name(self): + return self._attr(self.name() + KERNEL_NAME_SHUFFIX) + + def ge_attr_op_kernel_lib_name(self): + return self._attr(JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME) + + def data_dump_original_op_names(self): + return self._attr(JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES) + + def parent_node_index(self): + return self._attr(JSON_KEY_PARENT_NODE_INDEX) + + def _attr(self, key): + if JSON_KEY_ATTR in self.op_json: + for attr in self.op_json[JSON_KEY_ATTR]: + if key == attr[JSON_KEY]: + if JSON_KEY_STR in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_STR] + elif JSON_KEY_LIST in attr[JSON_VALUE]: + if JSON_KEY_STR in attr[JSON_VALUE][JSON_KEY_LIST]: + return attr[JSON_VALUE][JSON_KEY_LIST][JSON_KEY_STR] + elif JSON_KEY_INT in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_INT] + else: + self.log.warning("Unknown attr format: %s", attr[JSON_VALUE]) + return '' + + def compare(self, right_op): + """Compare with another op""" + if not isinstance(right_op, Op): + raise PrecisionToolException("Should compare with another op.") + res_str = ['LeftOp(Type/Name) : [green][%s][/green] %s' % (self.type(), self.name()), + 'RightOp(Type/Name): [green][%s][/green] %s' % (right_op.type(), right_op.name())] + similar = True + if len(self.inputs()) != len(right_op.inputs()): + res_str.append("Input: [yellow]Input num mismatch.[/yellow]") + else: + res_str.append("Input:") + for left_input in self.inputs(): + for right_input in right_op.inputs(): + if left_input.idx() != right_input.idx(): + continue + txt, input_similar = left_input.compare(right_input) + res_str.append(' - ' + txt) + similar = similar and input_similar + if len(self.outputs()) != len(right_op.outputs()): + res_str.append("Output: [yellow]Output num mismatch.[/yellow]") + else: + res_str.append("Output:") + for left_output in self.outputs(): + for right_output in right_op.outputs(): + if left_output.idx() != right_output.idx(): + continue + txt, output_similar = left_output.compare(right_output) + res_str.append(' 
- ' + txt) + similar = similar and output_similar + return Constant.NEW_LINE.join(res_str), similar + + def _attr_detail(self): + """Gen attr details""" + res_str = [] + if JSON_KEY_ATTR in self.op_json: + res_str = [' ' + str(i) for i in self.op_json[JSON_KEY_ATTR]] + return Constant.NEW_LINE.join(res_str) + + def summary(self, origin_txt=False, attr_detail=False): + """Summary of current op""" + res_str = ['Op(Type/Name): [green][%s][/green] %s' % (self.type(), self.name()), + 'ID: [yellow]%s[/yellow]' % self.id(), + 'KernelName: [yellow]%s[/yellow]' % self.kernel_name(), + 'KernelLibName: [yellow]%s[/yellow]' % self.ge_attr_op_kernel_lib_name(), + 'GraphName: [yellow]%s[/yellow]' % self.graph_name] + pass_name = self.pass_name() + if pass_name != '': + res_str.append('PassName: [yellow]%s[/yellow]' % pass_name) + origin_op = self.data_dump_original_op_names() + if origin_op != '': + res_str.append('OriginalOp: %s' % origin_op) + if attr_detail: + res_str.append(self._attr_detail()) + res_str.append('InputAddr : [yellow]%s[/yellow]' % self.input_addr()) + res_str.append('OutputAddr: [yellow]%s[/yellow]' % self.output_addr()) + res_str.append('Input:%s' % InputDesc.summary.__doc__) + for i in self.inputs(): + res_str.append(' -' + i.summary(origin_txt)) + res_str.append('Output:') + for i in self.outputs(): + res_str.append(' -' + i.summary(origin_txt)) + return Constant.NEW_LINE.join(res_str) + + def _parse_inputs(self): + """ parse input desc in graph """ + self.input_list = [] + if 'input' not in self.op_json: + if self.type() not in NO_INPUT_NODES: + self.log.warning('Parse Op[%s][%s] inputs error.' % (self.type(), self.name())) + return self.input_list + desc_index = 0 + for i in range(len(self.op_json['input'])): + name = self.op_json['input'][i] + if name == '': + # if self.type() not in NO_INPUT_NODES: + # self.log.warning('invalid input name.') + continue + name_info = name.split(':') + if len(name_info) == 2 and int(name_info[1]) == -1: + # control edge + self.input_list.append(InputDesc(name, [], i)) + else: + self.input_list.append(InputDesc(name, self.op_json['input_desc'][desc_index], i)) + desc_index += 1 + self.input_list.sort(key=lambda x: x.index) + return self.input_list + + def _parse_outputs(self): + """ parse output desc in graph """ + self.output_list = [] + if 'dst_index' not in self.op_json: + if self.type() not in NO_OUTPUT_NODES: + self.log.warning('Parse Op[%s][%s] outputs error.' % (self.type(), self.name())) + return self.output_list + desc_index = 0 + for i in range(len(self.op_json['dst_index'])): + dst_name = self.op_json['dst_name'][i] + if self.op_json['dst_index'][i] == -1: + # control edge + self.output_list.append(OutputDesc(dst_name, [], -1)) + else: + self.output_list.append(OutputDesc(dst_name, self.op_json['output_desc'][desc_index], desc_index)) + desc_index += 1 + self.output_list.sort(key=lambda x: x.index) + return self.output_list + + def _looking_for_real_inputs(self): + """Find real inputs of subgraph data node.""" + graph_name = self.graph_name + parent_node_idx = self.parent_node_index() + parent_nodes = self.npu_graph.get_parent_node_by_subgraph_name(graph_name) + self.log.debug("Find %s parent nodes." 
% len(parent_nodes))
+        for parent_node in parent_nodes:
+            inputs = parent_node.inputs()
+            if len(inputs) <= parent_node_idx:
+                self.log.warning("Parent node has %d inputs, but need index %d" % (len(inputs), parent_node_idx))
+                continue
+            self.input_list.append(inputs[parent_node_idx])
+
+    def _looking_for_real_outputs(self):
+        """Find real outputs of PartitionedCall Node"""
+        subgraph_names = self.subgraph_names()
+        for subgraph_name in subgraph_names:
+            net_output_with_subgraph_name = subgraph_name + '_Node_Output'
+            net_output_nodes = self.npu_graph.get_op(net_output_with_subgraph_name)
+            self.log.debug("Find %s net output nodes, just need one." % len(net_output_nodes))
+            self.log.info("Note: PartitionedCall output nodes are the nodes connected to PartitionedCall from the inside.")
+            for output_node in net_output_nodes:
+                self.output_list = output_node.inputs()
+
+
+
+
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py
new file mode 100644
index 000000000..acf8c8920
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py
@@ -0,0 +1,45 @@
+# coding=utf-8
+import collections
+import logging
+import os
+from ..util.util import util
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.precision_tool_exception import PrecisionToolException
+from ..config import config as cfg
+
+CKPT_META_SHUFFIX = '.meta'
+
+
+class TfGraph(object):
+    def __init__(self, graph_root=cfg.TF_GRAPH_DIR):
+        """"""
+        self.graph_root = graph_root
+        self.log = util.get_log()
+        self.op_list = collections.OrderedDict()
+
+    @catch_tool_exception
+    def get_op_list(self, ckpt_path=None):
+        # the op list starts out empty and is filled lazily from the newest ckpt meta file
+        if not self.op_list:
+            self._convert_ckpt_to_graph(ckpt_path)
+        return self.op_list
+
+    def _convert_ckpt_to_graph(self, ckpt_path):
+        log_level = self.log.level
+        try:
+            self.log.setLevel('ERROR')
+            import tensorflow as tf
+            self.log.setLevel(log_level)
+        except ImportError as err:
+            self.log.setLevel(log_level)
+            raise PrecisionToolException("Import tensorflow failed. %s" % err)
+        meta_files = util.list_cpu_graph_files(ckpt_path)
+        if len(meta_files) == 0:
+            raise PrecisionToolException("Can not find any ckpt meta files.")
+        file_list = sorted(meta_files.values(), key=lambda x: x['timestamp'])
+        ckpt_file = file_list[-1]
+        self.log.info("Find %d tf ckpt meta files, choose [%s]" % (len(meta_files), ckpt_file['file_name']))
+        self.op_list = collections.OrderedDict()
+        saver = tf.train.import_meta_graph(ckpt_file['path'], clear_devices=True)
+        graph = tf.get_default_graph()
+        for op in graph.get_operations():
+            self.op_list[op.name] = op
-- Gitee

From d4748f9f7cc59f5ede07b986677724ddaff272ff Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:44:20 +0000
Subject: [PATCH 09/38] 1

Signed-off-by: huangju1993
---
 .../lib/train/train_analysis.py | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py
new file mode 100644
index 000000000..b7547d677
--- /dev/null
+++ 
b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py
@@ -0,0 +1,112 @@
+# coding=utf-8
+import os
+import numpy as np
+from ..adapter.tf_adapter import TfAdapter
+from ..dump.tf_dump import TfDump
+from ..util.util import util
+from ..config import config as cfg
+from ..util.precision_tool_exception import PrecisionToolException
+
+
+class TrainAnalysis(object):
+    def __init__(self):
+        self.log = util.get_log()
+        self.tf_adapter = TfAdapter()
+
+    @staticmethod
+    def gen_feed_file_name(name):
+        file_name = str(name).replace(':', '_').replace('/', '_') + '.npy'
+        return os.path.join(cfg.TF_CKPT_INPUT_DIR, file_name)
+
+    def _init_session(self, device='npu', action='dump'):
+        """Create a session configured for dump on the target device."""
+        import tensorflow as tf
+        if device == 'npu':
+            # util.execute_command('source %s', cfg.ASCEND_SET_ENV)
+            return tf.Session(config=self.tf_adapter.session_dump_config(None, action=action))
+        sess = tf.Session(config=tf.ConfigProto())
+        return self.tf_adapter.sess_dump(sess)
+
+    def _reset_dropout_rate(self, graph):
+        import tensorflow as tf
+        for op in graph.get_operations():
+            if 'dropout' in op.name and 'rate' in op.name:
+                self.log.debug("Find dropout rate node [%s][%s]" % (op.type, op.name))
+                # tensor = graph.get_tensor_by_name(op.name)
+                if op.type != 'Const':
+                    self.log.warning("Dropout op [%s] is not Const, skip resetting its rate; this may cause a"
+                                     " difference.", op.name)
+                    continue
+                # relies on TF's private Operation._set_attr to force the const rate to 0.0
+                op._set_attr('value', tf.AttrValue(tensor=tf.make_tensor_proto(0.0, tf.float32)))
+                self.log.debug("Set op: %s" % str(op))
+
+    def _prepare_graph(self, graph):
+        graph.seed = cfg.DUMP_SEED
+        self._reset_dropout_rate(graph)
+        return graph
+
+    def _load_train_graph(self, sess):
+        import tensorflow as tf
+        if util.empty_dir(cfg.TF_CKPT_ROOT):
+            raise PrecisionToolException('Checkpoint dir [%s] is empty, cannot run the train analysis process.' %
+                                         cfg.TF_CKPT_ROOT)
+        checkpoint = tf.train.latest_checkpoint(cfg.TF_CKPT_ROOT)
+        if checkpoint is None:
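+            # tf.train.latest_checkpoint returns None when the directory holds no checkpoint state
+            raise PrecisionToolException('Load ckpt failed from [%s].' 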
% cfg.TF_CKPT_ROOT)
+        saver = tf.train.import_meta_graph(checkpoint + '.meta')
+        self._prepare_graph(tf.get_default_graph())
+        saver.restore(sess, checkpoint)
+        return tf.get_default_graph()
+
+    @staticmethod
+    def _get_input_from_graph(graph):
+        input_nodes = []
+        tensor_index = {}
+        for op in graph.get_operations():
+            if 'Placeholder' == op.type:
+                if op.name in tensor_index:
+                    tensor_index[op.name] += 1
+                else:
+                    tensor_index[op.name] = 0
+                node = graph.get_tensor_by_name(op.name + ':' + str(tensor_index[op.name]))
+                input_nodes.append(node)
+        return input_nodes
+
+    def _get_input_tensors(self, input_nodes):
+        feed_map = {}
+        for node in input_nodes:
+            file_name = self.gen_feed_file_name(node.name)
+            if os.path.isfile(file_name):
+                feed_map[node] = np.load(file_name)
+            else:
+                # TODO: no recorded feed file for this placeholder; fall back to
+                # random data (the tensor's dtype is not honored yet)
+                feed_map[node] = np.random.random(node.shape)
+        return feed_map
+
+    def _build_feed_map(self, graph):
+        input_nodes = self._get_input_from_graph(graph)
+        return self._get_input_tensors(input_nodes)
+
+    def _analysis(self, device, action='dump'):
+        import tensorflow as tf
+        if device == 'npu':
+            # side-effect import: makes the NPU custom ops available to TF
+            import npu_bridge.npu_init
+        sess = self._init_session(device, action=action)
+        graph = self._load_train_graph(sess)
+        train_op = tf.get_collection(tf.GraphKeys.TRAIN_OP)
+        feed_map = self._build_feed_map(graph)
+        sess.run(train_op, feed_dict=feed_map)
+        if device == 'cpu':
+            tf_dump = TfDump()
+            tf_dump.run_tf_dbg_dump()
+
+    def run(self, device='all', action='dump'):
+        """
+        :param device: all | npu | cpu
+        :param action: dump | overflow | fusion_switch | fusion_off
+        :return:
+        """
+        if device == 'all':
+            self._analysis('cpu', action)
+            self._analysis('npu', action)
+        else:
+            self._analysis(device, action)
-- Gitee

From e5fa0797422c176c1040ee8577d57d9618ef964d Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:44:39 +0000
Subject: [PATCH 10/38] 1

Signed-off-by: huangju1993
---
 .../precision_tool/lib/util/constant.py      |  20 +
 .../precision_tool/lib/util/file_desc.py     |  38 ++
 .../precision_tool/lib/util/h5_util.py       | 190 +++++++
 .../precision_tool/lib/util/onnx_builder.py  |   0
 .../lib/util/precision_tool_exception.py     |  24 +
 .../precision_tool/lib/util/tool_object.py   |  10 +
 .../examples/precision_tool/lib/util/util.py | 536 ++++++++++++++++++
 7 files changed, 818 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/onnx_builder.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py
new file mode 100644
index 000000000..8106bb8d4
--- /dev/null
+++ 
b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py @@ -0,0 +1,20 @@ +# coding=utf-8 + + +class Constant(object): + VERSION = "0.1.11" + NEW_LINE = "\n" + TAB_LINE = "\t" + DEFAULT_DEBUG_ID = "debug_0" + NPU_DEBUG_ID_1 = "debug_1" + GRAPH = "graph" + DUMP = "dump" + + class Suffix(object): + JSON = '.json' + CSV = '.csv' + H5 = '.h5' + OM = '.om' + + class Pattern(object): + GE_PROTO_GRAPH_PATTERN = r'^ge_proto_([0-9]+)_(graph_[0-9]+_)*([A-Za-z0-9_-]+)\.txt$' diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py new file mode 100644 index 000000000..0773632b1 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py @@ -0,0 +1,38 @@ +# coding=utf-8 +import os + + +class FileDesc(object): + def __init__(self, file_name, dir_path, timestamp=-1): + self.file_name = file_name + self.dir_path = dir_path + self.path = os.path.join(dir_path, file_name) + self.timestamp = timestamp + self.idx = 0 + if self.timestamp == -1: + self.timestamp = os.path.getmtime(self.path) + + +class BuildGraphFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, graph_id, graph_name): + super(BuildGraphFileDesc, self).__init__(file_name, dir_path, timestamp) + self.graph_id = graph_id + self.graph_name = graph_name + + +class NpuDumpFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, stream_id=0): + super(NpuDumpFileDesc, self).__init__(file_name, dir_path, timestamp) + self.op_name = op_name + self.op_type = op_type + self.task_id = task_id + stream_id = 0 if stream_id is None else int(stream_id) + self.stream_id = stream_id + self.idx = dir_path.split(os.sep)[-1] + + +class DumpDecodeFileDesc(NpuDumpFileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, anchor_type, anchor_idx): + super(DumpDecodeFileDesc, self).__init__(file_name, dir_path, timestamp, op_name, op_type, task_id) + self.type = anchor_type + self.idx = anchor_idx diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py new file mode 100644 index 000000000..1b294ab97 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py @@ -0,0 +1,190 @@ +import collections +import os +import numpy as np + +try: + import h5py +except ImportError as import_err: + h5py = None + print("Failed to import h5py. some function may disable. 
Run 'pip3 install h5py' to fix it.", + import_err) + +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg + + +class IdxType(object): + # /batch_norm/88/input/xxx + OP_TYPE = 'OP_TYPE' + OP_NAME = 'OP_NAME' + OP_ANC = 'OP_ANC' + + +H5_NAME_IDX = [IdxType.OP_TYPE, IdxType.OP_NAME, IdxType.OP_ANC] + + +def gen_h5_data_name(name, prefix='npu'): + return "%s_h5%s.npy" % (prefix, name.replace('/', '_')) + + +class H5Data(object): + def __init__(self, data, prefix='npu'): + self.data = data + self.prefix = prefix + self.name = gen_h5_data_name(self.data.name, self.prefix) + + def np_data(self): + np_data = np.array(self.data) + self._save(np_data) + return np_data + + def _save(self, data): + path = os.path.join(cfg.PT_DUMP_DECODE_DIR, self.name) + np.save(path, data) + + +class H5Op(object): + def __init__(self, name, h5_node, prefix='npu'): + self.log = util.get_log() + self.name = name + self.prefix = prefix + self.h5_node = h5_node + self.inputs = {} + self.outputs = {} + self.group = { + 'grads': {}, + 'tensors': {}, + 'grad_inputs': {}, + 'result': {} + } + ''' + self.input_grad = {} + self.output_grad = {} + self.input_tensor = {} + self.output_tensor = {} + ''' + self._prepare() + + def summary(self): + summary_txt = [] + summary_txt.extend(self._gen_txt(self.inputs, '-Input:')) + summary_txt.extend(self._gen_txt(self.outputs, '-Output:')) + summary_txt.extend(self._gen_txt(self.group['grads'], 'Grads:')) + summary_txt.extend(self._gen_txt(self.group['tensors'], '-Tensors:')) + summary_txt.extend(self._gen_txt(self.group['grad_inputs'], '-GradInputs:')) + summary_txt.extend(self._gen_txt(self.group['result'], '-Result:')) + return Constant.NEW_LINE.join(summary_txt) + + @staticmethod + def _gen_txt(h5_data, name): + if len(h5_data) == 0: + return [] + txt = [name] + for idx, data in enumerate(h5_data.values()): + txt.append(' └─[green][%s][/green] %s' % (idx, data.name)) + txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(data.np_data())) + return txt + + def _parse_group(self, node): + sub_node_type = node.name.split('/')[-1] + if sub_node_type in self.group.keys(): + for item in node: + sub_node = node[item] + if isinstance(sub_node, h5py.Dataset): + self.group[sub_node_type][item] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown sub node: %s" % sub_node) + else: + self.log.warning("Unknown sub node type: %s(%s)" % (sub_node_type, node)) + + def _prepare_input_output(self, node, desc_type): + for desc_name in node: + sub_node = node[desc_name] + if isinstance(sub_node, h5py.Group): + self._parse_group(sub_node) + elif isinstance(sub_node, h5py.Dataset): + update_dict = self.inputs if desc_type == 'input' else self.outputs + update_dict[desc_name] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown type: %s(%s)" % (type(sub_node), sub_node)) + + def _prepare(self): + for desc_type in self.h5_node: + if desc_type in ['input', 'output']: + self._prepare_input_output(self.h5_node[desc_type], desc_type) + else: + self.log.warning("Unknown desc type: %s(%s)" % (desc_type, self.h5_node)) + + +class H5Util(object): + def __init__(self, file_name, prefix): + self.log = util.get_log() + self.file_name = file_name + self.prefix = prefix + self.h5 = None + self.ops = collections.OrderedDict() + self._prepare() + + def __del__(self): + if self.h5 is not None: + self.h5.close() + + def get_op(self, op_id): + if op_id in self.ops: + return self.ops[op_id] + self.log.warning("Can not find any h5 op id: %s" 
% op_id)
+        return None
+
+    def get_tensor_by_name(self, tensor_name):
+        if self.h5 is None:
+            self.log.warning("h5 file is None.")
+            return None
+        if tensor_name in self.h5:
+            return np.array(self.h5[tensor_name])
+        return None
+
+    def print_tensor(self, tensor_name):
+        tensor = self.get_tensor_by_name(tensor_name)
+        if tensor is None:
+            self.log.warning("Tensor:%s does not exist." % tensor_name)
+            return
+        file_path = self._dump_numpy(tensor_name, tensor)
+        util.print_npy_summary(os.path.dirname(file_path), os.path.basename(file_path))
+
+    def _prepare(self):
+        if not os.path.isfile(self.file_name) or not str(self.file_name).endswith(Constant.Suffix.H5):
+            self.log.error("File [%s] does not exist or is not an h5 file." % self.file_name)
+        if h5py is None:
+            # h5py is unavailable; bail out instead of crashing on h5py.File below
+            self.log.warning("Can not find python module h5py.")
+            return
+        self.h5 = h5py.File(self.file_name, 'r')
+        self._list_tensors(self.h5)
+
+    def _list_tensors(self, h5, idx=0, name=''):
+        for item in h5:
+            item_name = name + '/' + item
+            if idx == 1:
+                self.ops[str(item)] = H5Op(item_name, h5[item_name], self.prefix)
+                continue
+            self._list_tensors(h5[item], idx+1, item_name)
+
+    def _list_tensors_loop(self, h5, idx=0, name=''):
+        for item in h5:
+            if isinstance(h5[item], h5py.Group):
+                item_name = name + '/' + item
+                print(item_name)
+                # check
+                if H5_NAME_IDX[idx] == IdxType.OP_NAME and item_name not in self.ops:
+                    self.ops[item_name] = H5Op(item)
+                if H5_NAME_IDX[idx] == IdxType.OP_ANC:
+                    self.ops[item_name] = H5Op(item)
+                self._list_tensors(h5[item], idx + 1, item_name)
+
+    def _dump_numpy(self, tensor_name, tensor):
+        if not os.path.exists(cfg.PT_DUMP_DECODE_DIR):
+            util.create_dir(cfg.PT_DUMP_DECODE_DIR)
+        file_name = tensor_name.replace('/', '_').strip('_') + '.npy'
+        file_path = os.path.join(cfg.PT_DUMP_DECODE_DIR, file_name)
+        self.log.info("Dump file: %s" % file_path)
+        np.save(file_path, tensor)
+        return file_path
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/onnx_builder.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/onnx_builder.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py
new file mode 100644
index 000000000..02084770f
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py
@@ -0,0 +1,24 @@
+# coding=utf-8
+import logging
+
+
+class PrecisionToolException(Exception):
+    """
+    Class for PrecisionTool Exception
+    """
+    def __init__(self, error_info):
+        super(PrecisionToolException, self).__init__()
+        self.error_info = error_info
+
+
+def catch_tool_exception(func):
+    def handle(*args, **kwargs):
+        log = logging.getLogger()
+        try:
+            return func(*args, **kwargs)
+        except PrecisionToolException as pte:
+            log.warning(pte.error_info)
+        except SystemExit:
+            # do not exit
+            log.debug("Exit")
+    return handle
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
new file mode 100644
index 000000000..7412b6cee
--- /dev/null
+++ 
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
new file mode 100644
index 000000000..7412b6cee
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
@@ -0,0 +1,10 @@
+# coding=utf-8
+
+
+class ToolObject(object):
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        if not cls._instance:
+            # object.__new__() rejects extra arguments in Python 3,
+            # so do not forward *args/**kwargs here
+            cls._instance = super(ToolObject, cls).__new__(cls)
+        return cls._instance
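A minimal sketch of the intended singleton behavior:

```python
# Every instantiation returns the same shared object.
a = ToolObject()
b = ToolObject()
assert a is b
```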
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py
new file mode 100644
index 000000000..88fbe0b00
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py
@@ -0,0 +1,536 @@
+# coding=utf-8
+import csv
+import re
+import sys
+import os
+import shutil
+import numpy as np
+import logging
+import subprocess
+from .constant import Constant
+from .precision_tool_exception import PrecisionToolException
+from .precision_tool_exception import catch_tool_exception
+from .file_desc import *
+from ..config import config as cfg
+
+try:
+    from rich.traceback import install
+    from rich.panel import Panel
+    from rich.table import Table
+    from rich import print as rich_print
+    from rich.columns import Columns
+    install()
+except ImportError as import_err:
+    install = None
+    Panel = None
+    Table = None
+    Columns = None
+    rich_print = print
+    print("Failed to import rich. Some functions may be disabled. Run 'pip3 install rich' to fix it.",
+          import_err)
+
+try:
+    import readline
+    readline.parse_and_bind('tab: complete')
+except ImportError as import_error:
+    print("Unable to import module: readline. Run 'pip3 install gnureadline pyreadline' to fix it.")
+
+# patterns
+OFFLINE_DUMP_PATTERN = r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})(?:\.csv)?"
+OFFLINE_DUMP_DECODE_PATTERN = \
+    r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.?[0-9]?[\.0-9]+?" \
+    r"\.([a-z]+)\.([0-9]{1,255})\.npy$"
+OFFLINE_DUMP_CONVERT_PATTERN = \
+    r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" \
+    r"\.([a-z]+)\.([0-9]{1,255})(\.[x0-9]+)?\.npy$"
+OFFLINE_FILE_NAME = 'op_type.op_name.task_id(.stream_id).timestamp'
+OP_DEBUG_NAME = 'OpDebug.Node_OpDebug.taskid.timestamp'
+CPU_DUMP_DECODE_PATTERN = r"^([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.npy$"
+CPU_FILE_DECODE_NAME = 'op_name.0(.0).timestamp.npy'
+OP_DEBUG_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})"
+OP_DEBUG_DECODE_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})[\.0-9]*\.([a-z]+)\.([0-9]{1,255})\.json"
+VECTOR_COMPARE_RESULT_PATTERN = r"result_([0-9]{1,255})\.csv"
+TIMESTAMP_DIR_PATTERN = '[0-9]{1,255}'
+NUMPY_PATTERN = r".*\.npy$"
+H5_PATTERN = r".*\.h5$"
+CSV_SHUFFIX = '.csv'
+NUMPY_SHUFFIX = '.npy'
+CKPT_META_SHUFFIX = r".*.meta$"
+MAPPING_CSV = "mapping.csv"
+
+
+class Util(object):
+    def __init__(self):
+        self.atc = None
+        self.ms_accu_cmp = None
+        logging.basicConfig(level=cfg.LOG_LEVEL, format="%(asctime)s (%(process)d) -[%(levelname)s]%(message)s",
+                            datefmt="%Y-%m-%d %H:%M:%S")
+        self.log = logging.getLogger()
+        self.python = sys.executable
+
+    def get_log(self):
+        return self.log
+
+    def execute_command(self, cmd: str):
+        """ Execute shell command
+        :param cmd: command
+        :return: status code
+        """
+        if cmd is None:
+            self.log.error("Command is None.")
+            return -1
+        self.log.debug("[Run CMD]: %s", cmd)
+        complete_process = subprocess.run(cmd, shell=True)
+        return complete_process.returncode
+
+    @staticmethod
+    def empty_dir(dir_path: str) -> bool:
+        """ Check if target dir is empty
+        :param dir_path: target dir
+        :return: bool
+        """
+        if not os.path.exists(dir_path):
+            return True
+        if len(os.listdir(dir_path)) == 0:
+            return True
+        return False
+
+    def convert_proto_to_json(self, src_file, dst_file):
+        """Convert GE proto graphs to json format.
+        command: atc --mode=5 --om=ge_proto_Build.txt --json=xxx.json
+        :param src_file: proto file
+        :param dst_file: output json file
+        :return: result json file
+        """
+        if not os.path.exists(src_file):
+            raise PrecisionToolException("Source proto file %s not exist." % src_file)
+        if os.path.exists(dst_file) and os.path.getmtime(dst_file) > os.path.getmtime(src_file):
+            self.log.debug("GE graph build json already exist.")
+            return dst_file
+        cmd = '%s --mode=5 --om=%s --json=%s' % (self._get_atc(), src_file, dst_file)
+        self.execute_command(cmd)
+        if not os.path.isfile(dst_file):
+            raise PrecisionToolException("Convert GE build graph to json failed. can not find any json file.")
+        self.log.info('Finish convert [%s] build graph from proto to json format.', src_file)
+        return dst_file
+
+    def convert_dump_to_npy(self, src_file, dst_path, data_format=None):
+        """Convert npu dump files to npy format.
+        :param src_file: src file
+        :param dst_path: dst path
+        :param data_format: target data format
+        :return: status code
+        """
+        self.create_dir(dst_path)
+        format_cmd = '' if data_format is None else '-f %s' % data_format
+        cmd = '%s %s convert -d %s -out %s %s' % (self.python, self._get_ms_accu_cmp(), src_file, dst_path, format_cmd)
+        return self.execute_command(cmd)
+
+    def compare_vector(self, npu_dump_dir, cpu_dump_dir, graph_json, result_path):
+        """Run compare vector command.
+ :param npu_dump_dir: npu dump data dir + :param cpu_dump_dir: cpu dump data dir + :param graph_json: graph json + :param result_path: result path + :return: status code + """ + self.create_dir(result_path) + if graph_json is None: + cmd = '%s %s compare -m %s -g %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, result_path) + else: + cmd = '%s %s compare -m %s -g %s -f %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, graph_json, result_path) + return self.execute_command(cmd) + + def list_dump_files(self, path, sub_path=''): + """List npu dump files in npu dump dir. + default only list the newest sub dir ordered by timestamp. set sub_path to specific other sub_path + :param path: dump path + :param sub_path: sub dir + :return: dump_files, parent_dirs + """ + parent_dirs = {} + dump_files = {} + newest_sub_path = self.get_newest_dir(path) if sub_path == '' else sub_path + dump_pattern = re.compile(OFFLINE_DUMP_PATTERN) + for dir_path, dir_names, file_names in os.walk(os.path.join(path, newest_sub_path), followlinks=True): + for name in file_names: + dump_match = dump_pattern.match(name) + if dump_match is None: + continue + dump_files[name] = self._gen_dump_file_info(name, dump_match, dir_path) + if dir_path not in parent_dirs: + parent_dirs[dir_path] = {} + parent_dirs[dir_path][name] = dump_files[name] + return dump_files, parent_dirs + + def parse_mapping_csv(self, path, pattern, extern_pattern=''): + """parse mapping csv in dump path""" + dump_files = {} + re_pattern = re.compile(pattern) + for dir_path, dir_names, file_names in os.walk(path, followlinks=True): + if MAPPING_CSV not in file_names: + continue + mapping = self.read_csv(os.path.join(dir_path, MAPPING_CSV)) + for item in mapping: + src_file = os.path.abspath(os.path.join(dir_path, item[0])) + if not os.path.isfile(src_file): + self.log.warning("Can not find file %s in mapping.csv, dir: %s.", item[0], dir_path) + continue + match = re_pattern.match(item[1]) + if match is None: + self.log.warning("file name [%s] in mapping.csv is invalid.", item[1]) + continue + file_desc = self._gen_dump_file_info(item[0], match, dir_path) + dst_file_name = '.'.join([file_desc.op_type, file_desc.file_name, str(file_desc.task_id), + str(file_desc.stream_id), str(file_desc.timestamp)]) + if item[1].endswith(Constant.Suffix.CSV): + dst_file_name += '.csv' + dst_file = os.path.abspath(os.path.join(dir_path, dst_file_name)) + if not os.path.islink(src_file): + os.rename(src_file, dst_file) + os.symlink(dst_file, src_file) + file_desc.path = dst_file + file_desc.file_name = dst_file_name + dump_files[item[1]] = file_desc + return dump_files + + def list_npu_dump_files(self, path, extern_pattern=''): + npu_dump_files = self._list_file_with_pattern(path, OFFLINE_DUMP_PATTERN, extern_pattern, + self._gen_dump_file_info) + npu_dump_files.update(self.parse_mapping_csv(path, OFFLINE_DUMP_PATTERN, extern_pattern)) + return npu_dump_files + + def list_ge_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, Constant.Pattern.GE_PROTO_GRAPH_PATTERN, extern_pattern, + self._gen_build_graph_file_info) + + def list_npu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_npu_dump_decode_file_info) + + def list_debug_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OP_DEBUG_DECODE_PATTERN, extern_pattern, + 
self._gen_overflow_debug_decode_file_info) + + def list_cpu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CPU_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_cpu_dump_decode_file_info) + + def list_cpu_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CKPT_META_SHUFFIX, extern_pattern, + self._gen_cpu_graph_files_info) + + def list_vector_compare_result_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, VECTOR_COMPARE_RESULT_PATTERN, extern_pattern, + self._gen_vector_compare_result_file_info) + + def list_npu_dump_convert_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_CONVERT_PATTERN, extern_pattern, + self._gen_npu_dump_convert_file_info) + + def list_numpy_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, NUMPY_PATTERN, extern_pattern, + self._gen_numpy_file_info) + + def list_h5_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, H5_PATTERN, extern_pattern, + self._gen_file_info) + + def create_dir(self, path): + """Create dir if not exist + :param path: path + :return: bool + """ + if os.path.exists(path): + return True + try: + os.makedirs(path, mode=0o700) + except OSError as err: + self.log.error("Failed to create %s. %s", path, str(err)) + return False + return True + + def clear_dir(self, path: str, pattern=''): + """Clear dir with pattern (file/path name match pattern will be removed) + :param path: path + :param pattern: pattern + :return: None + """ + if not os.path.exists(path): + return + try: + for f in os.listdir(path): + if not re.match(pattern, f): + continue + file_path = os.path.join(path, f) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except OSError as err: + self.log.error("Failed to remove %s. %s", path, str(err)) + + @staticmethod + def npy_info(source_data): + """Get npy information + :param source_data: npy path + :return: (shape, dtype) + """ + if isinstance(source_data, str): + if not str(source_data).endswith(NUMPY_SHUFFIX): + raise PrecisionToolException("Npy file [%s] is invalid" % source_data) + data = np.load(source_data, allow_pickle=True) + elif isinstance(source_data, np.ndarray): + data = source_data + else: + raise PrecisionToolException("Invalid source data:%s" % source_data) + if data.dtype == 'object': + raise PrecisionToolException("Invalid source data, data is object.") + if np.size(data) == 0: + raise PrecisionToolException("Empty source data:%s" % source_data) + return data.shape, data.dtype, data.max(), data.min(), data.mean() + + @catch_tool_exception + def gen_npy_info_txt(self, source_data): + """ Generate numpy info txt. 
+        :param source_data: source path or np.ndarray
+        :return: txt
+        """
+        try:
+            shape, dtype, max_data, min_data, mean = self.npy_info(source_data)
+            return '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (shape, dtype, max_data, min_data, mean)
+        except PrecisionToolException:
+            return ''
+
+    def print_npy_summary(self, path, file_name, is_convert=False, extern_content=''):
+        """Print summary of npy data
+        :param path: file path
+        :param file_name: file name
+        :param is_convert: if convert to txt file
+        :param extern_content: extern content append to the summary
+        :return: None
+        """
+        target_file = os.path.join(path, file_name)
+        if not os.path.exists(target_file):
+            raise PrecisionToolException("File [%s] not exist" % target_file)
+        data = np.load(target_file, allow_pickle=True)
+        table = self.create_table('', ['Index', 'Data'])
+        flatten_data = data.flatten()
+        for i in range(min(16, int(np.ceil(flatten_data.size / 8)))):
+            last_idx = min(flatten_data.size, i*8+8)
+            table.add_row(str(i * 8), ' '.join(flatten_data[i*8: last_idx].astype('str').tolist()))
+        summary = ['[yellow]%s[/yellow]' % self.gen_npy_info_txt(data), 'Path: %s' % target_file]
+        if is_convert:
+            summary.append('TxtFile: %s.txt' % target_file)
+        if extern_content != '':
+            summary.append('%s' % extern_content)
+        self.print_panel(self.create_columns([table, Constant.NEW_LINE.join(summary)]), file_name)
+        if is_convert:
+            self.save_npy_to_txt(data, target_file + '.txt')
+
+    def save_npy_to_txt(self, src_file, dst_file='', align=0):
+        """save numpy file to txt file.
+        default data will be aligned to the last axis of data.shape
+        :param src_file: src file name
+        :param dst_file: dst file name
+        :param align: data align
+        :return: None
+        """
+        if dst_file == '':
+            dst_file = src_file + '.txt'
+        if os.path.exists(dst_file):
+            self.log.debug("Dst file %s exists, will not save new one.", dst_file)
+            return
+        if isinstance(src_file, str):
+            data = np.load(src_file, allow_pickle=True)
+        elif isinstance(src_file, np.ndarray):
+            data = src_file
+        else:
+            raise PrecisionToolException("invalid src_file: %s" % src_file)
+        if data.dtype == 'object':
+            raise PrecisionToolException("Invalid source data, data is object.")
+        shape = data.shape
+        data = data.flatten()
+        if align == 0:
+            if len(shape) == 0:
+                align = 1
+            else:
+                align = shape[-1]
+        elif data.size % align != 0:
+            pad_array = np.zeros((align - data.size % align,))
+            data = np.append(data, pad_array)
+        np.savetxt(dst_file, data.reshape((-1, align)), delimiter=' ', fmt='%g')
+
+    def read_csv(self, path):
+        """Read csv file to list.
+        :param path: csv file path
+        :return: list
+        """
+        if not str(path).endswith(CSV_SHUFFIX):
+            self.log.error("csv path [%s] is invalid", path)
+            return []
+        rows = []
+        with open(path) as f:
+            csv_handle = csv.reader(f)
+            for row in csv_handle:
+                rows.append(row)
+        return rows
+
+    @staticmethod
+    def print(content):
+        rich_print(content)
+
+    @staticmethod
+    def render(content, rich=True):
+        if rich:
+            rich_print(content)
+        else:
+            print(content)
+
+    @staticmethod
+    def create_table(title, columns):
+        if Table is None:
+            raise PrecisionToolException("No rich module error.")
+        table = Table(title=title)
+        for column_name in columns:
+            table.add_column(column_name, overflow='fold')
+        return table
+
+    @staticmethod
+    def create_columns(content):
+        if Columns is None:
+            raise PrecisionToolException("No rich module error.")
+        return Columns(content)
+    def print_panel(self, content, title='', fit=True):
+        """ Print panel.
+        :param content: content
+        :param title: title
+        :param fit: if panel size fit the content
+        :return: None
+        """
+        if Panel is None:
+            print(content)
+            return
+        if fit:
+            self.print(Panel.fit(content, title=title))
+        else:
+            self.print(Panel(content, title=title))
+
+    @staticmethod
+    def _detect_file(file_name, root_dir):
+        """Find file in root dir"""
+        result = []
+        for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True):
+            for name in file_names:
+                if re.match(file_name, name):
+                    result.append(os.path.join(dir_path, name))
+        return result
+
+    def _detect_file_if_not_exist(self, target_file):
+        """Find specific file in cmd root path"""
+        self.log.info("Try to auto detect file with name: %s.", target_file)
+        res = self._detect_file(target_file, cfg.CMD_ROOT_PATH)
+        if len(res) == 0:
+            raise PrecisionToolException("Cannot find any file named %s in dir %s" % (target_file, cfg.CMD_ROOT_PATH))
+        self.log.info("Detect [%s] success. %s", target_file, res)
+        return res[0]
+
+    def _get_atc(self):
+        if self.atc is None:
+            self.atc = self._detect_file_if_not_exist('^atc$')
+        return self.atc
+
+    def _get_ms_accu_cmp(self):
+        if self.ms_accu_cmp is None:
+            self.ms_accu_cmp = self._detect_file_if_not_exist(cfg.MS_ACCU_CMP)
+        return self.ms_accu_cmp
+
+    def get_newest_dir(self, path: str):
+        """Find the newest subdir in specific path; subdirs should be named by timestamp."""
+        if not os.path.isdir(path):
+            self.log.warning("Path [%s] not exists", path)
+            return ''
+        paths = os.listdir(path)
+        sub_paths = []
+        for p in paths:
+            if re.match(TIMESTAMP_DIR_PATTERN, p):
+                sub_paths.append(p)
+        if len(sub_paths) == 0:
+            self.log.debug("Path [%s] has no timestamp dirs.", path)
+            return ''
+        newest_sub_path = sorted(sub_paths)[-1]
+        self.log.info("Sub path num:[%d]. Dirs[%s], choose[%s]", len(sub_paths), str(sub_paths), newest_sub_path)
+        return newest_sub_path
+
+    @staticmethod
+    def _list_file_with_pattern(path, pattern, extern_pattern, gen_info_func):
+        if path is None or not os.path.exists(path):
+            raise PrecisionToolException("Path %s not exist." % path)
+        file_list = {}
+        re_pattern = re.compile(pattern)
+        for dir_path, dir_names, file_names in os.walk(path, followlinks=True):
+            for name in file_names:
+                match = re_pattern.match(name)
+                if match is None:
+                    continue
+                if extern_pattern != '' and not re.match(extern_pattern, name):
+                    continue
+                file_list[name] = gen_info_func(name, match, dir_path)
+        return file_list
+
+    @staticmethod
+    def _gen_numpy_file_info(name, match, dir_path):
+        return FileDesc(name, dir_path)
+
+    @staticmethod
+    def _gen_file_info(name, match, dir_path):
+        return FileDesc(name, dir_path)
+
+    @staticmethod
+    def _gen_build_graph_file_info(name, match, dir_path):
+        return BuildGraphFileDesc(name, dir_path, -1, int(match.group(1)), match.groups()[-1])
+
+    @staticmethod
+    def _gen_dump_file_info(name, match, dir_path):
+        return NpuDumpFileDesc(name, dir_path, int(match.groups()[-1]), op_name=match.group(2), op_type=match.group(1),
+                               task_id=int(match.group(3)), stream_id=match.group(4))
+
+    @staticmethod
+    def _gen_npu_dump_decode_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name=match.group(2),
+                                  op_type=match.group(1), task_id=int(match.group(3)),
+                                  anchor_type=match.groups()[-2], anchor_idx=int(match.groups()[-1]))
+
+    @staticmethod
+    def _gen_cpu_dump_decode_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, -1, op_name=match.group(1), op_type='', task_id=0,
+                                  anchor_type='output', anchor_idx=int(match.group(2)))
+
+    @staticmethod
+    def _gen_cpu_graph_files_info(name, match, dir_path):
+        return FileDesc(name, dir_path, -1)
+
+    @staticmethod
+    def _gen_overflow_debug_decode_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name='Node_OpDebug', op_type='Opdebug',
+                                  task_id=int(match.group(1)), anchor_type=match.groups()[-2],
+                                  anchor_idx=int(match.groups()[-1]))
+
+    @staticmethod
+    def _gen_vector_compare_result_file_info(name, match, dir_path):
+        return FileDesc(name, dir_path, int(match.group(1)))
+
+    @staticmethod
+    def _gen_npu_dump_convert_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-4]), op_name=match.group(2),
+                                  op_type=match.group(1), task_id=int(match.group(3)), anchor_type=match.groups()[-3],
+                                  anchor_idx=int(match.groups()[-2]))
+
+
+util = Util()
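Since the listing helpers above hinge on OFFLINE_DUMP_PATTERN, a quick sanity check of how a dump file name decomposes may help; the file name below is fabricated, but follows the documented 'op_type.op_name.task_id(.stream_id).timestamp' layout:

```python
# Hypothetical check of OFFLINE_DUMP_PATTERN against a made-up dump file name.
import re

pattern = re.compile(r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})(?:\.csv)?")
m = pattern.match('Add.add_1.323.5.1619494134703053')
print(m.groups())
# -> ('Add', 'add_1', '323', '5', '1619494134703053')
#    op_type, op_name, task_id, stream_id, timestamp
```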
-- Gitee
From 73c43849e7cde2ce7493bf386d180ebc60d82813 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:44:55 +0000
Subject: [PATCH 11/38] 1

Signed-off-by: huangju1993
---
 .../examples/precision_tool/lib/__init__.py        |   0
 .../examples/precision_tool/lib/cpu_tvm.py         |  51 ++++
 .../precision_tool/lib/interactive_cli.py          |  87 +++++++
 .../precision_tool/lib/precision_tool.py           | 230 ++++++++++++++++++
 4 files changed, 368 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/__init__.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/__init__.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py
new file mode 100644
index 000000000..a0906071e
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py
@@ -0,0 +1,51 @@
+import numpy as np
+from tbe import tvm
+
+
+class CpuTvm(object):
+    def __init__(self, json_file, dump_input_files, dump_output_files):
+        self.json_file = json_file
+        self.dump_input_files = dump_input_files
+        self.dump_output_files = dump_output_files
+        self.input_list = []
+        self.output_list = []
+
+    def _load_schedule(self):
+        with open(self.json_file, 'r') as jsonfile:
+            tvm_node = tvm.load_json(jsonfile.read())
+            self.output_list = tvm_node.op.attrs['output_list']
+            self.input_list = tvm_node.op.attrs['input_list']
+            schedule = tvm.create_schedule([res.op for res in self.output_list])
+        return schedule
+
+    def _build_tvm(self, schedule):
+        tensor_list = [ele for ele in self.input_list if ele is not None]
+        for ele in self.output_list:
+            if ele is not None:
+                tensor_list.append(ele)
+        fusion_op = tvm.build(schedule, tensor_list, "c", "llvm")
+        return fusion_op
+
+    def _load_data(self, dump_files):
+        ctx = tvm.cpu(0)
+        data_tvm = []
+        for dump_file in dump_files:
+            data_temp_numpy = np.load(dump_file)
+            data_temp_tvm = tvm.nd.array(data_temp_numpy, ctx)
+            data_tvm.append(data_temp_tvm)
+        return data_tvm
+
+    def run_cpu_tvm(self):
+        # load schedule and build tvm
+        schedule = self._load_schedule()
+        fusion_op = self._build_tvm(schedule)
+
+        # load data and run the fused op on cpu
+        data_tvm_in = self._load_data(self.dump_input_files)
+        data_tvm_out = self._load_data(self.dump_output_files)
+        data_tvm_in.extend(data_tvm_out)
+        fusion_op(*data_tvm_in)
+
+        # convert tvm NDArray back to numpy
+        data_np_out = [data.asnumpy() for data in data_tvm_out]
+        return data_np_out
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py
new file mode 100644
index 000000000..4e6aedd18
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py
@@ -0,0 +1,87 @@
+# coding=utf-8
+import cmd
+from .util.util import util
+from .util.constant import Constant
+from .precision_tool import PrecisionTool
+
+HEADER = r"""    ____                    _      _           ______            __
+   / __ \________  _____(_)____(_)___  ____/_  __/___  ____  / /
+  / /_/ / ___/ _ \/ ___/ / ___/ / __ \/ __ \/ / / __ \/ __ \/ /
+ / ____/ /  / __/ /__/ (__  ) / /_/ / / / / / / /_/ / /_/ / /
+/_/   /_/   \___/\___/_/____/_/\____/_/ /_/_/  \____/\____/_/  version=%s""" % Constant.VERSION
+
+HELP_AC = "Run auto check function, use [-c] to start vector compare process.\n" \
+          "   usage: ac (-c) \n"
+HELP_RUN = "Run any shell command.\n" \
+           "   usage: (run) vim tensor_name.txt \n"
+HELP_PT = "Print npy tensor, use [-c] to convert and save to txt file.\n" \
+          "   usage: pt (-c) [tensor_name.npy] \n"
+
+
+class InteractiveCli(cmd.Cmd):
+    def __init__(self):
+        cmd.Cmd.__init__(self)
+        self.prompt = "PrecisionTool > "
+        self.precision_tool = None
+        util.print_panel(HEADER)
+        self._prepare()
+
+    def default(self, line=''):
+        util.execute_command(line)
+        return False
+
+    def _prepare(self):
+        self.precision_tool = PrecisionTool()
+ self.precision_tool.prepare() + + def do_ac(self, line=''): + """Auto check.""" + self.precision_tool.do_auto_check(self._parse_argv(line)) + + def do_run(self, line=''): + """Run any shell command""" + util.execute_command(line) + + def do_ls(self, line=''): + """List ops: \n usage: ls (op(default)/dump) -n [op_name] -t [op_type]""" + argv = self._parse_argv(line) + if len(argv) > 0 and argv[0] == 'dump': + return self.precision_tool.do_list_dump(argv[1:]) + self.precision_tool.do_list_nodes(argv) + + def do_ni(self, line=''): + """Print node info:\n usage: ni (-n) [op_name]""" + self.precision_tool.do_node_info(self._parse_argv(line, '-n')) + + def do_dc(self, line=''): + """Convert npu dump by op names:\n usage: dc (-n) [npu dump file] -f [target format]""" + self.precision_tool.do_convert_npu_dump(self._parse_argv(line, '-n')) + + def do_vc(self, line=''): + """Do vector compare: \n usage: vc """ + self.precision_tool.do_vector_compare(self._parse_argv(line)) + + def do_vcs(self, line=''): + """Do vector compare summary""" + self.precision_tool.do_vector_compare_summary(self._parse_argv(line)) + + def do_pt(self, line=''): + """Print data info:\n usage: pt (-n) [*.npy] (-c)\n -c: convert and save to txt file""" + self.precision_tool.do_print_data(self._parse_argv(line, '-n')) + + def do_cp(self, line=''): + """Compare two data file """ + self.precision_tool.do_compare_data(self._parse_argv(line, '-n')) + + def do_train(self, line=''): + """Train process:\n usage: train -d all -a dump""" + self.precision_tool.do_train_analysis(self._parse_argv(line)) + + @staticmethod + def _parse_argv(line, insert=None): + argv = line.split() if line != '' else [] + if '-h' in argv: + return argv + if insert is not None and len(argv) > 0 and argv[0] != insert: + argv.insert(0, insert) + return argv diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py new file mode 100644 index 000000000..d118b86bc --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py @@ -0,0 +1,230 @@ +import argparse +import os +import time + +from .adapter.overflow import Overflow +from .dump.dump_manager import DumpManager +from .graph.graph_manager import GraphManager +from .compare.compare import Compare +from .adapter.fusion import Fusion +from .train.train_analysis import TrainAnalysis +from .util.util import util +from .util.constant import Constant +from .config import config as cfg +from .util.precision_tool_exception import PrecisionToolException +from .util.precision_tool_exception import catch_tool_exception + + +class PrecisionTool(object): + def __init__(self): + """init""" + self.graph_manager = GraphManager() + self.overflow = Overflow() + self.dump_manager = DumpManager() + self.compare = Compare() + self.fusion = Fusion() + self.train_analysis = TrainAnalysis() + self.log = util.get_log() + + @catch_tool_exception + def prepare(self): + """prepare""" + util.create_dir(cfg.DATA_ROOT_DIR) + self.graph_manager.prepare() + self.dump_manager.prepare() + self.overflow.prepare() + self.fusion.prepare() + self.compare.prepare() + + @catch_tool_exception + def do_auto_check(self, argv): + """Auto check""" + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--vector_compare', dest='vector_compare', help='Run vector compare process', + action='store_true') 
+ parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args(argv) + # vector compare + if args.vector_compare: + self.do_vector_compare() + self.do_vector_compare_summary() + self.do_check_fusion() + self.do_check_overflow(args.limit) + self.do_check_cast() + self.do_check_graph_similarity() + + @catch_tool_exception + def do_check_overflow(self, limit=3): + """check overflow""" + self.overflow.check(limit) + + @catch_tool_exception + def do_check_cast(self): + self.graph_manager.check_cast() + + @catch_tool_exception + def do_check_dtype(self): + """Check input/output dtype""" + self.graph_manager.check_dtype() + + @catch_tool_exception + def do_check_fusion(self): + """print fusion info summary""" + self.fusion.check() + + @catch_tool_exception + def do_check_graph_similarity(self): + self.graph_manager.check_similarity() + + @catch_tool_exception + def do_vector_compare(self, argv=None): + """do vector compare""" + parser = argparse.ArgumentParser() + parser.add_argument('-lt', '--left', dest='lt', default=None, help='left path(npu dump path)') + parser.add_argument('-rt', '--right', dest='rt', default=None, help='right path(cpu/npu dump path)') + parser.add_argument('-g', '--graph', dest='graph', required=False, default=None, help='graph json file') + args = parser.parse_args() if argv is None else parser.parse_args(argv) + # 1. compare npu_debug0 - tf dump data (auto) + # 2. compare npu_debug0 - npu_debug1 dump data + # 3. compare dir - dir dump data + result_dir = os.path.join(cfg.VECTOR_COMPARE_PATH, time.strftime("%Y%m%d%H%M%S", time.localtime())) + if args.lt is None: + debug_0_dump_root = self.dump_manager.get_dump_root_dir(Constant.DEFAULT_DEBUG_ID) + if util.empty_dir(debug_0_dump_root): + raise PrecisionToolException("NPU debug_0 dump dir is empty, no files to compare.") + if not util.empty_dir(cfg.TF_DUMP_DIR): + self.log.info("Tf dump dir is not empty, will compare npu dump data with tf dump data.") + self.compare.npu_tf_vector_compare(self.graph_manager.get_graphs(Constant.DEFAULT_DEBUG_ID), + debug_0_dump_root, cfg.TF_DUMP_DIR, result_dir) + else: + self.log.warning("Tf dump dir is empty, maybe run [python3 precision_tool/cli.py tf_dump] to decode" + " tf debug data.") + debug_1_dump_root = self.dump_manager.get_dump_root_dir(Constant.NPU_DEBUG_ID_1) + if debug_1_dump_root is not None and not util.empty_dir(debug_1_dump_root): + self.log.info("NPU debug_1 dump dir is not empty, will compare two npu dump data.") + self.compare.npu_vector_compare(debug_0_dump_root, debug_1_dump_root) + else: + lh_path = args.lt + rh_path = args.rt + graph_json = args.graph + self.compare.vector_compare(lh_path, rh_path, result_dir, graph_json) + self.compare.vector_summary(result_dir) + + @catch_tool_exception + def do_vector_compare_summary(self, argv=None): + parser = argparse.ArgumentParser(description="show vector compare result summary.") + parser.add_argument('-f', '--file', dest='file', default=None, required=False, help='compare_result file/path') + parser.add_argument('-c', '--cos_sim', dest='cos_sim', type=float, help='cos_sim_threshold', default=0.98) + parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args() if argv is None else parser.parse_args(argv) + error_ops = self.compare.vector_summary(args.file, args.cos_sim, args.limit) + # parse error_ops + + @catch_tool_exception + def do_print_data(self, argv=None): + """print tensor data""" + parser = 
argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='list by op name')
+        args = parser.parse_args() if argv is None else parser.parse_args(argv)
+        self.dump_manager.print_tensor(args.name, True)
+
+    @catch_tool_exception
+    def do_list_nodes(self, argv):
+        """list op nodes in graph"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', default='', help='list by op type')
+        parser.add_argument('-n', '--name', dest='name', default='', help='list by op name')
+        parser.add_argument('-f', '--fusion', dest='fusion', default='', help='list by op fusion pass')
+        parser.add_argument('-k', '--kernel_name', dest='kernel_name', default='', help='list by op kernel_name')
+        args = parser.parse_args(argv)
+        self.graph_manager.print_op_list(args.type, args.name, args.fusion, args.kernel_name)
+
+    @catch_tool_exception
+    def do_node_info(self, argv):
+        """Print op node info"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='op name')
+        parser.add_argument('-g', '--graph', dest='graph', help='graph name')
+        parser.add_argument('-a', '--attr', dest='attr', action='store_true', help='show all attr info')
+        parser.add_argument('-c', '--check', dest='check', action='store_true', help='check single op precision')
+        parser.add_argument('-s', '--save', dest='save', type=int, default=0,
+                            help='save subgraph, param gives the deep of subgraph')
+        args = parser.parse_args(argv)
+        # print graph op info
+        npu_ops, _ = self.graph_manager.get_ops(args.name, args.graph)
+        npu_op_summary, tf_op_summary = self.graph_manager.op_graph_summary(npu_ops, args.attr)
+        npu_dump_summary, tf_dump_summary = self.dump_manager.op_dump_summary(npu_ops)
+        pt_dump_summary = self.dump_manager.pt_dump_summary(args.name)
+        # merge graph/dump/compare info
+        for debug_id, graph_summary in npu_op_summary.items():
+            for graph_name, summary_detail in graph_summary.items():
+                summary_txt = [summary_detail]
+                if debug_id in npu_dump_summary and graph_name in npu_dump_summary[debug_id]:
+                    summary_txt.append(npu_dump_summary[debug_id][graph_name])
+                if tf_dump_summary is not None:
+                    summary_txt.append(tf_dump_summary)
+                title = "[green](%s)[/green] %s" % (debug_id, graph_name)
+                util.print_panel(Constant.NEW_LINE.join(summary_txt), title)
+        if pt_dump_summary != '':
+            util.print_panel(pt_dump_summary, args.name)
+        if args.save != 0:
+            self.graph_manager.save_sub_graph(npu_ops, args.save)
+
+    @catch_tool_exception
+    def do_compare_data(self, argv):
+        """compare two tensor"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='names', type=str, default=[], help='op name', nargs='+')
+        parser.add_argument('-p', '--print', dest='count', default=20, type=int, help='print err data num')
+        parser.add_argument('-s', '--save', dest='save', action='store_true', help='save data in txt format')
+        parser.add_argument('-al', '--atol', dest='atol', default=0.001, type=float, help='set atol')
+        parser.add_argument('-rl', '--rtol', dest='rtol', default=0.001, type=float, help='set rtol')
+        args = parser.parse_args(argv)
+        if len(args.names) != 2:
+            self.log.error("compare files should be 2.")
+        else:
+            self.compare.compare_data(args.names[0], args.names[1], args.save, args.rtol, args.atol, args.count)
+
+    @catch_tool_exception
+    def do_list_dump(self, argv):
+        """List dump files"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', default='', help='dump dir type')
+        parser.add_argument('-n', '--name', dest='name', default='', help='op name')
+        args = parser.parse_args(argv)
+        self.dump_manager.list_dump(args.type, args.name)
+
+    @catch_tool_exception
+    def do_convert_npu_dump(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', help='op name')
+        parser.add_argument('-f', '--format', dest='format', default=None, required=False, help='target format')
+        parser.add_argument('-o', '--output', dest='output', required=False, default=None, help='output path')
+        args = parser.parse_args(argv)
+        self.dump_manager.convert_npu_dump(args.name, args.format, args.output)
+
+    @catch_tool_exception
+    def do_convert_all_npu_dump(self):
+        self.dump_manager.decode_all_npu_dump()
+
+    @catch_tool_exception
+    def check_graph_similarity(self):
+        """ Check graph similarity """
+
+    @catch_tool_exception
+    def do_train_analysis(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-d', '--device', dest='device', default='all', required=False,
+                            help='train device, support cpu/npu/all')
+        parser.add_argument('-a', '--action', dest='action', default='dump', required=False,
+                            help='action, support dump(-d cpu/npu)[overflow]|fusion_off|fusion_switch(npu)')
+        args = parser.parse_args(argv)
+        self.train_analysis.run(args.device, args.action)
+
+    def single_cmd(self, argv):
+        cmd_func_map = {'compare': self.do_compare_data,
+                        'vector_compare': self.do_vector_compare,
+                        'train': self.do_train_analysis}
+        if argv[1] in cmd_func_map:
+            func = cmd_func_map[argv[1]]
+            return func(argv[2:])
+        raise PrecisionToolException("cmd %s is not supported or cmd should be run in interactive mode." % argv[1])
-- Gitee
From 2fdf42531701b1ff7fca425d2af3fc9627cefb80 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:45:25 +0000
Subject: [PATCH 12/38] 1

Signed-off-by: huangju1993
---
 .../examples/precision_tool/README.md         | 518 ++++++++++++++++++
 .../examples/precision_tool/RELEASE.md        |  66 +++
 .../examples/precision_tool/__init__.py       |   0
 .../examples/precision_tool/caffe_dump.py     | 132 +++++
 .../examples/precision_tool/cli.py            | 129 +++++
 .../examples/precision_tool/tf_config.py      | 118 ++++
 .../examples/precision_tool/tf_session.py     |  60 ++
 7 files changed, 1023 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/__init__.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md
new file mode 100644
index 000000000..3f291afff
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md
@@ -0,0 +1,518 @@
+# Precision Issue Analysis Tool
+
+## Features
+### Overview
+This toolkit provides the functions commonly needed for precision comparison. It currently targets TensorFlow training scenarios.
+
+Most common functions are one-command operations, and an interactive entry point is provided for querying and working with dump data and graph information.
+
+For inference scenarios, use the [one-click end-to-end inference precision comparison](https://gitee.com/ascend/tools/tree/master/msquickcmp) tool directly.
+### Main functions
+#### Implemented
+1. Simplified script modification [manual/semi-automatic]
+2. TF golden (reference) data generation [automatic/semi-automatic]
+3. Operator overflow detection and analysis [automatic]
+4. GE graph dump and graph parsing [automatic]
+5. Data dump and full-network comparison [automatic]
+6. Operator list / node info query [manual]
+7. Dump data query and decoding [manual]
+8. Data comparison [manual]
+### Getting the tool
+1. Download as an archive:
+   download https://gitee.com/ascend/tools as a zip archive
+2. Or fetch it with git
+3. Move the tools/precision_tool subdirectory into your training working directory
+### Installing python3 dependencies
+```shell
+pip3 install rich gnureadline pexpect graphviz
+# ubuntu/Debian
+sudo apt-get install graphviz
+# fedora/Centos
+sudo yum install graphviz
+```
+### Runtime prerequisites
+* The script is normally deployed on the NPU training environment, where both the CPU and NPU training scripts can run.
+* For dump data comparison, first check for and remove any randomness used inside the training script; otherwise mismatched inputs make the comparison result unusable.
+  ```python
+  # For tf.random / np.random / (python) random, fix the inputs by fixing the seeds.
+  # Importing tf_config.py sets all three seeds by default, but depending on import order
+  # it may not reach all related code; set them manually at a suitable place in the code.
+  seed = 987654
+  random.seed(seed)
+  tf.random.set_random_seed(seed)
+  np.random.seed(seed)
+
+  # Set tf_random_seed in RunConfig/NPURunConfig to fix the network-level random factor.
+  # A seed set via tf.random does not take effect globally under Estimator;
+  # use the following instead:
+  run_config = tf.estimator.RunConfig(tf_random_seed=1, ...)
+  run_config = NPURunConfig(tf_random_seed=1, ...)
+  ```
+  * **In theory the approaches above fix most randomness in a network; the steps below are usually unnecessary.**
+  ```python
+  # 1. Random parameter initialization
+  # Loading a checkpoint fixes most initial parameters
+  saver.restore(sess, saver_dir)
+
+  # 2. Random input processing (e.g. shuffling the input data)
+  dataset = tf.data.TFRecordDataset(tf_data)
+  dataset = dataset.shuffle(batch_size*10)  # simply comment out this line
+
+  # 3. Randomness inside the model (e.g. dropout)
+  net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')  # comment out this line
+
+  # 4. Random image preprocessing (fix the seed as appropriate, or replace with deterministic preprocessing)
+  # 4.1 Random rotate
+  random_angle = tf.random_uniform([], - self.degree * 3.141592 / 180, self.degree * 3.141592 / 180)
+  image = tf.contrib.image.rotate(image, random_angle, interpolation='BILINEAR')
+  depth_gt = tf.contrib.image.rotate(depth_gt, random_angle, interpolation='NEAREST')
+
+  # 4.2 Random flipping
+  do_flip = tf.random_uniform([], 0, 1)
+  image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(image), lambda: image)
+  depth_gt = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(depth_gt), lambda: depth_gt)
+
+  # 4.3 Random crop
+  image_depth = tf.concat([image, depth_gt], 2)
+  image_depth_cropped = tf.random_crop(image_depth, [self.params.height, self.params.width, 4])
+
+  # others ......
+  ```
+* The tool parses and analyzes the **NPU computation graph**, **NPU dump data**, **NPU overflow detection data**, **TF computation graph meta file**, and **TF dump data**.
+These inputs can be obtained as described below (you only need the inputs required by the features you actually use):
+#### 1. NPU computation graph
+  ```
+  Note: NPU dump data and the computation graph are correlated and must be collected together.
+  Avoid unsetting the DUMP GRAPH related environment variables in custom training scripts.
+  ```
+* [Recommended] Method 1: configure NPU data dump or overflow detection as in dependencies 2 and 3 below; the GE graph dump environment variables are then set automatically.
+
+* [Not recommended] Method 2: apply the changes from the migration guide, run the NPU script, and move the collected graphs into the precision_data graph directory:
+  ```shell
+  export DUMP_GE_GRAPH=2
+  export DUMP_GRAPH_LEVEL=3
+  export DUMP_GRAPH_PATH=./precision_data/npu/debug_0/graph
+  # If DUMP_GRAPH_PATH is not set, the graph files are written to the working directory and can be moved into precision_data directly
+  mkdir -p ./precision_data/npu/debug_0/graph && mv ge_proto_*.txt ./precision_data/npu/debug_0/graph
+  ```
+#### 2. NPU dump data
+* [Recommended] Method 1: **import precision_tool.tf_config** in the training script and run it through the helper CLI provided by precision_tool:
+  ```python
+  # Both NPU dump data and overflow detection data can be collected with the change below.
+  # Note: the action parameter can be 'dump' or 'overflow'.
+  # import precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # If NPU is enabled through Estimator's NPURunConfig:
+  dump_config = npu_tf_config.estimator_dump_config(action='dump')  # new line
+  npu_config = NPURunConfig(dump_config=dump_config)
+
+  # If NPU is enabled through session.run, or through a tf.ConfigProto session_config
+  # passed into tf.estimator.RunConfig:
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump')  # new line
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+
+  # If custom_op is used:
+  config = tf.ConfigProto()
+  custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+  custom_op.name = "NpuOptimizer"
+  custom_op.parameter_map["use_off_line"].b = True
+  custom_op = npu_tf_config.update_custom_op(custom_op, action='dump')  # new line
+  ```
+
+* [Not recommended] Method 2: modify the training script following the [precision comparison tool guide](https://www.hiascend.com/document?tag=community-developer),
+  run it, and copy the dump data into the [precision_data/dump/npu/] directory.
+#### 3. NPU overflow detection data (without it, overflow results cannot be shown)
+* [Recommended] Method 1: **import precision_tool.tf_config** in the training script, apply the changes from [2. NPU dump data], and run through the helper CLI:
+  ```python
+  # set action to 'overflow'
+  # import precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+  dump_config = npu_tf_config.estimator_dump_config(action='overflow')  # new line
+  ```
+* [Not recommended] Method 2: modify the training script following [analyzing operator overflow with the overflow detection tool](https://www.hiascend.com/document?tag=community-developer)
+  and copy the overflow data into the [precision_tool/dump/overflow/] directory.
+
+#### 4. TF dump data (required for data comparison; applies to TF 1.15, see tfdbg_ascend for TF2.x)
+* [Recommended] Method 1: add tf_debug code to the CPU/GPU training script, then generate the golden dump data with the helper CLI:
+  ```python
+  import precision_tool.tf_config as npu_tf_config
+
+  # For Estimator, add training_hooks to EstimatorSpec
+  estim_specs = tf.estimator.EstimatorSpec(training_hooks=[npu_tf_config.estimator_dump()])
+
+  # For session.run, the following wraps the session with tf_debug:
+  sess = npu_tf_config.sess_dump(sess=sess)
+  ```
+  ```shell
+  # 1. Run the script
+  # 2. Decode the tf debug dump files into per-op output tensor files
+  # Note: TF dump is implemented with tf_debug's print_tensor (pt) command. Because run() is a very
+  # flexible interface, the script cannot know which run() phase holds the tensors you want, so modify
+  # the training code to exit right after the relevant run(). For example, run only one training step;
+  # depending on the number of run() calls, 1..N offline tf_debug dump directories are produced.
+  # The precision_tool script automatically extracts all tensors of the last run phase as golden data.
+  python3.7.5 precision_tool/cli.py tf_dump
+
+  # The extracted tensors are stored under precision_data/tf/dump/
+  # If they are not what you expected, inspect precision_data/dump/cpu_debug/ and keep only the
+  # tf_debug offline data of the intended run phase, then regenerate:
+  rm -rf precision_data/tf/dump/* && python3.7.5 precision_tool/cli.py tf_dump
+  ```
+* [Not recommended] Method 2: follow [preparing npy data generated on GPU/CPU](https://www.hiascend.com/document?tag=community-developer)
+  to collect the CPU/GPU TF data and copy it into the [precision/dump/cpu/] directory.
+#### 5. TF graph meta file (optional)
+* Save a checkpoint through a saver:
+  ```python
+  # modify the CPU/NPU script
+  with tf.Session() as sess:
+      # do session.run()
+      saver = tf.train.Saver()
+      # save ckpt
+      saver.save(sess, saver_dir)
+  ```
+#### 6. Disabling NPU fusion (enable as needed)
+* The NPU fuses operators in the computation graph to improve network performance. Since most fusions are recognized
+  automatically, unforeseen cases can cause precision issues; disabling fusion helps determine whether a problem is fusion-related.
+  ```python
+  # Disabling fusion can be combined with overflow detection / data dump; it is enabled similarly.
+  # import precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # If NPU is enabled through Estimator's NPURunConfig:
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_OFF_FILE)  # changed line
+  # To disable only specific fusion rules, edit precision_tool/fusion_switch.cfg and use:
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_SWITCH_FILE)  # changed line for specific fusions
+
+  # If NPU is enabled through session.run or a tf.ConfigProto session_config passed into
+  # tf.estimator.RunConfig, the following enables data dump and fusion-off together:
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_off')  # new line
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_switch')  # new line for specific fusions
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+  # With custom_op, the following also works:
+  custom_op = npu_tf_config.update_custom_op(custom_op=custom_op, action='dump | fusion_off')
+  ```
+## Usage
+1. Configuration file precision_tool/config.py (defaults are usually fine)
+   ```python
+   # To dump data of specific steps, change the following item.
+   # Comparing the first step is usually enough.
+   # Dump config '0|5|10'
+   TF_DUMP_STEP = '0'
+
+   # Fusion switch file; individual fusion rules can be enabled/disabled here, see:
+   # https://support.huaweicloud.com/tensorflowdevg-cann330alphaXtraining/atlastfadapi_07_0005.html
+   FUSION_SWITCH_FILE = './precision_tool/fusion_switch.cfg'
+
+   # Depends on atc and msaccucmp.pyc from the run package; point this at their parent directory.
+   # The default run package install location is /usr/local/Ascend, so this usually needs no change.
+   # parent dir path of msaccucmp.pyc and atc, usually run package dir
+   CMD_ROOT_PATH = '/usr/local/'
+
+   # ASCEND Log Path
+   ASCEND_LOG_PATH = '/root/ascend/log/plog/'
+
+   # log level and analysis root directory
+   # TOOL CONFIG
+   LOG_LEVEL = "NOTSET"
+   # On ModelArts, point the data root at a custom directory as needed and download it completely afterwards
+   ROOT_DIR = './'
+   ```
+2. Start the script (interactive CLI)
+   ```shell
+   python3 ./precision_tool/cli.py
+   ```
+
+### Interactive commands
+1. ac -l [limit_num] -c
+   ```shell
+   # auto check: lists fusion info and decodes operator overflow data
+   # -c optional, also run the full-network comparison
+   # -l optional, limit the number of printed results (decoded overflow entries etc.)
+   PrecisionTool > ac -c
+   ╭──────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ [TransData][327] trans_TransData_1170                                                             │
+   │  - [AI Core][Status:32][TaskId:327] ['浮点计算有溢出']                                            │
+   │  - First overflow file timestamp [1619347786532995] -                                             │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.input.0.npy                              │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11950836181640626]  │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.output.0.npy                             │
+   │   |- [Shape: (32, 20, 8, 8, 16)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.07781982421875]  │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+2. run [command]
+   ```shell
+   # Run a shell command without leaving the interactive CLI; commands that do not clash with
+   # built-in commands can be run directly, otherwise prefix them with run
+   PrecisionTool > run vim cli.py
+   PrecisionTool > vim cli.py
+   ```
+
+3. ls -n [op_name] -t [op_type] -f [fusion_pass] -k [kernel_name]
+   ```shell
+   # Query operators in the network by [op name]/[op type], fuzzy match
+   # -n op node name
+   # -t op type
+   # -f fusion type
+   # -k kernel_name
+   PrecisionTool > ls -t Mul -n mul_3 -f TbeMulti
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5b/Branch_1/mul_3
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5c/Branch_1/mul_3
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5d/Branch_1/mul_3
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_6b/Branch_1/mul_3
+   ```
+
+4. ni (-n) [op_name] -s [save sub graph deep]
+   ```shell
+   # Query node info by [op name]
+   # -n node name
+   # -g graph name
+   # -a show attr info
+   # -s save a subgraph rooted at this node, with depth given by the parameter value
+   PrecisionTool > ni gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual -s 3
+   ╭─────────────────── [GreaterEqual]gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual ────────────────────╮
+   │ [GreaterEqual] gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual                                        │
+   │ Input:                                                                                                                                │
+   │  -[0][DT_FLOAT][NHWC][32, 8, 8, 320] InceptionV3/InceptionV3/Mixed_7a/Branch_0/add_3:0                                                │
+   │  -[1][DT_FLOAT][NHWC][1, 8, 1, 1] InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3tau:0                                                    │
+   │  -[2][][[]][] atomic_addr_clean0_21:-1                                                                                                │
+   │ Output:                                                                                                                               │
+   │  -[0][DT_BOOL][NHWC][32, 8, 8, 320] ['trans_TransData_1170']                                                                          │
+   │ NpuDumpInput:                                                                                                                         │
+   │  -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.0.npy   │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556]                                   │
+   │  -[1] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.1.npy   │
+   │   |- [Shape: (1, 8, 1, 1)] [Dtype: float32] [Max: 0.0] [Min: 0.0] [Mean: 0.0]                                                         │
+   │ NpuDumpOutput:                                                                                                                        │
+   │  -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.output.0.npy  │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.1176300048828125]                                       │
+   │ CpuDumpOutput:                                                                                                                        │
+   │  -[0] gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.0.1619492699305998.npy                          │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11764373779296874]                                      │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+   2021-04-27 14:39:55 (15178) -[DEBUG]write 14953 bytes to './precision_data/dump/temp/op_graph/GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.3.gv'
+   2021-04-27 14:39:55 (15178) -[INFO]Sub graph saved to /root/sym/inception/precision_data/dump/temp/op_graph
+   ```
+
+5. pt (-n) [*.npy]
+   ```shell
+   # Show the data info of one dump block
+   # -n optional, the data file to inspect
+   # the data is saved as txt by default
+   PrecisionTool > pt TransData.trans_TransData_1170.327.1619347786532995.input.0.npy
+   ╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ Shape: (32, 8, 8, 320)                                                                                                    │
+   │ Dtype: bool                                                                                                               │
+   │ Max: True                                                                                                                 │
+   │ Min: False                                                                                                                │
+   │ Mean: 0.11950836181640626                                                                                                 │
+   │ Path: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy          │
+   │ TxtFile: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy.txt   │
+   ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+
+6. cp (-n) [left *.npy] [right *.npy] -p [print num] -al [atol] -rl [rtol]
+   ```shell
+   # Compare the data of two tensors
+   # -n the two numpy files to compare
+   # -p number of error items and top items to print
+   # -al/rl tolerance parameters, used in two places:
+   # -s save as txt file, on by default
+   # 1. np.allclose(left, right, atol=al, rtol=rl)
+   # 2. err_cnt += 1 if abs(data_left[i] - data_right[i]) > (al + rl * abs(data_right[i]))
+   PrecisionTool > cp Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy -p 10 -s -al 0.002 -rl 0.005
+                    Error Item Table                                                     Top Item Table
+   ┏━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+   ┃ Index ┃ Left          ┃ Right        ┃ Diff         ┃ ┃ Index ┃ Left        ┃ Right       ┃ Diff          ┃
+   ┡━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ ┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+   │ 155   │ 0.024600908   │ 0.022271132  │ 0.002329776  │ │ 0     │ -0.9206961  │ -0.9222216  │ 0.0015255213  │
+   │ 247   │ 0.015752593   │ 0.017937578  │ 0.0021849852 │ │ 1     │ -0.6416973  │ -0.64051837 │ 0.0011789203  │
+   │ 282   │ -0.0101207765 │ -0.007852031 │ 0.0022687456 │ │ 2     │ -0.35383835 │ -0.35433492 │ 0.0004965663  │
+   │ 292   │ 0.019581757   │ 0.02240482   │ 0.0028230622 │ │ 3     │ -0.18851271 │ -0.18883198 │ 0.00031927228 │
+   │ 640   │ -0.06593232   │ -0.06874806  │ 0.0028157383 │ │ 4     │ -0.43508735 │ -0.43534422 │ 0.00025686622 │
+   │ 1420  │ 0.09293677    │ 0.09586689   │ 0.0029301196 │ │ 5     │ 1.4447614   │ 1.4466647   │ 0.0019032955  │
+   │ 1462  │ -0.085207745  │ -0.088047795 │ 0.0028400496 │ │ 6     │ -0.3455438  │ -0.3444429  │ 0.0011008978  │
+   │ 1891  │ -0.03433288   │ -0.036525503 │ 0.002192624  │ │ 7     │ -0.6560242  │ -0.6564579  │ 0.0004336834  │
+   │ 2033  │ 0.06828873    │ 0.07139922   │ 0.0031104907 │ │ 8     │ -2.6964858  │ -2.6975214  │ 0.0010356903  │
+   │ 2246  │ -0.06376442   │ -0.06121233  │ 0.002552092  │ │ 9     │ -0.73746175 │ -0.73650354 │ 0.00095820427 │
+   └───────┴───────────────┴──────────────┴──────────────┘ └───────┴─────────────┴─────────────┴───────────────┘
+   ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ Left:                                                                                                                                      │
+   │  |- NpyFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy      │
+   │  |- TxtFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy.txt  │
+   │  |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556]                               │
+   │ DstFile:                                                                                                                                   │
+   │  |- NpyFile: ./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy                             │
+   │  |- TxtFile: ./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy.txt                         │
+   │  |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.8425903] [Min: -8.374472] [Mean: -0.7256237]                               │
+   │ NumCnt: 655360                                                                                                                             │
+   │ AllClose: False                                                                                                                            │
+   │ CosSim: 0.99999493                                                                                                                         │
+   │ ErrorPer: 0.023504638671875 (rl= 0.005, al= 0.002)                                                                                         │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+
+7. vc -lt [left_path] -rt [right_path] -g [graph]
+   ```python
+   # Manually compare two directories across the whole network
+   # -lt required, one of the two dump directories
+   # -rt required, the other directory, usually the golden one
+   # -g optional; with -g the tool parses the mapping inside the graph for the comparison
+   #    (needed for NPU vs TF comparison; NPU vs NPU comparison matches by op name directly)
+   # Point the paths at the directory level that contains the dump data, e.g.
+   # precision_data/npu/debug_0/dump/20220217095546/3/ge_default_20220217095547_1/1/0/
+   ```
+8. vcs -f [file_name] -c [cos_sim_threshold] -l [limit]
+   ```python
+   # Show a summary of the comparison results, filtering out ops below a cosine-similarity threshold
+   # -f (--file) optional csv file; by default all csvs in the newest directory under
+   #    precision_data/temp/vector_compare/ are scanned
+   # -c (--cos_sim) optional cosine-similarity threshold used for filtering, default 0.98
+   # -l (--limit) optional number of results to print, default 3
+   PrecisionTool > vcs -c 0.98 -l 2
+   2021-05-31 14:48:56 (2344298) -[INFO]Sub path num:[1]. Dirs[['20210529145750']], choose[20210529145750]
+   2021-05-31 14:48:56 (2344298) -[DEBUG]Find ['result_20210529145751.csv', 'result_20210529145836.csv', 'result_20210529145837.csv', 'result_20210529145849.csv', 'result_20210529150404.csv', 'result_20210529151102.csv'] result files in dir precision_data/temp/vector_compare/20210529145750
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145751.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145836.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 1 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145837.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145849.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529150404.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529151102.csv
+   ╭── [578] pixel_cls_loss/cond_1/TopKV2 ───╮
+   │ Left: ['pixel_cls_loss/cond_1/TopKV2']  │
+   │ Right: ['pixel_cls_loss/cond_1/TopKV2'] │
+   │ Input:                                  │
+   │  - [0]1.0 - [1]nan                      │
+   │ Output:                                 │
+   │  - [0]0.999999 - [1]0.978459            │
+   ╰─────────────────────────────────────────╯
+   ╭── [490] gradients/AddN_5 ───╮
+   │ Left: ['gradients/AddN_5']  │
+   │ Right: ['gradients/AddN_5'] │
+   │ Input:                      │
+   │  - [0]nan - [1]1.0          │
+   │ Output:                     │
+   │  - [0]0.05469               │
+   ╰─────────────────────────────╯
+   ```
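+A typical vc/vcs sequence looks like the following; the paths and the graph json name are purely illustrative:
+```shell
+# Hypothetical invocation: compare an NPU dump dir against the TF golden dump,
+# then summarize ops whose cosine similarity falls below 0.95.
+PrecisionTool > vc -lt precision_data/npu/debug_0/dump/20220217095546/3/ge_default_20220217095547_1/1/0/ -rt precision_data/tf/dump -g build_graph.json
+PrecisionTool > vcs -c 0.95 -l 5
+```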
+### Precision_data目录结构
+```
+precision_data/
+├── npu
+│   ├── debug_0
+│   │   ├── dump
+│   │   │   └── 20210510101133
+│   │   └── graph
+│   │       └── ge_proto_00000179_PreRunAfterBuild.txt
+│   └── debug_1
+├── tf
+│   ├── tf_debug
+│   └── dump
+├── overflow
+├── fusion
+└── temp
+    ├── op_graph
+    ├── decode
+    │   ├── dump_decode
+    │   ├── overflow_decode
+    │   └── dump_convert
+    └── vector_compare
+        ├── 20210510101133
+        │   ├── result_123456.csv
+        │   └── result_123455.csv
+        └── 20210510101134
+            └── result_123458.csv
+```
+### 配合msquickcmp一键式推理精度比对工具使用
+- msquickcmp会将中间dump数据和图自动保存在一个时间戳命名的目录内, 可以使用precision_tool工具直接对该目录进行分析
+```
+output-path/timestamp
+├── dump_data
+├── input
+├── model
+├── result_2021211214657.csv
+└── tmp
+```
+- 修改配置
+```python
+# file precision_tool/config.py
+# [train/infer] if adapt from msquickcmp result, set net type to infer
+NET_TYPE = 'infer'
+```
+- 执行以下命令
+```shell
+# 前提条件:
+# 当前目录没有precision_data目录(导入过程会新创建一个precision_data,用于保存导入数据)
+# 只有第一次需要使用infer子命令导入,后续直接执行 python3 precision_tool/cli.py 即可
+python3 precision_tool/cli.py infer output-path/timestamp
+```
+
+### 基于checkpoint进行训练精度分析
+#### 获取checkpoint和网络输入数据
+```python
+from precision_tool.tf_session import PrecisionTfSession
+with PrecisionTfSession() as sess:
+    sess.run()
+# 执行完成后,将在precision_data/tf/checkpoint 目录生成一个checkpoint
+# 在precision_data/tf/checkpoint/inputs目录保存[input_tensor_name].npy的输入数据
+```
+
+#### 使用【train】命令进行cpu和npu dump数据的获取
+```shell
+# train -d [all/npu/cpu] -a [dump|fusion_off|overflow]
+python3 precision_tool/cli.py train -d all -a dump
+```
+
+### TF脚本修改参考
+
+```python
+# 打印动态Scale的Loss值
+loss_scale_manager = ExponentialUpdateLossScaleManager()
+scale_v = sess.run([loss_scale_manager.get_loss_scale()])
+print(">>> Current Loss Scale >>> ", scale_v)
+
+
+with tf.Session() as sess:
+    # do session.run()
+    saver = tf.train.Saver()
+    # 保存ckpt
+    saver.save(sess, saver_dir)
+    # ...
+    # 从ckpt恢复
+    saver.restore(sess, saver_dir)
+    # ...
+    # 保存Tensorboard
+    summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph)
+
+```
+
+### FAQ
+1. 安装gnureadline报错找不到lncurses
+   ```shell
+   /usr/bin/ld: cannot find -lncurses
+   collect2: error: ld returned 1 exit status
+   error: command 'gcc' failed with exit status 1
+   ```
+   ```shell
+   # 先尝试在本地查找libncurses.so*
+   find / -name libncurses.so*
+   # 如果能找到以下文件,直接创建一个libncurses.so指向libncurses.so.5.9即可,否则需要用包管理工具安装ncurses
+   /usr/lib64/libncurses.so.5
+   /usr/lib64/libncurses.so.5.9
+   /usr/lib64/libncursesw.so.5
+   # 创建软链接
+   ln -s /usr/lib64/libncurses.so.5.9 /usr/lib64/libncurses.so
+   ```
+#### 参与贡献
+
+1. Fork 本仓库
+2. 新建 Feat_xxx 分支
+3. 提交代码
+4. 
新建 Pull Request \ No newline at end of file diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md new file mode 100644 index 000000000..6ebb752c4 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md @@ -0,0 +1,66 @@ +#Release 0.1.11 +## Update +* 兼容csv mapping解析summary dump数据 +* 新增打印输入输出地址偏移 +* TF2.x dump时默认保存输入和输出 + +#Release 0.1.10 +## Update +* 兼容新的opdebug dump格式 + + +#Release 0.1.9 +## Update +* 兼容新的summary dump的csv格式 + +#Release 0.1.8 +## Update +* 兼容新的fusion_result格式 + +#Release 0.1.7 +## Update +* 兼容新平台溢出检测数据解析 + +#Release 0.1.6 +## Update +* 支持获取profiling数据 +* 兼容一些溢出监测dump数据的修改 +* 修复溢出检测数据解析问题 + +# Release 0.1.5 +## Update +* 修复PT命令重复打屏的问题 +* 模糊匹配溢出检测算子名 +* 修复PT Dump的H5文件解析的一些问题 + +# Release 0.1.4 +## Update +* 完善训练随机固定场景 + + +# Release 0.1.3 +## Update +* 支持解析Torch Dump的H5数据 + +# Release 0.1.2 +## Update +* 适配部分dump数据格式 + +# Release 0.1.1 +## Features +* 新增NpuPrintLossScaleCallBack,用于TF2.x下打印scale值 +* 新增自动查找子图Data节点真实输入节点功能 + +## Update +* 优化部分推理场景自动对比目录名和graph名不匹配的场景识别逻辑 + +## Bugfix +* 溢出错误码解析崩溃bugfix + + +# Release 0.1.0 +## Feature +* 新增基于Checkpoint加载执行网络精度对比的能力 + +## Update +* 优化目录组织结构 \ No newline at end of file diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/__init__.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py new file mode 100644 index 000000000..3c1b5982d --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py @@ -0,0 +1,132 @@ +# coding=utf-8 +""" +Source code: + https://bbs.huaweicloud.com/blogs/181056 +Example: + python3.7 caffe_dump.py -m resnet50.prototxt -w resnet50.caffemodel -i test.bin -n 'data:0' -o ./output_dir + +Guide for setting up Caffe/Tensorflow precision golden data generation environments: + https://bbs.huaweicloud.com/blogs/181059 +""" +import caffe +import sys +import argparse +import os +import caffe.proto.caffe_pb2 as caffe_pb2 +import google.protobuf.text_format +import json +import numpy as np +import time + +TIME_LENGTH = 1000 +FILE_PERMISSION_FLAG = 0o600 + + +class CaffeProcess: + def __init__(self): + parse = argparse.ArgumentParser() + parse.add_argument("-w", dest="weight_file_path", + help=" the caffe weight file path", + required=True) + parse.add_argument("-m", dest="model_file_path", + help=" the caffe model file path", + required=True) + parse.add_argument("-o", dest="output_path", help=" the output path", + required=True) + parse.add_argument("-i", "--input_bins", dest="input_bins", help="input_bins bins. e.g. './a.bin;./c.bin'", + required=True) + parse.add_argument("-n", "--input_names", dest="input_names", + help="input nodes name. e.g. 
'graph_input_0:0;graph_input_0:1'", + required=True) + args, _ = parse.parse_known_args(sys.argv[1:]) + self.weight_file_path = os.path.realpath(args.weight_file_path) + self.model_file_path = os.path.realpath(args.model_file_path) + self.input_bins = args.input_bins.split(";") + self.input_names = args.input_names.split(";") + self.output_path = os.path.realpath(args.output_path) + self.net_param = None + self.cur_layer_idx = -1 + + @staticmethod + def _check_file_valid(path, is_file): + if not os.path.exists(path): + print('Error: The path "' + path + '" does not exist.') + exit(-1) + if is_file: + if not os.path.isfile(path): + print('Error: The path "' + path + '" is not a file.') + exit(-1) + else: + if not os.path.isdir(path): + print('Error: The path "' + path + '" is not a directory.') + exit(-1) + + def _check_arguments_valid(self): + self._check_file_valid(self.model_file_path, True) + self._check_file_valid(self.weight_file_path, True) + self._check_file_valid(self.output_path, False) + for input_file in self.input_bins: + self._check_file_valid(input_file, True) + + @staticmethod + def calDataSize(shape): + dataSize = 1 + for dim in shape: + dataSize *= dim + return dataSize + + def _load_inputs(self, net): + inputs_map = {} + for layer_name, blob in net.blobs.items(): + if layer_name in self.input_names: + input_bin = np.fromfile( + self.input_bins[self.input_names.index(layer_name)], np.float32) + input_bin_shape = blob.data.shape + if self.calDataSize(input_bin_shape) == self.calDataSize(input_bin.shape): + input_bin = input_bin.reshape(input_bin_shape) + else: + print("Error: input node data size %d not match with input bin data size %d.", self.calDataSize( + input_bin_shape), self.calDataSize(input_bin.shape)) + exit(-1) + inputs_map[layer_name] = input_bin + return inputs_map + + def process(self): + """ + Function Description: + process the caffe net, save result as dump data + """ + # check path valid + self._check_arguments_valid() + + # load model and weight file + net = caffe.Net(self.model_file_path, self.weight_file_path, + caffe.TEST) + inputs_map = self._load_inputs(net) + for key, value in inputs_map.items(): + net.blobs[key].data[...] = value + # process + net.forward() + + # read prototxt file + net_param = caffe_pb2.NetParameter() + with open(self.model_file_path, 'rb') as model_file: + google.protobuf.text_format.Parse(model_file.read(), net_param) + for layer in net_param.layer: + name = layer.name.replace("/", "_").replace(".", "_") + index = 0 + for top in layer.top: + data = net.blobs[top].data[...] + file_name = name + "." + str(index) + "." 
+                output_dump_path = os.path.join(self.output_path, file_name)
+                np.save(output_dump_path, data)
+                os.chmod(output_dump_path, FILE_PERMISSION_FLAG)
+                print('The dump data of "' + layer.name +
+                      '" has been saved to "' + output_dump_path + '".')
+                index += 1
+
+
+if __name__ == "__main__":
+    caffe_process = CaffeProcess()
+    caffe_process.process()
\ No newline at end of file
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py
new file mode 100644
index 000000000..f46368dee
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py
@@ -0,0 +1,129 @@
+# coding=utf-8
+"""
+cli
+"""
+import os
+import sys
+
+from lib.precision_tool import PrecisionTool
+from lib.train.train_analysis import TrainAnalysis
+from lib.interactive_cli import InteractiveCli
+from lib.util.precision_tool_exception import PrecisionToolException
+from lib.util.util import util
+from lib.dump.tf_dump import TfDump
+from lib.adapter.msquickcmp_adapter import MsQuickCmpAdapter
+from lib.adapter.offline_om_adapter import OfflineOmAdapter
+from lib.config import config as cfg
+
+INTRODUCE_DOC = \
+    "===============================\n" \
+    "Usage:\n" \
+    "   Single mode:\n" \
+    "       Exp:\n" \
+    "           Dump TF data:\n" \
+    "           > python3.7.5 precision_tool/cli.py tf_dump \n" \
+    "           Adapt msquickcmp data:\n" \
+    "           > python3.7.5 precision_tool/cli.py infer [data path of msquickcmp output] \n" \
+    "   Interactive mode:\n" \
+    "       Exp:\n" \
+    "           Start command line:\n" \
+    "           > python3.7.5 precision_tool/cli.py\n"
+
+
+def _run_tf_dbg_dump(cmdline):
+    """ Generate tf dump files with tf debug files."""
+    tf_dump = TfDump()
+    tf_dump.run_tf_dbg_dump(cmdline)
+
+
+def _unset_flags():
+    if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ:
+        del os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG]
+    if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ:
+        del os.environ[cfg.PRECISION_TOOL_DUMP_FLAG]
+
+
+def _run_npu_dump(cmd):
+    """Deprecated function."""
+    _unset_flags()
+    log = util.get_log()
+    os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] = 'True'
+    log.info("Start run NPU script with dump data.")
+    ret = util.execute_command(cmd)
+    log.info("Finish run NPU script with dump data. ret [%s]", ret)
+    _unset_flags()
+
+
+def _run_npu_overflow(cmd):
+    """Deprecated function."""
+    _unset_flags()
+    log = util.get_log()
+    os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] = 'True'
+    log.info("Start run NPU script with overflow check process....")
+    ret = util.execute_command(cmd)
+    log.info("Finish run NPU script with overflow check process. 
ret [%s]", ret) + precision_tool = PrecisionTool() + precision_tool.prepare() + precision_tool.do_check_overflow() + _unset_flags() + + +def _run_infer_adapter(output_path): + """ Run precision_tool with msquickcmp output data + :param output_path: msquickcmp output path + :return: None + """ + if OfflineOmAdapter.validate(output_path): + adapter = OfflineOmAdapter(output_path) + else: + adapter = MsQuickCmpAdapter(output_path) + adapter.run() + _run_interactive_cli() + + +def _run_interactive_cli(cli=None): + """ Run precision_tool in interactive mode + :param cli: + :return: + """ + util.get_log().info("Interactive command mode.") + if cli is None: + cli = InteractiveCli() + try: + cli.cmdloop(intro="Enjoy!") + except KeyboardInterrupt: + util.get_log().info("Bye.......") + + +def _run_cli_with_data(data_path): + """ Run precision with specific data path, default is precision_data.""" + cfg.DATA_ROOT_DIR = data_path + _run_interactive_cli() + + +function_list = { + 'tf_dump': _run_tf_dbg_dump, + 'npu_dump': _run_npu_dump, + 'npu_overflow': _run_npu_overflow, + 'infer': _run_infer_adapter, + 'data': _run_cli_with_data +} + + +def main(): + while len(sys.argv) > 1: + util.get_log().info("Single command mode.") + function_key = sys.argv[1] + cmd_line = sys.argv[2] if len(sys.argv) > 2 else None + if function_key in function_list: + return function_list[function_key](cmd_line) + precision_tool = PrecisionTool() + return precision_tool.single_cmd(sys.argv) + _run_interactive_cli() + + +if __name__ == '__main__': + try: + main() + except PrecisionToolException as pte: + util.get_log().error(pte.error_info) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py new file mode 100644 index 000000000..b159b0e99 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py @@ -0,0 +1,118 @@ +# coding=utf-8 +import os +import random +import tensorflow as tf +from .lib.adapter.tf_adapter import TfAdapter +from .lib.config import config as cfg + + +adapter = TfAdapter() + + +def seed_everything(seed=cfg.DUMP_SEED): + """ set random seed + :param seed: random seed + :return: None + """ + os.environ['PYTHONHASHSEED'] = str(seed) + random.seed(seed) + if hasattr(tf.random, 'set_seed'): + tf.random.set_seed(seed) + elif hasattr(tf.random, 'set_random_seed'): + tf.random.set_random_seed(seed) + print("[PrecisionTool] Set Tensorflow random seed to %d success." % seed) + try: + import numpy as np + np.random.seed(seed) + print("[PrecisionTool] Set numpy random seed to %d success." % seed) + except ImportError as err: + np = None + print("[PrecisionTool] No numpy module.", err) + try: + from tfdeterminism import patch + patch() + print("[PrecisionTool] patch tf determinism success.") + except Exception as err: + print("[PrecisionTool] No tfdeterminism module. Install it by pip3 install tfdeterminism.", err) + + +# set global random seed +seed_everything() + + +def sess_dump(sess): + """wrapper session with dumping debug wrapper. + In session run mode. Use sess=sess_dump(sess) + :param sess: origin session + :return: Session + """ + return adapter.sess_dump(sess) + + +def estimator_dump(): + """In estimator mode. 
estim_spec = tf.estimator.EstimatorSpec(training_hooks=[estimator_dump()])
+    :return:
+    """
+    return adapter.estimator_dump()
+
+
+def npu_device_dump_config(npu_device, action):
+    """For tf2.x
+    :param npu_device: npu_device
+    :param action: dump | overflow | fusion_off | fusion_switch
+    :return: npu_device
+    """
+    return adapter.npu_device_dump_config(npu_device, action)
+
+
+def estimator_dump_config(action=None):
+    """return DumpConfig.
+    In estimator mode. set dump_config in NPURunConfig().
+    exp. config = NPURunConfig(dump_config=estimator_dump_config(), session_config=session_config)
+    :return: DumpConfig
+    """
+    return adapter.estimator_dump_config(action)
+
+
+def session_dump_config(session_config=None, action=None, dump_layer=None):
+    """
+    In TF session mode. set dump_config in session_config.
+    exp. config = session_dump_config()
+         config.[set your own configs]
+         with tf.Session(config=config) as sess:
+            sess.run(_)
+            tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline")
+    :param session_config: original session config
+    :param action: if set action, no need to start app with cli wrapper
+    :return: config_pb2.ConfigProto
+    """
+    return adapter.session_dump_config(session_config, action, dump_layer)
+
+
+def update_custom_op(custom_op, action=None, dump_layer=None):
+    """Update custom_op
+    :param custom_op: origin custom op
+    :param action: dump | overflow | fusion_off | fusion_switch
+    :param dump_layer: layers to dump, split by space
+    :return:
+    """
+    return adapter.update_custom_op(custom_op, action, dump_layer)
+
+
+class NpuPrintLossScaleCallBack(tf.keras.callbacks.Callback):
+    """
+    For TF2.x callbacks. Usage:
+        callbacks = []
+        # append other callbacks.
+        callbacks.append(NpuPrintLossScaleCallBack(opt))
+        model.fit(xx, xx, callbacks=callbacks)
+    """
+    def __init__(self, optimizer, loss=None):
+        super(NpuPrintLossScaleCallBack, self).__init__()
+        self.optimizer = optimizer
+        self.loss = loss
+
+    def on_train_batch_begin(self, batch, logs=None):
+        print("PrecisionTool: Train steps {}, loss_scale={:.3f} / not_overflow_status={}".format(
+            batch, self.optimizer.loss_scale.numpy(), self.optimizer.last_step_finite.numpy()
+        ), flush=True)
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py
new file mode 100644
index 000000000..73cd8f7ab
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py
@@ -0,0 +1,60 @@
+# coding=utf-8
+import tensorflow as tf
+import numpy as np
+from .lib.util.util import util
+from .lib.train.train_analysis import TrainAnalysis
+from .lib.config import config as cfg
+
+
+class PrecisionTfSession(tf.Session):
+    def __init__(self, target='', graph=None, config=None):
+        super().__init__(target, graph, config)
+        self.log = util.get_log()
+        self._create_dir()
+        self.running = False
+
+    def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
+        """ wrapper super.run() """
+        run_before_after = False
+        if not self.running:
+            self.running = True
+            run_before_after = True
+        if run_before_after:
+            self._before_run(feed_dict)
+        res = super(tf.Session, self).run(fetches, feed_dict, options, run_metadata)
+        if run_before_after:
+            # saver will call run func.
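+            # Note: tf.train.Saver.save() in _after_run() itself triggers another
+            # session.run(), so the `running` flag set above keeps that nested
+            # run() from re-entering these before/after hooks.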
+ self._after_run() + self.running = False + return res + + @staticmethod + def _create_dir(): + util.create_dir(cfg.TF_CKPT_ROOT) + util.create_dir(cfg.TF_CKPT_INPUT_DIR) + + def _save_data(self, feed, feed_val): + self.log.info('Save: %s', feed) + file_name = TrainAnalysis.gen_feed_file_name(feed.name) + np.save(file_name, feed_val) + + def _before_run(self, feed_dict): + """ + save feed dict tensors + :return: None + """ + if feed_dict is not None: + self.log.info('Session run with feed_dict, will save feed dict.') + for feed, feed_val in feed_dict.items(): + if not isinstance(feed, tf.Tensor): + return + self._save_data(feed, feed_val) + # Iterator case + + def _after_run(self): + """ + save checkpoint for dump and + :return: + """ + saver = tf.train.Saver() + saver.save(self, cfg.TF_CKPT_FILE) -- Gitee From 5d1aeb1663395b7333137bf5be3cc9f5dcffc71a Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:04:03 +0000 Subject: [PATCH 13/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh. Signed-off-by: huangju1993 --- ...ID3057_FwFM_performance_1p_RT2_overflow.sh | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh new file mode 100644 index 000000000..eb446c60f --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh @@ -0,0 +1,192 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +RankSize=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FwFM_ID3057_for_TensorFlow" +#训练epoch +train_epochs=5 +#训练batch_size +batch_size=128 +#训练step +train_steps= +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + 
profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source $cur_path/set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../examples + +sed -i "s|epochs=10|epochs=5|g" run_fwfm.py + +#溢出检测 +key_word="args.precision_mode" +line=`grep -rn "args.precision_mode" run_fwfm.py| tail -1 | awk -F ":" '{print $1}'` +jit_word="\ \ \ \ custom_op = npu_tf_config.update_custom_op(custom_op, action='overflow')" +sed -i -b "$[line+1] i $jit_word" run_fwfm.py + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 run_fwfm.py \ + --data_dir=${data_path} \ + --precision_mode=${precision_mode} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|epochs=5|epochs=10|g" run_fwfm.py + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +# #输出性能FPS,需要模型审视修改 + +Time=`cat $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tr -d '\b\r'|grep -Eo "[0-9]*us/sample"|awk -F "us/sample" 'END {print $1}'` +FPS=`awk 'BEGIN{printf "%.2f\n", 1 /'${Time}'*1000000}'` +#打印,不需要修改 +echo "Final Performance item/sec : $FPS" + +#输出CompileTime +CompileTime=`grep '/sample' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| head -n 2| awk '{print $4}' | awk -F 's' '{sum+=$1} END {print sum}'` + +# #输出训练精度,需要模型审视修改 +train_accuracy=`grep "test AUC" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 + +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +cat $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tr -d '\b\r'|grep -Eo " loss: [0-9]*\.[0-9]*"|awk -F " " '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + 
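+#(说明:上面的 cat 管道先用 tr 去掉日志中的退格/回车控制符,再用 grep -Eo 提取形如 " loss: 1.234" 的字段,最后由 awk 取出数值部分,逐行写入 loss 曲线文件)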
+#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CompileTime = ${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + -- Gitee From 6f54c967aaecb1dfe81899367e78f110437042f5 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:05:17 +0000 Subject: [PATCH 14/38] update TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py. Signed-off-by: huangju1993 --- .../DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py index f97c37546..38b347db6 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py @@ -39,6 +39,7 @@ from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names import argparse import os +import precision_tool.tf_config as npu_tf_config def main(): -- Gitee From 25d1daaa2b782422921f4ebdc950da0fbc224ed3 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:10:34 +0000 Subject: [PATCH 15/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh. 
Signed-off-by: huangju1993 --- .../test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh index eb446c60f..9a9f03168 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh @@ -162,7 +162,7 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'privileged'_'docker'_'overflow'_'perf' ##获取性能数据,不需要修改 #吞吐量 -- Gitee From bfb0caa393b7992f53e7dab32d53be2af7b6a8e1 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:19:20 +0000 Subject: [PATCH 16/38] recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh. Signed-off-by: huangju1993 --- .../test/train_performance_1p_overflow.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh index 81d82ea09..7fee490a2 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh @@ -145,15 +145,15 @@ BatchSize=${batch_size} DeviceType=`uname -m` if [[ $precision_mode == "must_keep_origin_dtype" ]];then if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'overflow'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'overflow'_'perf' fi else if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'overflow'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'overflow'_'perf' fi fi echo "CaseName : $CaseName" -- Gitee From 1a69841353d84d8f274404236a37be36c828b786 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:20:37 +0000 Subject: [PATCH 17/38] built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh. 
Signed-off-by: huangju1993 --- .../test/train_performance_1p_dump.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh index da1fdc31b..29d3a1aca 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh @@ -145,15 +145,15 @@ BatchSize=${batch_size} DeviceType=`uname -m` if [[ $precision_mode == "must_keep_origin_dtype" ]];then if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'dump'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'dump'_'perf' fi else if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'dump'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'dump'_'perf' fi fi echo "CaseName : $CaseName" -- Gitee From 306a2d72f055d99a91182971147fa0eb2f431d19 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:39:13 +0000 Subject: [PATCH 18/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh. Signed-off-by: huangju1993 --- .../test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh index 9a9f03168..87e0d54e8 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh @@ -190,3 +190,4 @@ echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseNa echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "CompileTime = ${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf $cur_path/../examples/precision_data -- Gitee From 6cd72527732b77f77b639c59abc3bb1ae3d21303 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:44:22 +0000 Subject: [PATCH 19/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh. 
Signed-off-by: huangju1993 --- ...ain_ID3057_FwFM_performance_1p_RT2_dump.sh | 193 ++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh new file mode 100644 index 000000000..919e4d2f0 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh @@ -0,0 +1,193 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +RankSize=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FwFM_ID3057_for_TensorFlow" +#训练epoch +train_epochs=5 +#训练batch_size +batch_size=128 +#训练step +train_steps= +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source $cur_path/set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../examples + +sed -i "s|epochs=10|epochs=5|g" run_fwfm.py + +#溢出检测 +key_word="args.precision_mode" +line=`grep -rn "args.precision_mode" run_fwfm.py| tail -1 | awk -F ":" '{print $1}'` +jit_word="\ \ \ \ 
custom_op = npu_tf_config.update_custom_op(custom_op, action='dump')" +sed -i -b "$[line+1] i $jit_word" run_fwfm.py + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 run_fwfm.py \ + --data_dir=${data_path} \ + --precision_mode=${precision_mode} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|epochs=5|epochs=10|g" run_fwfm.py + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +# #输出性能FPS,需要模型审视修改 + +Time=`cat $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tr -d '\b\r'|grep -Eo "[0-9]*us/sample"|awk -F "us/sample" 'END {print $1}'` +FPS=`awk 'BEGIN{printf "%.2f\n", 1 /'${Time}'*1000000}'` +#打印,不需要修改 +echo "Final Performance item/sec : $FPS" + +#输出CompileTime +CompileTime=`grep '/sample' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| head -n 2| awk '{print $4}' | awk -F 's' '{sum+=$1} END {print sum}'` + +# #输出训练精度,需要模型审视修改 +train_accuracy=`grep "test AUC" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'privileged'_'docker'_'dump'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 + +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +cat $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tr -d '\b\r'|grep -Eo " loss: [0-9]*\.[0-9]*"|awk -F " " '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CompileTime = 
${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +rm -rf $cur_path/../examples/precision_data \ No newline at end of file -- Gitee From f7f3128d8f99296f5b9cbab230fa17b6d7cfdcc4 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:16:11 +0000 Subject: [PATCH 20/38] cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh. Signed-off-by: huangju1993 --- .../test/train_performance_1p_overflow.sh | 214 ++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh new file mode 100644 index 000000000..b824a8286 --- /dev/null +++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh @@ -0,0 +1,214 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet50_ID0360_for_TensorFlow2.X" +#训练epoch +train_epochs=2 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,需要模型审视修改 +export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=True +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + 
export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../tensorflow +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + cpustep=`expr $cpucount / 8` + echo "taskset c steps:" $cpustep + let a=RANK_ID*$cpustep + let b=RANK_ID+1 + let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup taskset -c $a-$c python3 resnet_ctl_imagenet_main.py \ + --data_dir=${data_path} \ + --num_accumulation_steps=1 \ + --train_steps=${train_steps} \ + --train_epochs=${train_epochs} \ + --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --distribution_strategy=off \ + --use_tf_while_loop=true \ + --use_tf_function=true \ + --enable_checkpoint_and_export \ + --steps_per_loop=${train_steps} \ + --base_learning_rate=${learning_rate} \ + --momentum=0.901 \ + --epochs_between_evals=1 \ + --eval_offset_epochs=2 \ + --optimizer=SGD \ + --label_smoothing=0.1 \ + --single_l2_loss_op \ + --warmup_epochs=5 \ + --weight_decay=0.000025 \ + --lr_schedule=polynomial \ + --drop_eval_remainder=True \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch_size=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep TimeHistory $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'overflow'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep train_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v BatchTimestamp|awk '{print $10}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = 
${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +sed -i "/AttributeError/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +sed -i "/MLL/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log + +rm -rf $cur_path/output/overflow_dump \ No newline at end of file -- Gitee From e43d757ea62c84e592e07bcccee7230f35aade7f Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:23:35 +0000 Subject: [PATCH 21/38] cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh. Signed-off-by: huangju1993 --- .../test/train_performance_1p_dump.sh | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh new file mode 100644 index 000000000..85ac16819 --- /dev/null +++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh @@ -0,0 +1,213 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet50_ID0360_for_TensorFlow2.X" +#训练epoch +train_epochs=2 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,需要模型审视修改 +export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=True +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == 
--data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../tensorflow +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + cpustep=`expr $cpucount / 8` + echo "taskset c steps:" $cpustep + let a=RANK_ID*$cpustep + let b=RANK_ID+1 + let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup taskset -c $a-$c python3 resnet_ctl_imagenet_main.py \ + --data_dir=${data_path} \ + --num_accumulation_steps=1 \ + --train_steps=${train_steps} \ + --train_epochs=${train_epochs} \ + --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --distribution_strategy=off \ + --use_tf_while_loop=true \ + --use_tf_function=true \ + --enable_checkpoint_and_export \ + --steps_per_loop=${train_steps} \ + --base_learning_rate=${learning_rate} \ + --momentum=0.901 \ + --epochs_between_evals=1 \ + --eval_offset_epochs=2 \ + --optimizer=SGD \ + --label_smoothing=0.1 \ + --single_l2_loss_op \ + --warmup_epochs=5 \ + --weight_decay=0.000025 \ + --lr_schedule=polynomial \ + --drop_eval_remainder=True \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch_size=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep TimeHistory $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep eval_accuracy 
$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'dump'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep train_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v BatchTimestamp|awk '{print $10}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +sed -i "/AttributeError/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +sed -i "/MLL/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log + +rm -rf $cur_path/output/data_dump;rm -rf $cur_path/../tensorflow/ge_proto* \ No newline at end of file -- Gitee From cac4c877adc985db6d78b6e53071b7583e4f2f8a Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:40:39 +0000 Subject: [PATCH 22/38] add TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh. 
Signed-off-by: huangju1993 --- .../ResNet50_ID0058_for_TensorFlow/test/set_conda.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh new file mode 100644 index 000000000..55087d862 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh @@ -0,0 +1,2 @@ +export PATH=/home/anaconda3/bin:$PATH +export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH \ No newline at end of file -- Gitee From 2280523ffbe4c3fe7288669714022b6739b70dbe Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:41:27 +0000 Subject: [PATCH 23/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20precision=5Ftool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ResNet50_ID0058_for_TensorFlow/precision_tool/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/.keep diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/.keep b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From c944f62ed74ca1b2f1c451c8ea29be39de73fc8b Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:42:52 +0000 Subject: [PATCH 24/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/README.md | 518 ++++++++++++++++++ .../precision_tool/RELEASE.md | 66 +++ .../precision_tool/__init__.py | 0 .../precision_tool/caffe_dump.py | 132 +++++ .../precision_tool/cli.py | 129 +++++ .../precision_tool/tf_config.py | 118 ++++ .../precision_tool/tf_session.py | 60 ++ 7 files changed, 1023 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/__init__.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md new file mode 100644 index 000000000..3f291afff --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md @@ -0,0 +1,518 @@ +# 精度问题分析工具 + +## 功能介绍 +### 简介 +该工具包提供了精度比对常用的功能,当前该工具主要适配Tensorflow训练场景 + +对于常用功能基本可以做到一键操作,同时提供Dump数据/图信息的交互式查询和操作入口 + +推理场景可直接使用[推理一键式全流程精度比对](https://gitee.com/ascend/tools/tree/master/msquickcmp) 工具 +### 主要功能 +#### 已完成功能 +1. 简化脚本修改【手动/半自动】 +2. 
TF标杆数据生成【自动/半自动】 +3. 算子溢出检测分析【自动】 +4. 开启GE图Dump和图解析【自动】 +5. 开启数据Dump并进行全网比对【自动】 +6. 查询算子列表/节点信息【手动】 +7. 查询/解析Dump数据信息【手动】 +8. 数据比对【手动】 +### 工具获取 +1. 下载压缩包的方式获取 + 将https://gitee.com/ascend/tools 以压缩包形式下载 +2. 使用git命令方式获取 +3. 移动 tools/precision_tool 子目录至训练工作目录 +### 安装python3三方依赖 +```shell +pip3 install rich gnureadline pexpect graphviz +# ubuntu/Debian +sudo apt-get install graphviz +# fedora/Centos +sudo yum install graphviz +``` +### 工具执行依赖 +* 一般直接在NPU训练环境上部署该脚本,环境上能够正常执行CPU和NPU训练脚本 +* 如果需要进行数据Dump比对,则需要先检查并去除训练脚本内部使用到的随机处理,避免由于输入数据不一致导致数据比对结果不可用 + ```python + # 对于使用tf.random / np.random / (python) random的可以通过固定随机种子的方式固定输入 + # import tf_config.py 默认会设置上述三种random的seed,但由于import位置关系,可能不一定能作用到所有的关联代码,建议在代码确认合适位置手动嵌入 + seed =987654 + random.seed(seed) + tf.random.set_random_seed(seed) + np.random.seed(seed) + + # RunConfig/NPURunConfig中设置tf_random_seed固定网络随机因子 + # Estimator中tf.random设置的随机种子并不能全局生效 + # 需要使用下面的方式进行设置 + run_config = tf.estimator.RunConfig(tf_random_seed=1, ...) + run_config = NPURunConfig(tf_random_seed=1, ...) + ``` + * **理论上网络中的大多数随机均能通过上面的方式固定, 一般不需要再做下面的这些操作** + ```python + # 1. 参数初始化中的随机操作 + # 加载checkpoint的方式能够固定大多数初始参数 + saver.restore(sess, saver_dir) + + # 2. 输入数据的随机操作(例如对输入数据做shuffle操作) + dataset = tf.data.TFRecordDataset(tf_data) + dataset = dataset.shuffle(batch_size*10) # 直接注释掉该行 + + # 3. 模型中的随机操作(例如使用dropout) + net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') # 建议注释该行 + + # 4. 图像预处理使用的随机操作(根据实际情况固定随机种子,或者替换成其他固定的预处理操作) + # 4.1 Random rotate + random_angle = tf.random_uniform([], - self.degree * 3.141592 / 180, self.degree * 3.141592 / 180) + image = tf.contrib.image.rotate(image, random_angle, interpolation='BILINEAR') + depth_gt = tf.contrib.image.rotate(depth_gt, random_angle, interpolation='NEAREST') + + # 4.2 Random flipping + do_flip = tf.random_uniform([], 0, 1) + image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(image), lambda: image) + depth_gt = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(depth_gt), lambda: depth_gt) + + # 4.3 Random crop + mage_depth = tf.concat([image, depth_gt], 2) + image_depth_cropped = tf.random_crop(image_depth, [self.params.height, self.params.width, 4]) + + # 其他...... + ``` +* 该工具基于**NPU的计算图**,**NPU的DUMP数据**,**NPU的溢出检测数据**,**TF的计算图meta文件**,**TF的DUMP数据**进行数据解析和分析。 +这几类依赖数据可以通过以下方式获取(只使用部分工具功能并不需要提前获取所有依赖数据): +#### 1. NPU的计算图获取 + ``` + 注意:NPU的Dump数据和计算图存在一定的对应关系,需要同时获取 + 避免在自定义的训练脚本中unset DUMP GRAPH相关的环境变量 + ``` +* 【推荐】方法一:配置2、3依赖中的NPU数据Dump或者overflow检测功能,将自动配置上Dump GE图的环境变量 + +* 【不推荐】方法二:参考迁移指导中的修改配置,执行NPU脚本,并将获取到的图转存至precision_data图目录 + ```shell + export DUMP_GE_GRAPH=2 + export DUMP_GRAPH_LEVEL=3 + export DUMP_GRAPH_PATH=./precision_data/npu/debug_0/graph + # 未配置DUMP_GRAPH_PATH时,图文件将保存在脚本执行目录,可以直接转存至precision_data目录 + mkdir -p ./precision_data/npu/debug_0/graph && mv ge_proto_*.txt ./precision_data/npu/debug_0/graph + ``` +#### 2. 
NPU的DUMP数据获取
+* 【推荐】方法一:在训练脚本中**import precision_tool.tf_config**,并使用precision_tool中提供的辅助命令行执行训练脚本
+  ```python
+  # NPU的DUMP获取和溢出检测数据的获取,均可按如下方式修改代码
+  # 注意:参数action可以设置为'dump'或'overflow'
+  # 引用 precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # 如果使用的是Estimator的NPURunConfig配置使能NPU,则可以参考以下修改
+  dump_config = npu_tf_config.estimator_dump_config(action='dump')  # 新增行
+  npu_config = NPURunConfig(dump_config=dump_config)
+
+  # 如果使用的是session.run或者使用tf.ConfigProto创建session_config传入tf.estimator.RunConfig的方式使能npu
+  # 可以参考如下修改
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump')  # 新增行
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+
+  # 如果使用的是custom_op方式,则可以参考以下修改
+  config = tf.ConfigProto()
+  custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+  custom_op.name = "NpuOptimizer"
+  custom_op.parameter_map["use_off_line"].b = True
+  custom_op = npu_tf_config.update_custom_op(custom_op, action='dump')  # 新增行
+  ```
+
+* 【不推荐】方法二:参考[精度比对工具使用指南](https://www.hiascend.com/document?tag=community-developer) 修改训练脚本。
+  执行训练脚本,并将dump的数据拷贝到【precision_data/dump/npu/】目录
+#### 3. NPU的溢出检测数据的获取(缺少该数据将无法展示溢出检测结果)
+* 【推荐】方法一:在训练脚本中**import precision_tool.tf_config**,并按【2. NPU的DUMP数据获取】中修改训练代码,使用precision_tool中提供的辅助命令行执行训练脚本
+  ```python
+  # 需要将action设置成'overflow'
+  # 引用 precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+  dump_config = npu_tf_config.estimator_dump_config(action='overflow')  # 新增行
+  ```
+* 【不推荐】方法二:参考[使用溢出检测工具分析算子溢出](https://www.hiascend.com/document?tag=community-developer) 修改训练脚本,
+  并将溢出数据拷贝至【precision_tool/dump/overflow/】目录
+
+#### 4. TF的DUMP数据获取(缺少该数据无法使用数据比对功能)(适用于TF 1.15,TF2.x参考tfdbg_ascend)
+* 【推荐】方法一:在CPU/GPU训练脚本中添加tf_debug代码,并使用precision_tool中提供的辅助命令行工具生成标杆DUMP数据
+  ```python
+  import precision_tool.tf_config as npu_tf_config
+
+  # 如果使用的是Estimator,EstimatorSpec加入training_hooks
+  estim_specs = tf.estimator.EstimatorSpec(training_hooks=[npu_tf_config.estimator_dump()])
+
+  # 如果使用的是session.run,以下代码为sess加上tf_debug的wrapper
+  sess = npu_tf_config.sess_dump(sess=sess)
+  ```
+  ```shell
+  # 1. 执行脚本
+  # 2. 解析tf debug dump文件,生成算子输出tensor文件
+  # 注意:TF dump数据的原理是使用tf_debug的print_tensor(pt)命令实现的,由于训练代码提供了非常灵活的run()接口,
+  #      脚本无法感知用户需要dump的tensor在哪个run阶段,因此需要用户修改训练代码,在执行完正确的run后立即退出。
+  #      例如,修改代码只执行一个step的训练;根据代码中run的次数,会获取到1~N个离线tf_debug的dump目录,
+  #      precision_tool脚本会自动提取最后一个run阶段中出现的所有tensor作为标杆数据。
+  python3.7.5 precision_tool/cli.py tf_dump
+
+  # 在precision_data/tf/dump/ 目录会存放提取的tensor
+  # 如果获取的tensor不符合预期,可以检查precision_data/dump/cpu_debug/目录,只保留预期run阶段的tf_debug离线数据,
+  # 再执行以下命令重新生成
+  rm -rf precision_data/tf/dump/* && python3.7.5 precision_tool/cli.py tf_dump
+  ```
+* 【不推荐】方法二:参考[准备基于GPU/CPU运行生成的npy数据](https://www.hiascend.com/document?tag=community-developer)
+  获取CPU/GPU的TF数据,并拷贝至【precision_data/dump/cpu/】目录
+#### 5. TF计算图Meta文件的获取(可选)
+* 通过saver保存ckpt获取
+  ```python
+  # 修改CPU/NPU脚本
+  with tf.Session() as sess:
+      # do session.run()
+      saver = tf.train.Saver()
+      # 保存ckpt
+      saver.save(sess, saver_dir)
+  ```
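+* 【示例】以下为一个示意性片段(假设`saver_dir`与上文保存ckpt时使用的路径一致,仅作参考,并非工具自带功能),
+  用于从meta文件恢复计算图并打印节点名,便于与GE图及dump数据中的算子名对应:
+  ```python
+  # 示意代码:从saver_dir对应的meta文件恢复计算图(saver_dir为假设的ckpt路径)
+  import tensorflow as tf
+
+  with tf.Session() as sess:
+      # import_meta_graph会把meta文件中的计算图加载到当前默认图
+      saver = tf.train.import_meta_graph(saver_dir + '.meta')
+      saver.restore(sess, saver_dir)
+      # 打印前10个节点名,确认图恢复成功
+      for op in tf.get_default_graph().get_operations()[:10]:
+          print(op.name)
+  ```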
+#### 6. 关闭NPU的融合功能(根据情况启用)
+* NPU会对计算图中的算子进行融合,以提高网络性能。由于大多数融合是自动识别的,可能存在未考虑到的场景,导致精度问题,
+  因此,可以尝试关闭融合,以定界精度问题是否由融合导致。
+  ```python
+  # 关闭融合可以和溢出检测/数据Dump同时进行,启用方法也类似
+  # 引用 precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # 如果使用的是Estimator的NPURunConfig配置使能NPU,则可以参考以下修改
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_OFF_FILE)  # 修改行
+  # 如果需要关闭指定的融合规则,则可以修改precision_tool/fusion_switch.cfg,并参考如下修改
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_SWITCH_FILE)  # 关闭特定融合修改行
+
+  # 如果使用的是session.run或者使用tf.ConfigProto创建session_config传入tf.estimator.RunConfig的方式使能npu
+  # 可以参考如下修改(数据Dump和关闭融合同时使能)
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_off')  # 新增行
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_switch')  # 关闭特定融合新增行
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+  # 如果有custom_op,也可以直接使用下面的方式配置
+  custom_op = npu_tf_config.update_custom_op(custom_op=custom_op, action='dump|fusion_off')
+  ```
+## 使用说明
+1. 配置文件precision_tool/config.py(正常默认即可)
+   ```python
+   # 如果需要dump特定层的数据,则可以修改以下配置项
+   # 一般对比分析dump首层即可
+   # Dump config '0|5|10'
+   TF_DUMP_STEP = '0'
+
+   # 融合开关配置,可以在该配置文件中配置融合开关的开启和关闭,使用方法参考以下文档:
+   # https://support.huaweicloud.com/tensorflowdevg-cann330alphaXtraining/atlastfadapi_07_0005.html
+   FUSION_SWITCH_FILE = './precision_tool/fusion_switch.cfg'
+
+   # 依赖run包中的atc和msaccucmp.pyc工具,一般在run包安装目录,配置到父目录即可
+   # 默认run包安装在/usr/local/Ascend,可以不用修改;指定目录安装则需要修改
+   # parent dir path of msaccucmp.pyc and atc, usually run package dir
+   CMD_ROOT_PATH = '/usr/local/'
+
+   # ASCEND Log Path
+   ASCEND_LOG_PATH = '/root/ascend/log/plog/'
+
+   # 日志级别及数据分析目录设置
+   # TOOL CONFIG
+   LOG_LEVEL = "NOTSET"
+   # ModelArts场景下,可以根据情况将数据根目录修改成自定义目录,并在完成后完整下载该目录
+   ROOT_DIR = './'
+   ```
+2. 启动脚本(交互命令行)
+   ```shell
+   python3 ./precision_tool/cli.py
+   ```
+
+### 交互模式命令
+1. ac -l [limit_num] -c
+   ```shell
+   # auto check. 自动化检测命令
+   # 列出Fusion信息;解析算子溢出信息;
+   # -c 可选,进行全网比对
+   # -l 可选,限制输出结果的条数(overflow解析的条数等)
+   PrecisionTool > ac -c
+   ╭──────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ [TransData][327] trans_TransData_1170                                                             │
+   │  - [AI Core][Status:32][TaskId:327] ['浮点计算有溢出']                                            │
+   │  - First overflow file timestamp [1619347786532995] -                                             │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.input.0.npy                              │
+   │    |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11950836181640626] │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.output.0.npy                             │
+   │    |- [Shape: (32, 20, 8, 8, 16)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.07781982421875] │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+2. run [command]
+   ```shell
+   # 不退出交互命令环境执行shell命令,与内置命令不冲突的可以直接执行,否则需要加run前缀
+   PrecisionTool > run vim cli.py
+   PrecisionTool > vim cli.py
+   ```
+
+3. 
ls -n [op_name] -t [op_type] -f [fusion_pass] -k [kernel_name] + ```shell + # 通过[算子名]/[算子类型]查询网络里的算子,模糊匹配 + # -n 算子节点名称 + # -t 算子类型 + # -f 融合类型 + # -k kernel_name + PrecisionTool > ls -t Mul -n mul_3 -f TbeMulti + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5b/Branch_1/mul_3 + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5c/Branch_1/mul_3 + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5d/Branch_1/mul_3 + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_6b/Branch_1/mul_3 + ``` + +4. ni (-n) [op_name] -s [save sub graph deep] + ```shell + # 通过[算子名]查询算子节点信息 + # -n 指定节点名称 + # -g graph名 + # -a 显示attr信息 + # -s 保存一个以当前算子节点为根,深度为参数值的子图 + PrecisionTool > ni gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual -s 3 + ╭─────────────────── [GreaterEqual]gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual ────────────────────╮ + │ [GreaterEqual] gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual │ + │ Input: │ + │ -[0][DT_FLOAT][NHWC][32, 8, 8, 320] InceptionV3/InceptionV3/Mixed_7a/Branch_0/add_3:0 │ + │ -[1][DT_FLOAT][NHWC][1, 8, 1, 1] InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3tau:0 │ + │ -[2][][[]][] atomic_addr_clean0_21:-1 │ + │ Output: │ + │ -[0][DT_BOOL][NHWC][32, 8, 8, 320] ['trans_TransData_1170'] │ + │ NpuDumpInput: │ + │ -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.0.npy │ + │ |- [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556] │ + │ -[1] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.1.npy │ + │ |- [Shape: (1, 8, 1, 1)] [Dtype: float32] [Max: 0.0] [Min: 0.0] [Mean: 0.0] │ + │ NpuDumpOutput: │ + │ -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.output.0.npy │ + │ |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.1176300048828125] │ + │ CpuDumpOutput: │ + │ -[0] gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.0.1619492699305998.npy │ + │ |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11764373779296874] │ + ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + 2021-04-27 14:39:55 (15178) -[DEBUG]write 14953 bytes to './precision_data/dump/temp/op_graph/GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.3.gv' + 2021-04-27 14:39:55 (15178) -[INFO]Sub graph saved to /root/sym/inception/precision_data/dump/temp/op_graph + ``` + +5. 
pt (-n) [*.npy] + ```shell + # 查看某个dump数据块的数据信息 + # -n 可选,含义是待查看的数据文件名 + # 默认会将数据保存成 txt + PrecisionTool > pt TransData.trans_TransData_1170.327.1619347786532995.input.0.npy + ╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ + │ Shape: (32, 8, 8, 320) │ + │ Dtype: bool │ + │ Max: True │ + │ Min: False │ + │ Mean: 0.11950836181640626 │ + │ Path: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy │ + │ TxtFile: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy.txt │ + ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + ``` + +6. cp (-n) [left *.npy] [right *.npy] -p [print num] -al [atol] -rl [rtol] + ```shell + # 对比两个tensor的数据 + # -n 指定需要对比的两个numpy名 + # -p 指定输出的错误数据的个数及前多少个数据 + # -al/rl 指定相对误差的参数,在两个场景中用到 + # -s 保存成txt文件,默认打开 + # 1. np.allclose(left, right, atol=al, rtol=rl) + # 2. err_cnt += 1 if abs(data_left[i] - data_right[i]) > (al + rl * abs(data_right[i])) + PrecisionTool > cp Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy -p 10 -s -al 0.002 -rl 0.005 + Error Item Table Top Item Table + ┏━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ + ┃ Index ┃ Left ┃ Right ┃ Diff ┃ ┃ Index ┃ Left ┃ Right ┃ Diff ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ ┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ + │ 155 │ 0.024600908 │ 0.022271132 │ 0.002329776 │ │ 0 │ -0.9206961 │ -0.9222216 │ 0.0015255213 │ + │ 247 │ 0.015752593 │ 0.017937578 │ 0.0021849852 │ │ 1 │ -0.6416973 │ -0.64051837 │ 0.0011789203 │ + │ 282 │ -0.0101207765 │ -0.007852031 │ 0.0022687456 │ │ 2 │ -0.35383835 │ -0.35433492 │ 0.0004965663 │ + │ 292 │ 0.019581757 │ 0.02240482 │ 0.0028230622 │ │ 3 │ -0.18851271 │ -0.18883198 │ 0.00031927228 │ + │ 640 │ -0.06593232 │ -0.06874806 │ 0.0028157383 │ │ 4 │ -0.43508735 │ -0.43534422 │ 0.00025686622 │ + │ 1420 │ 0.09293677 │ 0.09586689 │ 0.0029301196 │ │ 5 │ 1.4447614 │ 1.4466647 │ 0.0019032955 │ + │ 1462 │ -0.085207745 │ -0.088047795 │ 0.0028400496 │ │ 6 │ -0.3455438 │ -0.3444429 │ 0.0011008978 │ + │ 1891 │ -0.03433288 │ -0.036525503 │ 0.002192624 │ │ 7 │ -0.6560242 │ -0.6564579 │ 0.0004336834 │ + │ 2033 │ 0.06828873 │ 0.07139922 │ 0.0031104907 │ │ 8 │ -2.6964858 │ -2.6975214 │ 0.0010356903 │ + │ 2246 │ -0.06376442 │ -0.06121233 │ 0.002552092 │ │ 9 │ -0.73746175 │ -0.73650354 │ 0.00095820427 │ + └───────┴───────────────┴──────────────┴──────────────┘ └───────┴─────────────┴─────────────┴───────────────┘ + ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ + │ Left: │ + │ |- NpyFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy │ + │ |- TxtFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy.txt │ + │ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556] │ + │ DstFile: │ + │ |- NpyFile: ./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy │ + │ |- TxtFile: 
./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy.txt │ + │ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.8425903] [Min: -8.374472] [Mean: -0.7256237] │ + │ NumCnt: 655360 │ + │ AllClose: False │ + │ CosSim: 0.99999493 │ + │ ErrorPer: 0.023504638671875 (rl= 0.005, al= 0.002) │ + ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + ``` + +7. vc -lt [left_path] -rt [right_path] -g [graph] + ```python + # 用于手动指定两个目录进行整网对比 + # -lt 必选,其中一个文件目录 + # -rt 必选,另一个目录,一般是标杆目录 + # -g 可选,指定-g将尝试解析graph内的映射关系比对(一般用于NPU和TF之间的数据比对, NPU与NPU之间比对不需要,直接按照算子name对比) + # 需要指定到dump数据所在的目录层级,precision_data/npu/debug_0/dump/20220217095546/3/ge_default_20220217095547_1/1/0/ + ``` +8. vcs -f [file_name] -c [cos_sim_threshold] -l [limit] + ```python + # 查看精度比对结果的概要信息,可以更加预先相似的阈值过滤出低于阈值的算子/信息 + # -f (--file) 可选,指定csv文件,不设置则默认遍历precision_data/temp/vector_compare/目录下最近产生的对比目录内的所有csv + # -c (--cos_sim) 可选,指定筛选所使用的预先相似度阈值,默认0.98 + # -l (--limit) 可选,指定输出前多少个结果,默认值3 + PrecisionTool > vcs -c 0.98 -l 2 + 2021-05-31 14:48:56 (2344298) -[INFO]Sub path num:[1]. Dirs[['20210529145750']], choose[20210529145750] + 2021-05-31 14:48:56 (2344298) -[DEBUG]Find ['result_20210529145751.csv', 'result_20210529145836.csv', 'result_20210529145837.csv', 'result_20210529145849.csv', 'result_20210529150404.csv', 'result_20210529151102.csv'] result files in dir precision_data/temp/vector_compare/20210529145750 + 2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145751.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145836.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 1 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145837.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145849.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529150404.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529151102.csv + ╭── [578] pixel_cls_loss/cond_1/TopKV2 ───╮ + │ Left: ['pixel_cls_loss/cond_1/TopKV2'] │ + │ Right: ['pixel_cls_loss/cond_1/TopKV2'] │ + │ Input: │ + │ - [0]1.0 - [1]nan │ + │ Output: │ + │ - [0]0.999999 - [1]0.978459 │ + ╰─────────────────────────────────────────╯ + ╭── [490] gradients/AddN_5 ───╮ + │ Left: ['gradients/AddN_5'] │ + │ Right: ['gradients/AddN_5'] │ + │ Input: │ + │ - [0]nan - [1]1.0 │ + │ Output: │ + │ - [0]0.05469 │ + ╰─────────────────────────────╯ + ``` +### Precision_data目录结构 +``` +precision_data/ +├── npu +│ ├── debug_0 +| | ├── dump +| | └── 20210510101133 +| │ └── graph +| | └── ge_proto_00000179_PreRunAfterBuild.txt +│ └── debug_1 +├── tf +| ├── tf_debug +| └── dump +├── overflow +├── fusion +└── temp + ├── op_graph + ├── decode + | ├── dump_decode + | ├── overflow_decode + | └── dump_convert + └── vector_compare + ├── 20210510101133 + | ├── result_123456.csv + | └── result_123455.csv + └── 20210510101134 + └── result_123458.csv +``` +### 配合msquickcmp一键式推理精度比对工具使用 +- msquickcmp会将中间dump数据和图自动保存在一个时间戳命名的目录内, 可以使用precision_tool工具直接对该目录进行分析 +```python +output-path/timestamp +├── dump_data +├── input +├── model +├── 
result_2021211214657.csv +└── tmp +``` +- 修改配置 +```python +# file precision_tool/config.py +# [train/infer] if adapt from msquickcmp result, set net type to infer +NET_TYPE = 'infer' +``` +- 执行以下命令 +```shell +# 前提条件: +# 当前目录没有precision_data目录(导入过程会新创建一个precision_data,用于保存导入数据) +# 只有第一次需要使用infer子命令导入,后续直接python3 precision_tool/cli.py +python3 precision_tool/cli.py infer output-path/timestamp +``` + +### 基于checkpoint进行训练精度分析 +#### 获取checkpoint和网络数据数据 +```python +from precision_tool.tf_session import PrecisionTfSession +with PrecisionTfSession() as sess: + sess.run() +# 执行完成后,将在precision_data/tf/checkpoint 目录生成一个checkpoint +# 在precision_data/tf/checkpoint/inputs目录保存[input_tensor_name].npy的输入数据 +``` + +#### 使用【train】命令进行cpu和npu dump数据的获取 +```shell +# train -d [all/npu/cpu] -a [dump|fusion_off|overflow] +python3 precision_tool/cli.py train -d all -a dump +``` + +### TF脚本修改参考 + +```python +# 打印动态Scale的Loss值 +loss_scale_manager = ExponentialUpdateLossScaleManager() +scale_v = sess.run([loss_scale_manager.get_loss_scale()]) +print(">>> Current Loss Scale >>> ", scale_v) + + +with tf.Session() as sess: + # do session.run() + saver = tf.train.Saver() + # 保存ckpt + saver.save(sess, saver_dir) + # ... + # 从ckpt恢复 + saver.restore(sess, saver_dir) + # ... + # 保存Tensorboard + summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph) + +``` + +### F&Q +1. 安装gnureadline报错找不到lncurses + ```shell + /usr/bin/ld: cannot find -lncurses + collect2: error: ld returned 1 exit status + error: command 'gcc' failed with exit status 1 + ``` + ```shell + # 先尝试在本地查找libncurses.so* + find / -name libncurses.so* + # 如果能找到以下文件,直接创建一个libncurses.so指向libncurses.so.5即可,否则需要用包管理工具安装ncurses + /usr/lib64/libncurses.so.5 + /usr/lib64/libncurses.so.5.9 + /usr/lib64/libncursesw.so.5 + # 创建软连接 + ln -s /usr/lib64/libncurses.so.5.9 /usr/lib64/libncurses.so + ``` +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 
新建 Pull Request \ No newline at end of file diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md new file mode 100644 index 000000000..6ebb752c4 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md @@ -0,0 +1,66 @@ +#Release 0.1.11 +## Update +* 兼容csv mapping解析summary dump数据 +* 新增打印输入输出地址偏移 +* TF2.x dump时默认保存输入和输出 + +#Release 0.1.10 +## Update +* 兼容新的opdebug dump格式 + + +#Release 0.1.9 +## Update +* 兼容新的summary dump的csv格式 + +#Release 0.1.8 +## Update +* 兼容新的fusion_result格式 + +#Release 0.1.7 +## Update +* 兼容新平台溢出检测数据解析 + +#Release 0.1.6 +## Update +* 支持获取profiling数据 +* 兼容一些溢出监测dump数据的修改 +* 修复溢出检测数据解析问题 + +# Release 0.1.5 +## Update +* 修复PT命令重复打屏的问题 +* 模糊匹配溢出检测算子名 +* 修复PT Dump的H5文件解析的一些问题 + +# Release 0.1.4 +## Update +* 完善训练随机固定场景 + + +# Release 0.1.3 +## Update +* 支持解析Torch Dump的H5数据 + +# Release 0.1.2 +## Update +* 适配部分dump数据格式 + +# Release 0.1.1 +## Features +* 新增NpuPrintLossScaleCallBack,用于TF2.x下打印scale值 +* 新增自动查找子图Data节点真实输入节点功能 + +## Update +* 优化部分推理场景自动对比目录名和graph名不匹配的场景识别逻辑 + +## Bugfix +* 溢出错误码解析崩溃bugfix + + +# Release 0.1.0 +## Feature +* 新增基于Checkpoint加载执行网络精度对比的能力 + +## Update +* 优化目录组织结构 \ No newline at end of file diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/__init__.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py new file mode 100644 index 000000000..3c1b5982d --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py @@ -0,0 +1,132 @@ +# coding=utf-8 +""" +Source code: + https://bbs.huaweicloud.com/blogs/181056 +Example: + python3.7 caffe_dump.py -m resnet50.prototxt -w resnet50.caffemodel -i test.bin -n 'data:0' -o ./output_dir + +Guide for setting up Caffe/Tensorflow precision golden data generation environments: + https://bbs.huaweicloud.com/blogs/181059 +""" +import caffe +import sys +import argparse +import os +import caffe.proto.caffe_pb2 as caffe_pb2 +import google.protobuf.text_format +import json +import numpy as np +import time + +TIME_LENGTH = 1000 +FILE_PERMISSION_FLAG = 0o600 + + +class CaffeProcess: + def __init__(self): + parse = argparse.ArgumentParser() + parse.add_argument("-w", dest="weight_file_path", + help=" the caffe weight file path", + required=True) + parse.add_argument("-m", dest="model_file_path", + help=" the caffe model file path", + required=True) + parse.add_argument("-o", dest="output_path", help=" the output path", + required=True) + parse.add_argument("-i", "--input_bins", dest="input_bins", help="input_bins bins. e.g. './a.bin;./c.bin'", + required=True) + parse.add_argument("-n", "--input_names", dest="input_names", + help="input nodes name. e.g. 
'graph_input_0:0;graph_input_0:1'",
+                           required=True)
+        args, _ = parse.parse_known_args(sys.argv[1:])
+        self.weight_file_path = os.path.realpath(args.weight_file_path)
+        self.model_file_path = os.path.realpath(args.model_file_path)
+        self.input_bins = args.input_bins.split(";")
+        self.input_names = args.input_names.split(";")
+        self.output_path = os.path.realpath(args.output_path)
+        self.net_param = None
+        self.cur_layer_idx = -1
+
+    @staticmethod
+    def _check_file_valid(path, is_file):
+        if not os.path.exists(path):
+            print('Error: The path "' + path + '" does not exist.')
+            exit(-1)
+        if is_file:
+            if not os.path.isfile(path):
+                print('Error: The path "' + path + '" is not a file.')
+                exit(-1)
+        else:
+            if not os.path.isdir(path):
+                print('Error: The path "' + path + '" is not a directory.')
+                exit(-1)
+
+    def _check_arguments_valid(self):
+        self._check_file_valid(self.model_file_path, True)
+        self._check_file_valid(self.weight_file_path, True)
+        self._check_file_valid(self.output_path, False)
+        for input_file in self.input_bins:
+            self._check_file_valid(input_file, True)
+
+    @staticmethod
+    def calDataSize(shape):
+        # total element count of a blob shape
+        dataSize = 1
+        for dim in shape:
+            dataSize *= dim
+        return dataSize
+
+    def _load_inputs(self, net):
+        inputs_map = {}
+        for layer_name, blob in net.blobs.items():
+            if layer_name in self.input_names:
+                input_bin = np.fromfile(
+                    self.input_bins[self.input_names.index(layer_name)], np.float32)
+                input_bin_shape = blob.data.shape
+                if self.calDataSize(input_bin_shape) == self.calDataSize(input_bin.shape):
+                    input_bin = input_bin.reshape(input_bin_shape)
+                else:
+                    print("Error: input node data size %d does not match input bin data size %d."
+                          % (self.calDataSize(input_bin_shape), self.calDataSize(input_bin.shape)))
+                    exit(-1)
+                inputs_map[layer_name] = input_bin
+        return inputs_map
+
+    def process(self):
+        """
+        Function Description:
+            process the caffe net, save result as dump data
+        """
+        # check path valid
+        self._check_arguments_valid()
+
+        # load model and weight file
+        net = caffe.Net(self.model_file_path, self.weight_file_path,
+                        caffe.TEST)
+        inputs_map = self._load_inputs(net)
+        for key, value in inputs_map.items():
+            net.blobs[key].data[...] = value
+        # process
+        net.forward()
+
+        # read prototxt file
+        net_param = caffe_pb2.NetParameter()
+        with open(self.model_file_path, 'rb') as model_file:
+            google.protobuf.text_format.Parse(model_file.read(), net_param)
+        for layer in net_param.layer:
+            name = layer.name.replace("/", "_").replace(".", "_")
+            index = 0
+            for top in layer.top:
+                data = net.blobs[top].data[...]
+                file_name = name + "." + str(index) + "." 
+ str( + round(time.time() * 1000000)) + ".npy" + output_dump_path = os.path.join(self.output_path, file_name) + np.save(output_dump_path, data) + os.chmod(output_dump_path, FILE_PERMISSION_FLAG) + print('The dump data of "' + layer.name + + '" has been saved to "' + output_dump_path + '".') + index += 1 + + +if __name__ == "__main__": + caffe_process = CaffeProcess() + caffe_process.process() \ No newline at end of file diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py new file mode 100644 index 000000000..f46368dee --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py @@ -0,0 +1,129 @@ +# coding=utf-8 +""" +cli +""" +import os +import sys + +from lib.precision_tool import PrecisionTool +from lib.train.train_analysis import TrainAnalysis +from lib.interactive_cli import InteractiveCli +from lib.util.precision_tool_exception import PrecisionToolException +from lib.util.util import util +from lib.dump.tf_dump import TfDump +from lib.adapter.msquickcmp_adapter import MsQuickCmpAdapter +from lib.adapter.offline_om_adapter import OfflineOmAdapter +from lib.config import config as cfg + +INTRODUCE_DOC = \ + "===============================\n" \ + "Usage:\n" \ + " Single mode:\n" \ + " Exp:\n" \ + " Dump TF data:\n" \ + " > python3.7.5 precision_tool/cli.py tf_dump \n" \ + " Adapt msquickcmp data:\n" \ + " > python3.7.5 precision_tool/cli.py infer [data path of msquickcmp output] \n" \ + " Interactive mode:\n" \ + " Exp:\n" \ + " Start command line:\n" \ + " > python3.7.5 precision_tool/cli.py\n" + + +def _run_tf_dbg_dump(cmdline): + """ Generate tf dump files with tf debug files.""" + tf_dump = TfDump() + tf_dump.run_tf_dbg_dump(cmdline) + + +def _unset_flags(): + if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ: + del os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ: + del os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] + + +def _run_npu_dump(cmd): + """Deprecate function.""" + _unset_flags() + log = util.get_log() + os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] = 'True' + log.info("Start run NPU script with dump data.") + ret = util.execute_command(cmd) + log.info("Finish run NPU script with dump data. ret [%s]", ret) + _unset_flags() + + +def _run_npu_overflow(cmd): + """Deprecate function.""" + _unset_flags() + log = util.get_log() + os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] = 'True' + log.info("Start run NPU script with overflow check process....") + ret = util.execute_command(cmd) + log.info("Finish run NPU script with overflow check process. 
ret [%s]", ret) + precision_tool = PrecisionTool() + precision_tool.prepare() + precision_tool.do_check_overflow() + _unset_flags() + + +def _run_infer_adapter(output_path): + """ Run precision_tool with msquickcmp output data + :param output_path: msquickcmp output path + :return: None + """ + if OfflineOmAdapter.validate(output_path): + adapter = OfflineOmAdapter(output_path) + else: + adapter = MsQuickCmpAdapter(output_path) + adapter.run() + _run_interactive_cli() + + +def _run_interactive_cli(cli=None): + """ Run precision_tool in interactive mode + :param cli: + :return: + """ + util.get_log().info("Interactive command mode.") + if cli is None: + cli = InteractiveCli() + try: + cli.cmdloop(intro="Enjoy!") + except KeyboardInterrupt: + util.get_log().info("Bye.......") + + +def _run_cli_with_data(data_path): + """ Run precision with specific data path, default is precision_data.""" + cfg.DATA_ROOT_DIR = data_path + _run_interactive_cli() + + +function_list = { + 'tf_dump': _run_tf_dbg_dump, + 'npu_dump': _run_npu_dump, + 'npu_overflow': _run_npu_overflow, + 'infer': _run_infer_adapter, + 'data': _run_cli_with_data +} + + +def main(): + while len(sys.argv) > 1: + util.get_log().info("Single command mode.") + function_key = sys.argv[1] + cmd_line = sys.argv[2] if len(sys.argv) > 2 else None + if function_key in function_list: + return function_list[function_key](cmd_line) + precision_tool = PrecisionTool() + return precision_tool.single_cmd(sys.argv) + _run_interactive_cli() + + +if __name__ == '__main__': + try: + main() + except PrecisionToolException as pte: + util.get_log().error(pte.error_info) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py new file mode 100644 index 000000000..b159b0e99 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py @@ -0,0 +1,118 @@ +# coding=utf-8 +import os +import random +import tensorflow as tf +from .lib.adapter.tf_adapter import TfAdapter +from .lib.config import config as cfg + + +adapter = TfAdapter() + + +def seed_everything(seed=cfg.DUMP_SEED): + """ set random seed + :param seed: random seed + :return: None + """ + os.environ['PYTHONHASHSEED'] = str(seed) + random.seed(seed) + if hasattr(tf.random, 'set_seed'): + tf.random.set_seed(seed) + elif hasattr(tf.random, 'set_random_seed'): + tf.random.set_random_seed(seed) + print("[PrecisionTool] Set Tensorflow random seed to %d success." % seed) + try: + import numpy as np + np.random.seed(seed) + print("[PrecisionTool] Set numpy random seed to %d success." % seed) + except ImportError as err: + np = None + print("[PrecisionTool] No numpy module.", err) + try: + from tfdeterminism import patch + patch() + print("[PrecisionTool] patch tf determinism success.") + except Exception as err: + print("[PrecisionTool] No tfdeterminism module. Install it by pip3 install tfdeterminism.", err) + + +# set global random seed +seed_everything() + + +def sess_dump(sess): + """wrapper session with dumping debug wrapper. + In session run mode. Use sess=sess_dump(sess) + :param sess: origin session + :return: Session + """ + return adapter.sess_dump(sess) + + +def estimator_dump(): + """In estimator mode. 
estim_spec = tf.estimator.EstimatorSpec(traing_hooks=[estimator_dump()]) + :return: + """ + return adapter.estimator_dump() + + +def npu_device_dump_config(npu_device, action): + """For tf2.x + :param npu_device: npu_device + :param action: dump | overflow| fusion_off | fusion_switch + :return: npu_device + """ + return adapter.npu_device_dump_config(npu_device, action) + + +def estimator_dump_config(action=None): + """return DumpConfig. + In estimator mode. set dump_config in NPURunConfig(). + exp. config = NPURunConfig(dump_config=estimator_dum_config(), session_config=session_config) + :return: DumpConfig + """ + return adapter.estimator_dump_config(action) + + +def session_dump_config(session_config=None, action=None, dump_layer=None): + """ + In TF session mode. set dump_config in session_config. + exp. config = session_dump_config() + config.[set your own configs] + with tf.Session(config=config) as sess: + sess.run(_) + tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline") + :param session_config: original session config + :param action: if set action, no need to start app with cli wrapper + :return: config_pb2.ConfigProto + """ + return adapter.session_dump_config(session_config, action, dump_layer) + + +def update_custom_op(custom_op, action=None, dump_layer=None): + """Update custom_op + :param custom_op: origin custom op + :param action: dump | overflow | fusion_off | fusion_switch + :param dump_layer: layers to dump, split by space + :return: + """ + return adapter.update_custom_op(custom_op, action, dump_layer) + + +class NpuPrintLossScaleCallBack(tf.keras.callbacks.Callback): + """ + For TF2.x callbacks. Usage: + callbacks = [] + # append other callbacks. + callbacks.append(NpuPrintLossScaleCallBack(opt)) + model.fit(xx, xx, callbacks=callbacks) + """ + def __init__(self, optimizer, loss=None): + super(NpuPrintLossScaleCallBack, self).__init__() + self.optimizer = optimizer + self.loss = loss + + def on_train_batch_begin(self, batch, logs=None): + print("PrecisionTool: Train steps {}, loss_scale={:.3f} / not_overflow_status={}".format( + batch, self.optimizer.loss_scale.numpy(), self.optimizer.last_step_finite.numpy() + ), flush=True) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py new file mode 100644 index 000000000..73cd8f7ab --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py @@ -0,0 +1,60 @@ +# coding=utf-8 +import tensorflow as tf +import numpy as np +from .lib.util.util import util +from .lib.train.train_analysis import TrainAnalysis +from .lib.config import config as cfg + + +class PrecisionTfSession(tf.Session): + def __init__(self, target='', graph=None, config=None): + super().__init__(target, graph, config) + self.log = util.get_log() + self._create_dir() + self.running = False + + def run(self, fetches, feed_dict=None, options=None, run_metadata=None): + """ wrapper super.run() """ + run_before_after = False + if not self.running: + self.running = True + run_before_after = True + if run_before_after: + self._before_run(feed_dict) + res = super(tf.Session, self).run(fetches, feed_dict, options, run_metadata) + if run_before_after: + # saver will call run func. 
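+            # _after_run() calls saver.save(), which itself issues session.run();
+            # self.running is still True at this point, so the nested run skips
+            # the _before_run/_after_run hooks instead of recursing.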
+            self._after_run()
+            self.running = False
+        return res
+
+    @staticmethod
+    def _create_dir():
+        util.create_dir(cfg.TF_CKPT_ROOT)
+        util.create_dir(cfg.TF_CKPT_INPUT_DIR)
+
+    def _save_data(self, feed, feed_val):
+        self.log.info('Save: %s', feed)
+        file_name = TrainAnalysis.gen_feed_file_name(feed.name)
+        np.save(file_name, feed_val)
+
+    def _before_run(self, feed_dict):
+        """
+        save feed dict tensors
+        :return: None
+        """
+        if feed_dict is not None:
+            self.log.info('Session run with feed_dict, will save feed dict.')
+            for feed, feed_val in feed_dict.items():
+                if not isinstance(feed, tf.Tensor):
+                    # skip non-tensor feed keys but keep saving the remaining feeds
+                    continue
+                self._save_data(feed, feed_val)
+        # Iterator case
+
+    def _after_run(self):
+        """
+        save checkpoint for later dump and compare
+        :return: None
+        """
+        saver = tf.train.Saver()
+        saver.save(self, cfg.TF_CKPT_FILE)
--
Gitee
From 0ac19642e861041406b52b21be22ee6f2f5c9ff9 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 07:43:10 +0000
Subject: [PATCH 25/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20lib?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 .../ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep
new file mode 100644
index 000000000..e69de29bb
--
Gitee
From 9ea904c1290577c3ef151e2afab9417b1fba8be1 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 07:43:34 +0000
Subject: [PATCH 26/38] 1

Signed-off-by: huangju1993
---
 .../precision_tool/lib/adapter/fusion.py      | 112 ++++++++++
 .../lib/adapter/msquickcmp_adapter.py         |  87 ++++++++
 .../lib/adapter/offline_om_adapter.py         |  23 ++
 .../precision_tool/lib/adapter/overflow.py    | 194 +++++++++++++++++
 .../precision_tool/lib/adapter/tf_adapter.py  | 200 ++++++++++++++++++
 5 files changed, 616 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py
new file mode 100644
index 000000000..b440b1055
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py
@@ -0,0 +1,112 @@
+# coding=utf-8
+import json
+import os
+import shutil
+from ..util.util import util
+from ..config import config as cfg
+
+
+FUSION_RESULT_FILE_NAME = 'fusion_result.json'
+EFFECT_TIMES_KEY = 'effect_times'
+GRAPH_FUSION_KEY = 'graph_fusion'
+UB_FUSION_KEY = 'ub_fusion'
+GRAPH_ID_KEYS = ['graphId', 
'session_and_graph_id'] + + +class FusionResult(object): + def __init__(self, fusion_json): + self.fusion_json = fusion_json + + def get_effect_graph_fusion(self): + """Get effect graph fusion rule""" + if GRAPH_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[GRAPH_FUSION_KEY]) + return {} + + def get_effect_ub_fusion(self): + """Get effect UB fusion rule""" + if UB_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[UB_FUSION_KEY]) + return {} + + def graph_id(self): + """Get graph id""" + for key in GRAPH_ID_KEYS: + if key in self.fusion_json: + return self.fusion_json[key] + return "NONE" + + @staticmethod + def _get_effect_fusion(fusion): + res = {} + for fusion_name in fusion: + effect_times = int(fusion[fusion_name][EFFECT_TIMES_KEY]) + if effect_times > 0: + res[fusion_name] = effect_times + return res + + +class Fusion(object): + def __init__(self): + self.fusion_result = [] + self.log = util.get_log() + + def prepare(self, json_path='./'): + """Prepare fusion rule manager + :param json_path: path to fusion_result.json + :return: None + """ + util.create_dir(cfg.FUSION_DIR) + file_path = os.path.join(json_path, FUSION_RESULT_FILE_NAME) + file_path_local = os.path.join(cfg.FUSION_DIR, FUSION_RESULT_FILE_NAME) + if not os.path.isfile(file_path): + if not os.path.isfile(file_path_local): + self.log.debug("Can not find fusion result json.") + return + else: + shutil.copy(file_path, cfg.FUSION_DIR) + fe_jsons = self._get_result_jsons(file_path_local) + for fe_json in fe_jsons: + self.fusion_result.append(FusionResult(fe_json)) + + def check(self): + """Check fusion rules + :return: None + """ + self.log.info("Check effect fusion rule list.") + for fusion in self.fusion_result: + graph_fusion_table = self._build_table('Graph Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_graph_fusion()) + ub_fusion_table = self._build_table('UB Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_ub_fusion()) + util.print_panel(util.create_columns([graph_fusion_table, ub_fusion_table]), + title='GraphID:' + fusion.graph_id(), fit=True) + + @staticmethod + def _get_result_jsons(file_name): + result_jsons = [] + with open(file_name, 'r') as f: + txt = f.read() + try: + result_jsons = json.loads(txt) + if isinstance(result_jsons, dict): + result_jsons = [result_jsons] + except ValueError: + sk = [] + start = -1 + for i in range(len(txt)): + if txt[i] == '{': + sk.append('{') + if txt[i] == '}': + sk.pop() + if len(sk) == 0: + result_jsons.append(json.loads(txt[start+1: i+1])) + start = i + return result_jsons + + @staticmethod + def _build_table(title, fusion): + table = util.create_table(title, ['Fusion Name', 'Effect times']) + for f in fusion: + table.add_row(f, str(fusion[f])) + return table diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py new file mode 100644 index 000000000..525dc8ee5 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py @@ -0,0 +1,87 @@ +# coding=utf-8 +import os +import time +import pathlib +import shutil +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException + + +class 
MsQuickCmpAdapter(object): + def __init__(self, output_path): + self.output_path = output_path + self.log = util.get_log() + + def run(self): + if self.output_path is None or not os.path.isdir(self.output_path): + raise PrecisionToolException("Invalid output path.") + if os.path.exists(cfg.DATA_ROOT_DIR): + raise PrecisionToolException("Precision data dir exist, can not adapt msquickcmp result.") + + for dir_path, dir_names, file_names in os.walk(self.output_path, followlinks=True): + if 'model' in dir_names: + self._adapt_model(os.path.join(dir_path, 'model')) + if 'dump_data' in dir_names: + self._adapt_dump(os.path.join(dir_path, 'dump_data')) + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.CSV): + self._adapt_vector_compare_result(os.path.join(dir_path, file_name)) + + def _adapt_model(self, path): + file_names = os.listdir(path) + graph_id = 0 + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.JSON): + self.log.info("Find msquickcmp model json: %s", file_name) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + graph_file_name = 'ge_proto_%d_%s.txt' % (graph_id, cfg.BUILD_JSON_GRAPH_NAME) + graph_json_file_name = graph_file_name + Constant.Suffix.JSON + pathlib.Path(os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_file_name)).touch() + src_path = os.path.join(path, file_name) + dst_path = os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_json_file_name) + self.log.info("Copy graph file: %s->%s", src_path, dst_path) + shutil.copy(src_path, dst_path) + time.sleep(3) + pathlib.Path(dst_path).touch() + if not util.empty_dir(cfg.DEFAULT_NPU_GRAPH_DIR): + self.log.info("Adapt model success.") + + def _adapt_dump(self, path): + dir_names = os.listdir(path) + if 'tf' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'tf')) + if 'onnx' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'onnx')) + if 'npu' in dir_names: + self._adapt_npu_dump(os.path.join(path, 'npu')) + + def _adapt_tf_dump(self, path): + if util.empty_dir(path): + return + src_path = os.path.abspath(path) + util.create_dir(cfg.TF_DIR) + dst_path = cfg.TF_DUMP_DIR + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + os.symlink(src_path, dst_path) + self.log.info("Adapt tf dump success.") + + def _adapt_npu_dump(self, path): + sub_dirs = os.listdir(path) + self.log.info("Find npu dump dir:%s", sub_dirs) + sub_dirs = filter(lambda x: str(x).isdigit(), sub_dirs) + for sub_dir in sub_dirs: + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + src_path = os.path.abspath(os.path.join(path, sub_dir)) + dst_path = os.path.join(cfg.DEFAULT_NPU_DUMP_DIR, sub_dir) + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + os.symlink(src_path, dst_path) + self.log.info("Adapt npu dump success.") + + def _adapt_vector_compare_result(self, path): + target_path = os.path.join(cfg.VECTOR_COMPARE_PATH, '0') + util.create_dir(target_path) + dst_path = os.path.join(target_path, os.path.basename(path)) + shutil.copy(path, dst_path) + self.log.info("Adapt vector compare result.") diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py new file mode 100644 index 000000000..a6cc8a5fb --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py @@ -0,0 +1,23 @@ +# coding=utf-8 +import os 
+import time
+import pathlib
+import shutil
+from ..util.util import util
+from ..util.constant import Constant
+from ..config import config as cfg
+from ..util.precision_tool_exception import PrecisionToolException
+
+
+class OfflineOmAdapter(object):
+    """自动解析om文件至GE图"""
+    def __init__(self, file_name):
+        self.file_name = file_name
+        self.log = util.get_log()
+
+    @staticmethod
+    def validate(file_name):
+        return os.path.isfile(file_name) and str(file_name).endswith(Constant.Suffix.OM)
+
+    def run(self):
+        self.log.info("To impl")
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py
new file mode 100644
index 000000000..7908c18be
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py
@@ -0,0 +1,194 @@
+# coding=utf-8
+import json
+import os
+
+from ..util.util import util
+from ..util.precision_tool_exception import PrecisionToolException
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.constant import Constant
+from ..config import config as cfg
+
+
+AI_CORE_OVERFLOW_STATUS = {
+    '0x8': '有符号整数最小负数NEG符号位取反溢出',
+    '0x10': '整数加法、减法、乘法或乘加操作计算有溢出',
+    '0x20': '浮点计算有溢出',
+    '0x80': '浮点数转无符号数的输入是负数',
+    '0x100': 'FP32转FP16或32符号整数转FP16中出现溢出',
+    '0x400': 'CUBE累加出现溢出'
+}
+DHA_ATOMIC_ADD_STATUS = {
+    '0x9': '[atomic overflow] 向上溢出',
+    '0xA': '[atomic underflow] 向下溢出',
+    '0xB': '[atomic src nan] 源操作数非法',
+    '0xC': '[atomic dst nan] 目的操作数非法',
+    '0xD': '[atomic both nan] 源操作数和目的操作数均非法'
+}
+L2_ATOMIC_ADD_STATUS = {
+    '000': '[atomic no error] 无异常',
+    '001': '[atomic overflow] 向上溢出',
+    '010': '[atomic underflow] 向下溢出',
+    '011': '[atomic src nan] 源操作数非法',
+    '100': '[atomic dst nan] 目的操作数非法',
+    '101': '[atomic both nan] 源操作数和目的操作数均非法'
+}
+
+
+class Overflow(object):
+    def __init__(self):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_files = None
+
+    @catch_tool_exception
+    def prepare(self):
+        """Prepare"""
+        # find right path in DUMP_FILES_NPU_ALL
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        sub_dir = util.get_newest_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        overflow_dump_files = util.list_npu_dump_files(os.path.join(cfg.NPU_OVERFLOW_DUMP_DIR, sub_dir))
+        self.debug_files = [item for item in overflow_dump_files.values() if item.op_type == 'Opdebug']
+        # sort by timestamp
+        self.debug_files = sorted(self.debug_files, key=lambda x: x.timestamp)
+        self.log.info("Find [%d] debug files in overflow dir.", len(self.debug_files))
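+
+    # Typical usage (illustrative sketch; the interactive CLI drives these calls):
+    #     overflow = Overflow()
+    #     overflow.prepare()          # index Opdebug files under the overflow dump dir
+    #     overflow.check(max_num=3)   # decode and summarize the first overflow records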
+
+    def check(self, max_num=3):
+        """Check overflow info"""
+        if len(self.debug_files) == 0:
+            self.log.info("[Overflow] Check success. Find [0] overflow node!")
+            return
+        self.log.info("[Overflow] Find [%s] overflow debug files. Will show top %s ops.",
+                      len(self.debug_files), max_num)
+        for i, debug_file in enumerate(self.debug_files):
+            if i >= max_num:
+                break
+            debug_decode_files = self._decode_file(debug_file, True)
+            with open(debug_decode_files[0].path, 'r') as f:
+                overflow_json = json.load(f)
+            util.print_panel(self._json_summary(overflow_json, debug_file))
+
+    def _json_summary(self, json_txt, debug_file):
+        res = []
+        # default ids fall back to the debug file when no category below matches
+        detail = {'task_id': -1, 'stream_id': -1}
+        if 'magic' in json_txt:
+            # version 2
+            detail = json_txt['acc_list']['data']
+            res.append(' - [AccType:%s][Status:%s][TaskId:%s]' % (
+                json_txt['acc_list'], detail['status'], detail['task_id']))
+        if 'AI Core' in json_txt and json_txt['AI Core']['status'] > 0:
+            detail = json_txt['AI Core']
+            res.append(' - [AI Core][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_ai_core_status(detail['status'])))
+        if 'DHA Atomic Add' in json_txt and json_txt['DHA Atomic Add']['status'] > 0:
+            detail = json_txt['DHA Atomic Add']
+            res.append(' - [DHA Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_dha_atomic_add_status(detail['status'])))
+        if 'L2 Atomic Add' in json_txt and json_txt['L2 Atomic Add']['status'] > 0:
+            detail = json_txt['L2 Atomic Add']
+            res.append(' - [L2 Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_l2_atomic_add_status(detail['status'])))
+        if str(detail['task_id']) == '-1':
+            detail['task_id'] = debug_file.task_id
+        if str(detail['stream_id']) == '-1':
+            detail['stream_id'] = debug_file.stream_id
+        dump_file_info = self._find_dump_files_by_task_id(detail['task_id'], detail['stream_id'],
+                                                          debug_file.dir_path)
+        res.append(' - First overflow file timestamp [%s] -' % debug_file.timestamp)
+        if dump_file_info is None:
+            self.log.warning("Can not find any dump file for debug file: %s, op task id: %s", debug_file.file_name,
+                             detail['task_id'])
+        else:
+            # sort by input/output & index, keeping the sorted result
+            dump_decode_files = sorted(self._decode_file(dump_file_info), key=lambda x: x.idx)
+            for anchor_type in ['input', 'output']:
+                for dump_decode_file in dump_decode_files:
+                    if dump_decode_file.type != anchor_type:
+                        continue
+                    res.append('   ├─ %s' % dump_decode_file.file_name)
+                    res.append('   └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(dump_decode_file.path))
+            res.insert(0, '[green][%s][%s][/green] %s' % (dump_file_info.op_type, dump_file_info.task_id,
+                                                          dump_file_info.op_name))
+        return Constant.NEW_LINE.join(res)
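+
+    # Worked example for the status decoding helpers below (illustrative): an
+    # AI Core status of 0x28 has bits 0x8 and 0x20 set, so
+    # _decode_ai_core_status(0x28) returns the messages for both the '0x8' and
+    # '0x20' entries of AI_CORE_OVERFLOW_STATUS.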
+
+    @staticmethod
+    def _decode_file(file_info, debug=False):
+        file_name = file_info.file_name
+        if debug:
+            decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        else:
+            decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        if len(decode_files) == 0:
+            # decode info file
+            util.convert_dump_to_npy(file_info.path, cfg.OVERFLOW_DECODE_DIR)
+            if debug:
+                decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+            else:
+                decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        if len(decode_files) == 0:
+            raise PrecisionToolException("Decode overflow debug file: %s failed." % file_name)
+        decode_files = sorted(decode_files.values(), key=lambda x: x.timestamp)
+        return decode_files
+
+    @staticmethod
+    def _find_dump_files_by_task_id(task_id, stream_id, search_dir):
+        dump_files = util.list_npu_dump_files(search_dir)
+        dump_file_list = [item for item in dump_files.values() if item.op_type != 'Opdebug']
+        dump_file_list = sorted(dump_file_list, key=lambda x: x.timestamp)
+        for dump_file in dump_file_list:
+            if dump_file.task_id == int(task_id) and dump_file.stream_id == int(stream_id):
+                return dump_file
+        return None
+
+    def _decode_ai_core_status(self, status):
+        error_code = []
+        if type(status) is not int:
+            return error_code
+        # reversed bin string walks the bits LSB-first; the trailing '0b' chars never match '1'
+        bin_status = ''.join(reversed(bin(status)))
+        prefix = ''
+        self.log.debug('Decode AI Core Overflow status:[%s]', hex(status))
+        for i in range(len(bin_status)):
+            if bin_status[i] == '1':
+                if hex(int('1' + prefix, 2)) not in AI_CORE_OVERFLOW_STATUS:
+                    self.log.warning("Unknown AI Core overflow status: [%s]", hex(int('1' + prefix, 2)))
+                    continue
+                error_code.append(AI_CORE_OVERFLOW_STATUS[hex(int('1' + prefix, 2))])
+            prefix += '0'
+        return error_code
+
+    def _decode_l2_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        code, _ = self._sub_bin_code(status, 16, 18)
+        if code in L2_ATOMIC_ADD_STATUS:
+            return L2_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    def _decode_dha_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        _, code = self._sub_bin_code(status, 8, 15)
+        if code in DHA_ATOMIC_ADD_STATUS:
+            # look up by the decoded hex code, not the raw status value
+            return DHA_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    @staticmethod
+    def _sub_bin_code(status, start, end):
+        """ Get specific bit code from status in bin format
+        :param status: status num
+        :param start: start bit
+        :param end: end bit
+        :return: result in bin format and hex format
+        """
+        bin_code = bin(status).replace('0b', '')
+        append_num = end + 1 - len(bin_code)
+        if append_num > 0:
+            bin_list = ['0'] * append_num
+            bin_list.append(bin_code)
+            bin_code = ''.join(bin_list)
+        bin_start = len(bin_code) - end - 1
+        bin_end = len(bin_code) - start
+        bin_start = max(0, bin_start)
+        bin_code = bin_code[bin_start: bin_end]
+        return bin_code, hex(int(bin_code, 2))
+
+
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py
new file mode 100644
index 000000000..980b9fe33
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+from ..util.util import util
+from ..config import config as cfg
+
+FLAG_DUMP_GE_GRAPH = 'DUMP_GE_GRAPH'
+FLAG_DUMP_GRAPH_LEVEL = 'DUMP_GRAPH_LEVEL'
+FLAG_DUMP_GRAPH_PATH = 'DUMP_GRAPH_PATH'
+FLAG_NPU_DUMP_GRAPH = 'NPU_DUMP_GRAPH'
+FUSION_SWITCH_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_switch.cfg')
+FUSION_OFF_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_off.cfg')
+
+
+class TfAdapter(object):
+    def __init__(self):
+        self.log = util.get_log()
+
+    def sess_dump(self, sess):
+        """wrapper session with dumping debug wrapper. In session run mode. 
Use sess=sess_dump(sess) + :param sess: origin session + :return: Session + """ + from tensorflow.python import debug as tf_debug + self._init() + return tf_debug.DumpingDebugWrapperSession(sess, cfg.TF_DEBUG_DUMP_DIR) + + def estimator_dump(self): + """In estimator mode. estim_spec = tf.estimator.EstimatorSpec(traing_hooks=[estimator_dump()]) + :return: + """ + from tensorflow.python import debug as tf_debug + self._init() + return tf_debug.DumpingDebugHook(cfg.TF_DEBUG_DUMP_DIR) + + def session_dump_config(self, session_config=None, action=None, dump_layer=None): + """ + In TF session mode. set dump_config in session_config. + exp. config = session_dump_config() + config.[set your own configs] + with tf.Session(config=config) as sess: + sess.run(_) + tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline") + :param session_config: original session config + :param action: if set action, no need to start app with cli wrapper + :return: config_pb2.ConfigProto + """ + from tensorflow.core.protobuf import config_pb2 + from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig + if ((not isinstance(session_config, config_pb2.ConfigProto)) and + (not issubclass(type(session_config), config_pb2.ConfigProto))): + session_config = config_pb2.ConfigProto() + custom_op = None + for existed_custom_op in session_config.graph_options.rewrite_options.custom_optimizers: + if existed_custom_op.name == 'NpuOptimizer': + custom_op = existed_custom_op + if custom_op is None: + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + custom_op.parameter_map['use_off_line'].b = True + self.update_custom_op(custom_op, action, dump_layer) + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + return session_config + + def estimator_dump_config(self, action=None): + """return DumpConfig. + In estimator mode. set dump_config in NPURunConfig(). + exp. 
config = NPURunConfig(dump_config=estimator_dump_config(), session_config=session_config)
+        :return: DumpConfig
+        """
+        from npu_bridge.npu_init import DumpConfig
+        self._init()
+        if self._is_overflow(action):
+            config = DumpConfig(enable_dump_debug=True, dump_path=cfg.NPU_OVERFLOW_DUMP_DIR, dump_mode="all")
+        elif self._is_dump(action):
+            config = DumpConfig(enable_dump=True, dump_path=cfg.DEFAULT_NPU_DUMP_DIR, dump_step=cfg.TF_DUMP_STEP,
+                                dump_mode="all")
+        else:
+            config = DumpConfig()
+        return config
+
+    def npu_device_dump_config(self, npu_device, action):
+        """For tf2.x
+        :param npu_device: npu_device
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :return: npu_device
+        """
+        self._init()
+        if self._is_overflow(action):
+            npu_device.global_options().dump_config.enable_dump_debug = True
+            npu_device.global_options().dump_config.dump_path = cfg.NPU_OVERFLOW_DUMP_DIR
+            npu_device.global_options().dump_config.dump_debug_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+        if self._is_dump(action):
+            npu_device.global_options().dump_config.enable_dump = True
+            npu_device.global_options().dump_config.dump_path = cfg.DEFAULT_NPU_DUMP_DIR
+            npu_device.global_options().dump_config.dump_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+            npu_device.global_options().dump_config.dump_step = cfg.TF_DUMP_STEP
+        if self._is_dump_stats(action):
+            npu_device.global_options().dump_config.dump_data = "stats"
+        if self._is_fusion_off(action):
+            npu_device.global_options().fusion_switch_file = FUSION_OFF_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        if self._is_fusion_switch(action):
+            npu_device.global_options().fusion_switch_file = FUSION_SWITCH_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        return npu_device
+
+    def update_custom_op(self, custom_op, action=None, dump_layer=None):
+        """Update custom_op
+        :param custom_op: origin custom op
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :param dump_layer: op names passed through to the 'dump_layer' option
+        :return: custom_op
+        """
+        import tensorflow as tf
+        self._init()
+        custom_op.parameter_map['debug_dir'].s = tf.compat.as_bytes(cfg.DEFAULT_OP_DEBUG_DIR)
+        if self._is_overflow(action):
+            custom_op.parameter_map['enable_dump_debug'].b = True
+            custom_op.parameter_map['dump_debug_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.NPU_OVERFLOW_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+        elif self._is_dump(action):
+            custom_op.parameter_map['enable_dump'].b = True
+            custom_op.parameter_map['dump_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.DEFAULT_NPU_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+            custom_op.parameter_map['dump_step'].s = tf.compat.as_bytes(cfg.TF_DUMP_STEP)
+            if self._is_dump_stats(action):
+                custom_op.parameter_map['dump_data'].s = tf.compat.as_bytes("stats")
+        if dump_layer is not None:
+            custom_op.parameter_map['dump_layer'].s = tf.compat.as_bytes(dump_layer)
+        if self._is_fusion_off(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_OFF_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        elif self._is_fusion_switch(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_SWITCH_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        if self._is_prof(action):
+            
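+            # The profiling branch below fills 'profiling_options' with a JSON string;
+            # the keys are exactly the ones this tool passes through to the NPU runtime.
+            # Assumption based on common CANN usage, not confirmed by this patch:
+            # leaving "fp_point"/"bp_point" empty lets the framework auto-detect the
+            # forward/backward boundary ops.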
custom_op.parameter_map["profiling_mode"].b = True + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes('{"output":"%s",\ + "storage_limit": "1000MB","training_trace":"on","l2":"on","hccl":"on","task_trace":"on",\ + "aicpu":"on","fp_point":"", "bp_point":"","aic_metrics":"PipeUtilization","msproftx":"on"}' % + cfg.PROFILING_DIR) + return custom_op + + def _init(self): + util.create_dir(cfg.DEFAULT_OP_DEBUG_DIR) + util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + util.create_dir(cfg.PROFILING_DIR) + self._set_dump_graph_flags() + + @staticmethod + def _set_dump_graph_flags(): + os.environ[FLAG_DUMP_GE_GRAPH] = str(cfg.DUMP_GE_GRAPH_VALUE) + os.environ[FLAG_DUMP_GRAPH_LEVEL] = str(cfg.DUMP_GRAPH_LEVEL_VALUE) + os.environ[FLAG_DUMP_GRAPH_PATH] = cfg.DEFAULT_NPU_GRAPH_DIR + os.environ[FLAG_NPU_DUMP_GRAPH] = 'true' + + @staticmethod + def _is_dump(action): + if action is not None: + return 'dump' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_dump_stats(action): + if action is not None: + return 'dump_stats' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_overflow(action): + if action is not None: + return 'overflow' in action + if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] == 'True': + print("[PrecisionTool] enable npu overflow >======") + return True + return False + + @staticmethod + def _is_fusion_off(action): + return 'fusion_off' in action if action is not None else False + + @staticmethod + def _is_fusion_switch(action): + return ('fusion_switch' in action) if action is not None else False + + @staticmethod + def _is_prof(action): + return ('prof' in action) if action is not None else False + -- Gitee From 78b5b652173db7e0c90f8904c566fadac171ac2a Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:43:49 +0000 Subject: [PATCH 27/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/compare/compare.py | 215 ++++++++++++++++++ .../lib/compare/compare_result.py | 143 ++++++++++++ 2 files changed, 358 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py new file mode 100644 index 000000000..e166f5042 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py @@ -0,0 +1,215 @@ +# coding=utf-8 +""" +Compare +""" +import json +import os +import numpy as np +from .compare_result import CompareResult +from ..util.constant import Constant +from ..util.util import util +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException +from ..util.precision_tool_exception import catch_tool_exception + + +class 
Compare(object):
+    def __init__(self):
+        """Init"""
+        super(Compare, self).__init__()
+        self.log = util.get_log()
+        self.vector_compare_results = {}
+
+    @catch_tool_exception
+    def prepare(self):
+        util.create_dir(cfg.VECTOR_COMPARE_PATH)
+
+    def npu_tf_vector_compare(self, graphs, npu_root_dir, tf_root_dir, result_dir):
+        """Compare npu dump data with tf dump data
+        :param graphs: npu ge graph json file list
+        :param npu_root_dir: root dir of npu dump data
+        :param tf_root_dir: root dir of tf dump data
+        :param result_dir: result dir
+        :return:
+        """
+        for graph_file in graphs:
+            self.log.info("Compare npu tf with graph %s", graph_file)
+            sub_graphs = self._get_sub_graphs(graph_file)
+            if sub_graphs is None:
+                continue
+            for sub_graph in sub_graphs:
+                npu_dir = self._get_sub_dir_by_sub_graph_name(sub_graph, npu_root_dir)
+
+                if npu_dir is None:
+                    self.log.warning("Can not find any sub graph dir named %s", sub_graph)
+                    # for some infer case, sub_graph name may not match sub dir name.
+                    npu_dir_0 = self._get_sub_dir_by_sub_graph_name(sub_graph + '_0', npu_root_dir)
+                    if npu_dir_0 is None:
+                        self.log.warning("Can not find any sub graph dir named %s", sub_graph + '_0')
+                        continue
+                    npu_dir = npu_dir_0
+                self.vector_compare(npu_dir, tf_root_dir, result_dir, graph_file)
+
+    @catch_tool_exception
+    def _get_sub_dir_by_sub_graph_name(self, sub_graph, npu_root_dir):
+        sub_graph_dirs = []
+        for dir_path, dir_names, file_names in os.walk(npu_root_dir, followlinks=True):
+            if sub_graph in dir_names:
+                # walk sub graph dir
+                for sub_dir_path, sub_dir_names, sub_file_names in os.walk(os.path.join(dir_path, sub_graph),
+                                                                           followlinks=True):
+                    if len(sub_dir_names) == 0:
+                        sub_graph_dirs.append(sub_dir_path)
+        if len(sub_graph_dirs) == 0:
+            return None
+        if len(sub_graph_dirs) > 1:
+            self.log.warning("Find [%d] dirs in sub graph dir [%s], %s, will compare the first one.",
+                             len(sub_graph_dirs), sub_graph, sub_graph_dirs)
+        return sub_graph_dirs[0]
+
+    @catch_tool_exception
+    def _get_sub_graphs(self, graph_file):
+        with open(graph_file, 'r') as f:
+            graph_json = json.load(f)
+        if 'graph' not in graph_json:
+            raise PrecisionToolException("No graph in file: %s" % graph_file)
+        sub_graphs = []
+        for graph in graph_json['graph']:
+            sub_graphs.append(graph['name'])
+        return sub_graphs
+
+    '''
+    @staticmethod
+    def _get_ge_default_dirs(self, root_dir):
+        for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True):
+            for dir_name in dir_names:
+    '''
+
+    def npu_vector_compare(self, debug_0_root_dir, debug_1_root_dir):
+        """Compare two npu dump data
+        :param debug_0_root_dir:
+        :param debug_1_root_dir:
+        :return:
+        """
+        # debug_0_sub_dirs = self._get_ge_default_dirs(debug_0_root_dir)
+        # debug_1_sub_dirs = self._get_ge_default_dirs(debug_1_root_dir)
+
+    def vector_compare(self, lh_path, rh_path, result_dir, graph_json=None):
+        """Compare all ops"""
+        if lh_path is None or util.empty_dir(lh_path):
+            raise PrecisionToolException("No valid dump file in %s" % lh_path)
+        if rh_path is None or util.empty_dir(rh_path):
+            raise PrecisionToolException("No valid dump file in %s" % rh_path)
+        self.log.info("Start vector compare process...")
+        util.compare_vector(lh_path, rh_path, graph_json, result_dir)
+        self.log.info("Vector compare process finish.")
+
+    def _get_compare_result_by_file_name(self, file_name):
+        results = []
+        if file_name is None:
+            sub_dir = util.get_newest_dir(cfg.VECTOR_COMPARE_PATH)
+            if sub_dir == '':
+                raise PrecisionToolException("Empty vector compare path:%s" % cfg.VECTOR_COMPARE_PATH)
+            file_name = os.path.join(cfg.VECTOR_COMPARE_PATH, sub_dir)
+        if os.path.isfile(file_name):
+            
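+            # file_name may point at a single result csv or at a directory produced
+            # by vector compare; a plain file is wrapped as one CompareResult, while
+            # a directory is expanded below into one CompareResult per result csv.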
results.append(CompareResult(file_name))
+        if os.path.isdir(file_name):
+            vector_compare_result_files = util.list_vector_compare_result_files(file_name)
+            if vector_compare_result_files is None or len(vector_compare_result_files) == 0:
+                raise PrecisionToolException("Can not find any vector compare result in dir:%s" % file_name)
+            file_list = sorted(vector_compare_result_files.values(), key=lambda x: x.timestamp)
+            file_names = [x.file_name for x in file_list]
+            self.log.debug("Find %s result files in dir %s", file_names, file_name)
+            for file in file_list:
+                results.append(CompareResult(file.path))
+        return results
+
+    @catch_tool_exception
+    def vector_summary(self, file_name=None, cos_sim_threshold=0.98, limit=1):
+        """Print not NaN result in vector compare result"""
+        compare_results = self._get_compare_result_by_file_name(file_name)
+        error_ops = []
+        for compare_result in compare_results:
+            err_ops = compare_result.get_op_by_cosine_sim_threshold(cos_sim_threshold, limit)
+            self.log.info("Find %d ops with cosine similarity less than %s in %s", len(err_ops), cos_sim_threshold,
+                          compare_result.file_path)
+            error_ops.extend(err_ops)
+        if len(error_ops) == 0:
+            self.log.info("Can not find any op with cosine similarity below threshold: %s", cos_sim_threshold)
+        else:
+            for i, error_op in enumerate(error_ops):
+                if i < limit:
+                    error_op.summary(cos_sim_threshold)
+        return error_ops
+
+    def compare_data(self, left, right, save_txt=False, rl=0.001, al=0.001, diff_count=20):
+        """Compare data"""
+        left = self._detect_file(left)
+        right = self._detect_file(right)
+        if left is None or right is None:
+            raise PrecisionToolException("invalid input or output")
+        # save to txt
+        if save_txt:
+            util.save_npy_to_txt(left)
+            util.save_npy_to_txt(right)
+        # compare data
+        total_cnt, all_close, cos_sim, err_percent = self._do_compare_data(left, right, rl, al, diff_count)
+        content = ['Left:', ' ├─ NpyFile: %s' % left]
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % left)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(left))
+        content.append('Right:')
+        content.append(' ├─ NpyFile: %s' % right)
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % right)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(right))
+        content.append('NumCnt: %s' % total_cnt)
+        content.append('AllClose: %s' % all_close)
+        content.append('CosSim: %s' % cos_sim)
+        content.append('ErrorPer: %s (rl= %s, al= %s)' % (err_percent, rl, al))
+        util.print_panel(Constant.NEW_LINE.join(content))
+
+    def _do_compare_data(self, left, right, rl=0.001, al=0.001, diff_count=20):
+        data_left = np.load(left).astype(np.float32)
+        data_right = np.load(right).astype(np.float32)
+        shape_left = data_left.shape
+        shape_right = data_right.shape
+        if shape_left != shape_right:
+            self.log.warning("Data shape not equal: %s vs %s", data_left.shape, data_right.shape)
+        data_left = data_left.reshape(-1)
+        data_right = data_right.reshape(-1)
+        if data_left.shape[0] != data_right.shape[0]:
+            self.log.warning("Data size not equal: %s vs %s", data_left.shape, data_right.shape)
+            if data_left.shape[0] < data_right.shape[0]:
+                data_left = np.pad(data_left, (0, data_right.shape[0] - data_left.shape[0]), 'constant')
+            else:
+                data_right = np.pad(data_right, (0, data_left.shape[0] - data_right.shape[0]), 'constant')
+        all_close = np.allclose(data_left, data_right, atol=al, rtol=rl)
+        # cos_sim = 1 - spatial.distance.cosine(data_left, data_right)
+        cos_sim = np.dot(data_left, data_right) / (
+            
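+            # cosine similarity computed with plain numpy (the scipy variant is kept
+            # commented out above to avoid the extra dependency); note that an
+            # all-zero tensor makes the denominator 0.0 and the result nan.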
np.sqrt(np.dot(data_left, data_left)) * np.sqrt(np.dot(data_right, data_right)))
+        err_cnt = 0
+        total_cnt = data_left.shape[0]
+        diff_table_columns = ['Index', 'Left', 'Right', 'Diff']
+        err_table = util.create_table("Error Item Table", diff_table_columns)
+        top_table = util.create_table("Top Item Table", diff_table_columns)
+        for i in range(total_cnt):
+            abs_diff = abs(data_left[i] - data_right[i])
+            if i < diff_count:
+                top_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff))
+            if abs_diff > (al + rl * abs(data_right[i])):
+                if err_cnt < diff_count:
+                    err_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff))
+                err_cnt += 1
+        err_percent = float(err_cnt / total_cnt)
+        util.print(util.create_columns([err_table, top_table]))
+        return total_cnt, all_close, cos_sim, err_percent
+
+    def _detect_file(self, file_name):
+        """Find files in npu/overflow/cpu dump dir"""
+        if os.path.isfile(file_name):
+            return file_name
+        for parent_dir in [cfg.TMP_DIR, cfg.TF_DUMP_DIR]:
+            file_infos = util.list_numpy_files(parent_dir, file_name)
+            if len(file_infos) > 0:
+                self.log.info("Find %s, choose the first one.", list(file_infos.keys()))
+                return list(file_infos.values())[0].path
+        return None
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py
new file mode 100644
index 000000000..5e67dd24d
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py
@@ -0,0 +1,143 @@
+import collections
+import os
+import numpy as np
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import PrecisionToolException
+from ..util.precision_tool_exception import catch_tool_exception
+
+
+class RowMap(object):
+    """
+    'Index': 0,
+    'LeftOp': 1,
+    'RightOp': 2,
+    'TensorIdx': 3, # TensorIndex
+    'CosSim': 4, # CosineSimilarity
+    'MaxAbs': 5, # MaxAbsoluteError
+    'ARE': 6, # AccumulatedRelativeError
+    'RED': 7, # RelativeEuclideanDistance
+    'KLD': 8, # KullbackLeiblerDivergence
+    'StandardDeviation': 9 # StandardDeviation
+    """
+    def __init__(self, item=None):
+        self.index = 0
+        self.left = 1
+        self.right = 2
+        self.tensor_index = 3
+        self.cosine_similarity = 4
+        self.max_abs = 5
+        if item is not None:
+            self.update(item)
+
+    def update(self, item):
+        for i, value in enumerate(item):
+            self.left = i if value == 'LeftOp' else self.left
+            self.right = i if value == 'RightOp' else self.right
+            self.tensor_index = i if value == 'TensorIndex' else self.tensor_index
+            self.cosine_similarity = i if value == 'CosineSimilarity' else self.cosine_similarity
+            self.max_abs = i if value == 'MaxAbsoluteError' else self.max_abs
+
+
+class CompareItem(object):
+    def __init__(self, op_name, item, row_map):
+        self.row_map = row_map
+        self.index = int(item[self.row_map.index])
+        self.op_name = op_name
+        self.left = item[self.row_map.left].split(" ")
+        self.right = item[self.row_map.right].split(" ")
+        self.input = []
+        self.output = []
+
+    def update(self, item):
+        tensor_index = item[self.row_map.tensor_index]
+        if tensor_index not in ['NaN', '*']:
+            item_detail = tensor_index.split(':')
+            if len(item_detail) != 3:
+                raise PrecisionToolException("item:%s tensor index invalid. 
[%s]" % ( + item[self.row_map.index], tensor_index)) + if item_detail[1] == 'input': + self.input.insert(int(item_detail[2]), item) + else: + self.output.insert(int(item_detail[2]), item) + + def is_cosine_sim_over_threshold(self, threshold): + for item in self.output: + if item[self.row_map.cosine_similarity] == 'NaN': + continue + if float(item[self.row_map.cosine_similarity]) <= threshold: + return True + return False + + @staticmethod + def _color_data(data, threshold): + try: + data = float(data) + if np.isnan(data): + raise ValueError + elif data <= threshold: + return "[red]%s[/red]" % data + else: + return "[green]%s[/green]" % data + except ValueError: + return "[yellow]%s[/yellow]" % data + + def summary(self, threshold): + content = ["Left: %s" % self.left, "Right: %s" % self.right, "Input: "] + input_txt = [] + for i, item in enumerate(self.input): + input_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.extend([Constant.TAB_LINE.join(input_txt), "Output:"]) + output_txt = [] + for i, item in enumerate(self.output): + output_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.append(Constant.TAB_LINE.join(output_txt)) + title = "[%d] %s" % (self.index, self.op_name) + util.print_panel(Constant.NEW_LINE.join(content), title=title) + + +class CompareResult(object): + def __init__(self, file_path): + self.file_path = file_path + self.ops = None + self.prepare() + + @catch_tool_exception + def prepare(self): + if not str(self.file_path).endswith(Constant.Suffix.CSV): + raise PrecisionToolException("Compare result file %s not a csv file." % self.file_path) + if not os.path.isfile(self.file_path): + raise PrecisionToolException("Compare result file %s not exist." 
% self.file_path) + items = util.read_csv(self.file_path) + self.ops = collections.OrderedDict() + row_map = RowMap() + for item in items: + if item[row_map.index] == 'Index': + row_map.update(item) + continue + if item[row_map.tensor_index] in ['NaN', '*']: + continue + tensor_index = item[row_map.tensor_index] + op_name = tensor_index.split(":")[0] + if op_name not in self.ops: + self.ops[op_name] = CompareItem(op_name, item, row_map) + op = self.ops[op_name] + op.update(item) + + def get_compare_item_by_op(self, op_name): + if self.ops is None: + self.prepare() + if self.ops is None: + raise PrecisionToolException("Invalid compare result file: %s" % self.file_path) + if op_name in self.ops: + return self.ops[op_name] + return None + + def get_op_by_cosine_sim_threshold(self, threshold, limit=-1): + result = [] + for compare_item in self.ops.values(): + if compare_item.is_cosine_sim_over_threshold(threshold): + result.append(compare_item) + if len(result) == limit: + break + return result -- Gitee From e9ff37c666b10bce48f9b29e2bfe5d9291f4f340 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:44:07 +0000 Subject: [PATCH 28/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/config/config.py | 114 ++++++++++++++++++ .../precision_tool/lib/config/fusion_off.cfg | 10 ++ .../lib/config/fusion_switch.cfg | 6 + 3 files changed, 130 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py new file mode 100644 index 000000000..f9568666c --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py @@ -0,0 +1,114 @@ +# coding=utf-8 +import os + +# Dump config '0|5|10' +TF_DUMP_STEP = '0' + +# path to run package operator cmp compare +# default may be /usr/local/Ascend/ +CMD_ROOT_PATH = '/usr/local/Ascend/' +ASCEND_SET_ENV = os.path.join(CMD_ROOT_PATH, 'bin/setenv.bash') + + +# ASCEND Log Path +ASCEND_LOG_PATH = '/root/ascend/log/plog/' + +# TOOL CONFIG +LOG_LEVEL = "NOTSET" +ROOT_DIR = '' + +# [train/infer] if adapt from msquickcmp result, set net type to infer +NET_TYPE = 'infer' + +''' +precision_data/ +├── npu +│ ├── debug_0 +| | ├── dump +| | | └── 20210510101133 +| │ └── graph +| | └── ge_proto_00000179_PreRunAfterBuild.txt +│ └── debug_1 +├── tf +| ├── tf_debug +| └── dump +├── overflow +├── fusion +└── temp + ├── op_graph + ├── decode + | ├── dump_decode + | ├── overflow_decode + | └── dump_convert + └── vector_compare + ├── 20210510101133 + | ├── result_123456.csv + | └── result_123455.csv + └── 20210510101134 + └── result_123458.csv +''' + +# Static dirs, do not change +DATA_ROOT_DIR = os.path.join(ROOT_DIR, 'precision_data') + +# fusion +FUSION_DIR = os.path.join(DATA_ROOT_DIR, 'fusion') + +# npu dump/graph parent dir +NPU_DIR = os.path.join(DATA_ROOT_DIR, 'npu') +DEFAULT_NPU_DIR = os.path.join(NPU_DIR, 'debug_0') +DEFAULT_NPU_DUMP_DIR = os.path.join(DEFAULT_NPU_DIR, 'dump') +DEFAULT_NPU_GRAPH_DIR = 
os.path.join(DEFAULT_NPU_DIR, 'graph')
+PROFILING_DIR = os.path.join(DEFAULT_NPU_DIR, 'prof')
+DEFAULT_OP_DEBUG_DIR = DEFAULT_NPU_DIR
+
+# npu overflow dir
+OVERFLOW_DIR = os.path.join(DATA_ROOT_DIR, 'overflow')
+NPU_OVERFLOW_DUMP_DIR = os.path.join(OVERFLOW_DIR, 'dump')
+
+# tf dirs
+TF_DIR = os.path.join(DATA_ROOT_DIR, 'tf')
+TF_DEBUG_DUMP_DIR = os.path.join(TF_DIR, 'tf_debug')
+TF_DUMP_DIR = os.path.join(TF_DIR, 'dump')
+TF_GRAPH_DIR = os.path.join(TF_DIR, 'graph')
+# tf checkpoints
+TF_CKPT_ROOT = os.path.join(TF_DIR, 'checkpoints')
+TF_CKPT_FILE = os.path.join(TF_CKPT_ROOT, 'ckpt')
+TF_CKPT_INPUT_DIR = os.path.join(TF_CKPT_ROOT, 'input')
+
+# pytorch dirs
+PT_DIR = os.path.join(DATA_ROOT_DIR, 'pt')
+PT_NPU_DIR = os.path.join(PT_DIR, 'npu')
+PT_GPU_DIR = os.path.join(PT_DIR, 'gpu')
+
+# tmp dirs
+TMP_DIR = os.path.join(DATA_ROOT_DIR, 'temp')
+OP_GRAPH_DIR = os.path.join(TMP_DIR, 'op_graph')
+
+DECODE_DIR = os.path.join(TMP_DIR, 'decode')
+OVERFLOW_DECODE_DIR = os.path.join(DECODE_DIR, 'overflow_decode')
+DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'dump_decode')
+PT_DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'pt')
+DUMP_CONVERT_DIR = os.path.join(DECODE_DIR, 'dump_convert')
+
+VECTOR_COMPARE_PATH = os.path.join(TMP_DIR, 'vector_compare')
+TF_TENSOR_NAMES = os.path.join(TMP_DIR, 'tf_tensor_names.txt')
+TF_TENSOR_DUMP_CMD = os.path.join(TMP_DIR, 'tf_tensor_cmd.txt')
+
+# FLAG
+PRECISION_TOOL_OVERFLOW_FLAG = 'PRECISION_TOOL_OVERFLOW'
+PRECISION_TOOL_DUMP_FLAG = 'PRECISION_TOOL_DUMP'
+
+# for previous versions, set this to 0
+OP_DEBUG_LEVEL = 4
+# DUMP CONFIG
+DUMP_GE_GRAPH_VALUE = 2
+DUMP_GRAPH_LEVEL_VALUE = 3
+DUMP_SEED = 2022
+
+# TF_DEBUG
+TF_DEBUG_TIMEOUT = 360
+
+# MSACCUCMP
+MS_ACCU_CMP = r'msaccucmp.py[c]?'
+BUILD_JSON_GRAPH_NAME = 'Build'
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg
new file mode 100644
index 000000000..823672b74
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg
@@ -0,0 +1,10 @@
+{
+    "Switch": {
+        "GraphFusion": {
+            "ALL": "off"
+        },
+        "UBFusion": {
+            "ALL": "off"
+        }
+    }
+}
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg
new file mode 100644
index 000000000..572ad4271
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg
@@ -0,0 +1,6 @@
+{
+"Switch": {
+    "GraphFusion": {},
+    "UBFusion": {}
+}
+}
\ No newline at end of file
-- 
Gitee

From 653bdfcdafc56c75c0f6eb379ca5c4975c610799 Mon Sep 17 00:00:00 2001
From: huangju1993 
Date: Wed, 17 Jul 2024 07:44:26 +0000
Subject: [PATCH 29/38] 1

Signed-off-by: huangju1993 
---
 .../precision_tool/lib/dump/dump_manager.py   |  89 ++++++++
 .../precision_tool/lib/dump/npu_dump.py       | 200 ++++++++++++++++++
 .../precision_tool/lib/dump/pt_dump.py        |  65 ++++++
 .../precision_tool/lib/dump/tf_dump.py        | 141 ++++++++++++
 4 files changed, 495 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py
 create mode 100644 
TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py
new file mode 100644
index 000000000..939f4fa47
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py
@@ -0,0 +1,89 @@
+# coding=utf-8
+import os
+import collections
+from ..util.util import util
+from ..util.constant import Constant
+from .npu_dump import NpuDump
+from .tf_dump import TfDump
+from .pt_dump import PtDump
+from ..config import config as cfg
+
+
+class DumpManager(object):
+    def __init__(self):
+        self.npu_dumps = collections.OrderedDict()
+        self.pt_dump = PtDump(cfg.PT_DIR)
+        self.tf_dump = TfDump(cfg.TF_DUMP_DIR)
+        self._init_dirs()
+
+    def prepare(self):
+        # 1. prepare npu dump
+        sub_dirs = os.listdir(cfg.NPU_DIR)
+        if len(sub_dirs) == 0:
+            # create default
+            sub_dirs = [Constant.DEFAULT_DEBUG_ID]
+        # sort in place; a bare sorted(sub_dirs) would discard its result
+        sub_dirs.sort()
+        for sub_dir in sub_dirs:
+            npu_dump = NpuDump(sub_dir)
+            npu_dump.prepare()
+            self.npu_dumps[sub_dir] = npu_dump
+        # 2. prepare tf dump
+        self.tf_dump.prepare()
+        # 3. prepare pt dump
+        self.pt_dump.prepare()
+
+    def get_dump_root_dir(self, debug_id):
+        if debug_id in self.npu_dumps:
+            return self.npu_dumps[debug_id].dump_root
+        return None
+
+    def op_dump_summary(self, ops):
+        npu_result = collections.OrderedDict()
+        for debug_id, op in ops.items():
+            if debug_id in self.npu_dumps:
+                npu_result[debug_id] = collections.OrderedDict()
+                for op_detail in op:
+                    npu_result[debug_id][op_detail.graph_name] = self.npu_dumps[debug_id].op_dump_summary(op_detail)
+        tf_result = None
+        if self.tf_dump is not None and len(ops[Constant.DEFAULT_DEBUG_ID]) != 0:
+            tf_result = self.tf_dump.op_dump_summary(ops[Constant.DEFAULT_DEBUG_ID][0])
+        return npu_result, tf_result
+
+    def pt_dump_summary(self, ir_name):
+        """Pytorch dump summary"""
+        return self.pt_dump.op_dump_summary(ir_name)
+
+    def convert_npu_dump(self, name, data_format=None, dst_path=None):
+        for _, npu_dump in enumerate(self.npu_dumps.values()):
+            npu_dump.convert_npu_dump(name, data_format, dst_path)
+
+    def print_tensor(self, file_name, is_convert):
+        """Print numpy data file"""
+        if os.path.isfile(file_name):
+            return util.print_npy_summary(os.path.dirname(file_name), os.path.basename(file_name), is_convert)
+        # file_name = file_name.replace('/', '_')
+        # npu decode file
+        npu_convert_files = self.npu_dumps[Constant.DEFAULT_DEBUG_ID].get_npu_dump_decode_files_by_name(file_name)
+        self._print_tensors(npu_convert_files, is_convert)
+        # util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name)
+        # tf decode file
+        tf_decode_files = self.tf_dump.get_dump_files_by_name(file_name, True)
+        self._print_tensors(tf_decode_files, is_convert)
+        # pt decode file
+        pt_decode_files = self.pt_dump.get_dump_files_by_name(file_name)
+        self._print_tensors(pt_decode_files, is_convert)
+
+    @staticmethod
+    def _print_tensors(file_infos, is_convert):
+        if file_infos is not None:
+            for file_info in file_infos.values():
+                
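+                # each file_info carries dir_path/file_name attributes; a summary is
+                # printed for every match, not just the first one.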
util.print_npy_summary(file_info.dir_path, file_info.file_name, is_convert)
+
+    @staticmethod
+    def _init_dirs():
+        """Create dump file dirs"""
+        util.create_dir(cfg.DUMP_DECODE_DIR)
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        util.create_dir(cfg.OVERFLOW_DECODE_DIR)
+        util.create_dir(cfg.TF_DUMP_DIR)
+        util.create_dir(cfg.PT_DIR)
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py
new file mode 100644
index 000000000..7e3e4a9d8
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+import re
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.precision_tool_exception import PrecisionToolException
+from ..config import config as cfg
+
+
+class NpuDumpDecodeFile(object):
+    def __init__(self):
+        self.log = util.get_log()
+        self.input_files = {}
+        self.output_files = {}
+        self.timestamp = -1
+        self.op_name = ''
+        self.op_type = ''
+        self.task_id = -1
+        # self.stream_id = -1
+
+    def update(self, file_info):
+        """Prepare op npu decode file map."""
+        if not self._check(file_info):
+            self.log.warning('Invalid NpuDumpDecodeFile: %s', file_info)
+            return
+        if file_info.type == 'input':
+            self.input_files[file_info.idx] = file_info
+        else:
+            self.output_files[file_info.idx] = file_info
+
+    def summary(self):
+        txt = ['[yellow][%s][TaskID: %d][/yellow][green][%s][/green] %s' % (
+            self.timestamp, self.task_id, self.op_type, self.op_name)]
+        if len(self.input_files) > 0:
+            info = self.input_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Input: [green][0][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.input_files)):
+                info = self.input_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('   [green][%d][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        if len(self.output_files) > 0:
+            info = self.output_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Output: [green][0][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.output_files)):
+                info = self.output_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('   [green][%d][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        return Constant.NEW_LINE.join(txt)
+
+    def _check(self, file_info):
+        if self.timestamp == -1:
+            self.timestamp = file_info.timestamp
+            self.op_name = file_info.op_name
+            self.op_type = file_info.op_type
+            self.task_id = file_info.task_id
+            # self.stream_id = file_info.stream_id
+            return True
+        return self.timestamp == file_info.timestamp
+
+
+class NpuDump(object):
+    def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_id = debug_id
+        npu_root = os.path.join(cfg.NPU_DIR, debug_id)
+        self.dump_root = os.path.join(npu_root, Constant.DUMP)
+        
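+        # decoded .npy files for this debug_id go to a per-debug-id subdir of
+        # cfg.DUMP_DECODE_DIR, so decode results of different debug runs stay separate.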
self.decode_dir = os.path.join(cfg.DUMP_DECODE_DIR, debug_id) + self.dump_files = None + self._init_dirs() + + def prepare(self): + """Prepare npu/cpu dump files""" + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get npu dump files by Op""" + npu_files = {} + op_name = op.name().replace('/', '_').replace('.', '_') + match_name = op.type() + '.' + op_name + '\\.' + for f in self.dump_files: + # match op name and graph name, infer dump directory may not has graph + if re.match(match_name, f) and (op.graph_name in self.dump_files[f].path or cfg.NET_TYPE == 'infer'): + npu_files[f] = self.dump_files[f] + return npu_files + + @catch_tool_exception + def op_dump_summary(self, op): + """ print op dump info""" + if op is None: + raise PrecisionToolException("Get None operator") + # search npu dump file by op name + npu_dump_files = self.get_npu_dump_decode_files_by_op(op) + npu_dump_files = sorted(npu_dump_files.values(), key=lambda x: (x.idx, x.timestamp)) + input_txt = ['NpuDumpInput:'] + output_txt = ['NpuDumpOutput:'] + for npu_dump_file in npu_dump_files: + if str(npu_dump_file.file_name).endswith(Constant.Suffix.CSV): + detail = util.read_csv(npu_dump_file.path) + input_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + for item in detail: + item_txt = '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (item[5], item[3], item[6], item[7], item[8]) + if item[0] == 'Input': + input_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + elif item[0] == 'Output': + output_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + continue + if npu_dump_file.type == 'input': + input_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + input_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + else: + output_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + input_txt.extend(output_txt) + return Constant.NEW_LINE.join(input_txt) + + def _init_dirs(self): + util.create_dir(self.dump_root) + util.create_dir(self.decode_dir) + + @catch_tool_exception + def _parse_dump_files(self): + """prepare npu dump, support soft link""" + sub_dir = util.get_newest_dir(self.dump_root) + sub_dir = os.path.join(self.dump_root, sub_dir) if sub_dir != '' else self.dump_root + self.dump_files = util.list_npu_dump_files(sub_dir) + + def list_dump(self, dir_path, file_name): + """list dump""" + + @staticmethod + def get_npu_dump_decode_files_by_name(file_name): + file_name = file_name.replace('/', '_') + return util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name) + + def get_npu_dump_decode_files_by_op(self, op): + """Get npu dump decode files by op""" + dump_files = self.get_dump_files_by_op(op) + result = {} + for dump_file_key in dump_files.keys(): + dump_file = dump_files[dump_file_key] + if str(dump_file.file_name).endswith(Constant.Suffix.CSV): + result.update({dump_file_key: dump_file}) + continue + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + if len(dump_decode_files) == 0: + util.convert_dump_to_npy(dump_file.path, self.decode_dir) + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + result.update(dump_decode_files) + return result + + def 
convert_npu_dump(self, name, data_format=None, dst_path=None): + """Convert npu dump to npy of data_format""" + if os.path.isfile(name): + # absolute path to file + self.log.info("Decode file: %s", name) + file_name = os.path.basename(name) + file_path = name + elif os.path.isdir(name): + # decode all files in path + self.log.info("Decode all files in path: %s", name) + file_name = '' + file_path = name + elif self.dump_files is not None and name in self.dump_files: + self.log.info("Decode npu dump file: %s in default dump path", name) + file_info = self.dump_files[name] + file_name = file_info.file_name + file_path = file_info.path + else: + # maybe op name + file_info = self._get_file_by_op_name(name) + if file_info is None: + raise PrecisionToolException("Can not find any op/dump file named %s" % name) + file_name = file_info.file_name + file_path = file_info.path + dst_path = cfg.DUMP_CONVERT_DIR if dst_path is None else dst_path + util.convert_dump_to_npy(file_path, dst_path, data_format) + dump_convert_files = util.list_npu_dump_convert_files(dst_path, file_name) + # print result info + + summary_txt = ['SrcFile: %s' % name] + for convert_file in dump_convert_files.values(): + summary_txt.append(' - %s' % convert_file.file_name) + util.print_panel(Constant.NEW_LINE.join(summary_txt)) + + def _get_file_by_op_name(self, op_name): + """Get dump file info by op name""" + op_name = op_name.replace('/', '_') + for file_info in self.dump_files.values(): + if file_info.op_name == op_name: + return file_info + return None diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py new file mode 100644 index 000000000..27fd006dc --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py @@ -0,0 +1,65 @@ +# coding=utf-8 +from ..util.util import util +from ..util.h5_util import H5Util +from ..util.h5_util import gen_h5_data_name +from ..config import config as cfg +from ..util.constant import Constant + + +class PtDump(object): + def __init__(self, data_dir): + self.log = util.get_log() + self.npu = None + self.gpu = None + self.data_dir = data_dir + + def prepare(self): + util.create_dir(cfg.PT_NPU_DIR) + util.create_dir(cfg.PT_GPU_DIR) + util.create_dir(cfg.PT_DUMP_DECODE_DIR) + if not util.empty_dir(cfg.PT_NPU_DIR): + npu_h5_files = util.list_h5_files(cfg.PT_NPU_DIR) + if len(npu_h5_files) != 0: + file_list = sorted(npu_h5_files.values(), key=lambda x: x.timestamp) + self.npu = H5Util(file_list[0].path, prefix='npu') + if not util.empty_dir(cfg.PT_GPU_DIR): + gpu_h5_files = util.list_h5_files(cfg.PT_GPU_DIR) + if len(gpu_h5_files) != 0: + file_list = sorted(gpu_h5_files.values(), key=lambda x: x.timestamp) + self.gpu = H5Util(file_list[0].path, prefix='gpu') + + @staticmethod + def get_dump_files_by_name(file_name): + """Get dump files by name""" + npu_pattern = gen_h5_data_name(file_name, 'npu') if '/' in file_name else file_name + gpu_pattern = gen_h5_data_name(file_name, 'gpu') if '/' in file_name else file_name + files = util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=npu_pattern) + files.update(util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=gpu_pattern)) + return files + + def op_dump_summary(self, ir_name): + summary_list = [] + op_id = self._parse_op_id(ir_name) + if self.npu is not None: + h5_op = 
self.npu.get_op(op_id) + if h5_op is not None: + summary_list.append('NPU:') + summary_list.append(h5_op.summary()) + if self.gpu is not None: + h5_op = self.gpu.get_op(op_id) + if h5_op is not None: + summary_list.append('GPU:') + summary_list.append(h5_op.summary()) + return Constant.NEW_LINE.join(summary_list) + + @staticmethod + def _parse_op_id(ir_name): + op_id = str(ir_name) + if op_id.isnumeric(): + op_id = ir_name + else: + for name in op_id.split('/'): + if name.isnumeric(): + op_id = name + break + return op_id diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py new file mode 100644 index 000000000..bb4230f54 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py @@ -0,0 +1,141 @@ +# coding=utf-8 +import os +import re +import time +import sys +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class TfDump(object): + def __init__(self, dump_root=cfg.TF_DUMP_DIR): + self.log = util.get_log() + self.dump_root = dump_root + self.dump_files = None + + def prepare(self): + if not os.path.exists(self.dump_root): + util.create_dir(self.dump_root) + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get cpu dump files by op""" + tf_files = {} + for output in op.outputs(): + if output.data_dump_origin_name() != '': + tf_files.update(self.get_dump_files_by_name(output.data_dump_origin_name())) + if len(tf_files) == 0: + tf_files.update(self.get_dump_files_by_name(op.name())) + return tf_files + + def get_dump_files_by_name(self, name, likely=False): + match_name = name.replace('/', '_') + if not likely: + match_name = match_name.replace('.', '_') + '\\.' + tf_files = {} + for f in self.dump_files: + if re.match(match_name, f): + tf_files[f] = self.dump_files[f] + return tf_files + + @catch_tool_exception + def op_dump_summary(self, op): + # cpu dump info + if op is None: + return '' + cpu_dump_txt = ['TfDumpOutput:'] + cpu_dump_files = self.get_dump_files_by_op(op) + cpu_dump_file_list = sorted(cpu_dump_files.values(), key=lambda x: x.timestamp) + for cpu_dump_file in cpu_dump_file_list: + cpu_dump_txt.append(' -[green][%s][/green] %s' % (cpu_dump_file.idx, cpu_dump_file.file_name)) + cpu_dump_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(cpu_dump_file.path)) + return Constant.NEW_LINE.join(cpu_dump_txt) + + def _parse_dump_files(self): + self.dump_files = util.list_cpu_dump_decode_files(self.dump_root) + + def run_tf_dbg_dump(self, cmd_line=None): + """Run tf train script to get dump data.""" + if os.path.exists(cfg.TF_DEBUG_DUMP_DIR) and len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) != 0: + self.log.info("TF offline debug path [%s] is not empty, will analyze it directly." % cfg.TF_DEBUG_DUMP_DIR) + elif cmd_line is not None: + self.log.info("Run command: %s" % cmd_line) + util.execute_command(cmd_line) + self.log.info("Run finish, start analyze TF dump.") + if not os.path.exists(cfg.TF_DEBUG_DUMP_DIR) or len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) == 0: + raise PrecisionToolException("Empty tf debug dir. 
%s" % cfg.TF_DEBUG_DUMP_DIR) + run_dirs = os.listdir(cfg.TF_DEBUG_DUMP_DIR) + run_dirs.sort() + # create dirs + util.create_dir(cfg.TF_DUMP_DIR) + util.create_dir(cfg.TMP_DIR) + # extra the last run dir + for run_dir in run_dirs: + time.sleep(1) + command = "%s -m tensorflow.python.debug.cli.offline_analyzer --ui_type readline --dump_dir %s" % ( + util.python, os.path.join(cfg.TF_DEBUG_DUMP_DIR, run_dir)) + self._do_run_tf_dbg_dump(command, 0) + + @staticmethod + def _make_pt_commands(tensor_name_path): + pt_command_list = [] + tensor_count = {} + with open(tensor_name_path) as tensor_name_file: + # skip 3 line + next(tensor_name_file) + next(tensor_name_file) + next(tensor_name_file) + # start to convert tensor to pt command + for line in tensor_name_file: + new_line = line.strip() + tensor_name = new_line[new_line.rfind(' ') + 1:] + if tensor_name not in tensor_count: + tensor_count[tensor_name] = 0 + else: + tensor_count[tensor_name] += 1 + + npy_file_name = "%s.%s.npy" % (tensor_name.replace("/", "_").replace(":", "."), + str(round(time.time() * 1000000))) + pt_command_list.append("pt %s -n %d -w %s" % + (tensor_name, tensor_count[tensor_name], + os.path.join(cfg.TF_DUMP_DIR, npy_file_name))) + return pt_command_list + + def _do_run_tf_dbg_dump(self, cmd_line, run_times=2): + """Run tf debug with pexpect, should set tf debug ui_type='readline'""" + try: + import pexpect + import readline + except ImportError as import_err: + self.log.error("Import failed with err:%s. You can run " + "'pip3 install pexpect gnureadline pyreadline' to fix it.", + import_err) + raise PrecisionToolException("Import module error.") + self.log.info("======< Auto run tf train process to dump data >======") + self.log.info("Send run times: %d", run_times) + tf_dbg = pexpect.spawn(cmd_line) + # tf_dbg.logfile = open(cfg.DUMP_FILES_CPU_LOG, 'wb') + tf_dbg.logfile = sys.stdout.buffer + for i in range(run_times): + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + self.log.info("Process %d tf_debug.run", i + 1) + tf_dbg.sendline('run') + self.log.info("Generate tensor name file.") + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + tf_dbg.sendline('lt > %s' % cfg.TF_TENSOR_NAMES) + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + if not os.path.exists(cfg.TF_TENSOR_NAMES): + self.log.error("Failed to get tensor name in tf_debug.") + raise PrecisionToolException("Get tensor name in tf_debug failed.") + self.log.info("Save tensor name success. Generate tf dump commands from file: %s", cfg.TF_TENSOR_NAMES) + pt_commands = self._make_pt_commands(cfg.TF_TENSOR_NAMES) + self.log.info("Pt %d tensors." 
% len(pt_commands)) + for cmd in pt_commands: + self.log.debug(cmd.strip()) + tf_dbg.sendline(cmd.strip()) + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + tf_dbg.sendline('exit') + self.log.info('Finish dump tf data') -- Gitee From 7402ffda3526e9ba678f5d32c2679a8cf9c55570 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:44:44 +0000 Subject: [PATCH 30/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/graph/desc.py | 154 +++++++++ .../precision_tool/lib/graph/graph_manager.py | 111 ++++++ .../precision_tool/lib/graph/npu_graph.py | 316 ++++++++++++++++++ .../precision_tool/lib/graph/op.py | 255 ++++++++++++++ .../precision_tool/lib/graph/tf_graph.py | 45 +++ 5 files changed, 881 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py new file mode 100644 index 000000000..5ac35dbfd --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py @@ -0,0 +1,154 @@ +# coding=utf-8 +from ..util.util import util + +ATTR = 'attr' +ATTR_KEY = 'key' +ATTR_VALUE = 'value' +DATA_DUMP_ORIGIN_OUTPUT_INDEX = '_datadump_origin_output_index' +FUSION_ORIGIN_OUTPUT_INDEX = '_fusion_origin_output_index' +DATA_DUMP_ORIGIN_NAME = '_datadump_origin_name' +ORIGIN_FORMAT = 'origin_format' +ORIGIN_SHAPE = 'origin_shape' +VALUE_RANGE = 'value_range' +SHAPE_RANGE = 'shape_range' +DT_STRING = 's' +DT_INT = 'i' +DT_LIST_LIST_INT = 'list_list_int' +DT_LIST_LIST_I = 'list_list_i' +DT_LIST = 'list' +DT_LIST_INT = 'list_i' +DATA_TYPE_DEFAULT_VALUE = { + 'i': 0, + 's': '' +} + + +class Desc(object): + """ Op desc + shape: data shape + dtype: data type + format: data format + npu_file: npu file name/path + cpu_file: cpu file name/path + idx: input idx + """ + def __init__(self, desc_json, index): + self.desc_json = desc_json + self.index = index + self.log = util.get_log() + + def idx(self): + return self.index + + def shape(self): + return self.desc_json['shape']['dim'] if 'shape' in self.desc_json else [] + + def dtype(self): + return self.desc_json['dtype'] if 'dtype' in self.desc_json else '' + + def format(self): + return self.desc_json['layout'] if 'layout' in self.desc_json else [] + + def origin_shape(self): + return self._get_attr_list(ORIGIN_SHAPE, DT_INT) + + def origin_format(self): + return self._get_attr(ORIGIN_FORMAT, DT_STRING) + + def value_range(self): + return self._get_attr_list_list(VALUE_RANGE, DT_LIST_INT) + + def shape_range(self): + return self._get_attr_list_list(SHAPE_RANGE, DT_LIST_INT) + + def _get_attr_list_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST_LIST_INT) + if val is None or DT_LIST_LIST_I not in val: + return [] + res = [] + for item in 
val[DT_LIST_LIST_I]: + if data_type in item: + res.append(item[data_type]) + return res + + def _get_attr_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST) + return val[data_type] if val is not None and data_type in val else [] + + def _get_attr(self, key, data_type): + val = self._get_attr_base(key, data_type) + return val if val is not None else DATA_TYPE_DEFAULT_VALUE[data_type] + + def _get_attr_base(self, key, data_type): + if ATTR in self.desc_json: + for attr in self.desc_json[ATTR]: + if attr[ATTR_KEY] == key: + if attr[ATTR_VALUE] is not None and data_type in attr[ATTR_VALUE]: + return attr[ATTR_VALUE][data_type] + return None + + def compare(self, right_desc): + if self.dtype() == right_desc.dtype() and self.format() == right_desc.format(): + return "[green][%d] [%s][%s] %s[/green]" % (self.idx(), self.dtype(), self.format(), self.shape()), True + else: + return "[yellow][%d] [%s][%s] %s | [%s][%s] %s[/yellow]" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + right_desc.dtype(), right_desc.format(), right_desc.shape()), False + + def data_dump_origin_name(self): + return '' + + +class InputDesc(Desc): + def __init__(self, name, desc_json, index): + super(InputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid input name.') + name_info = name.split(':') + self.op_name = name + self.peer_index = -2 + if len(name_info) == 2: + self.op_name = name_info[0] + self.peer_index = int(name_info[1]) + + def name(self): + return self.op_name + + def peer_idx(self): + return self.peer_index + + def is_control(self): + return self.peer_index == -1 + + def summary(self, origin_txt=False): + """idx | dtype | format | shape | [blue]value_range | shape_range| origin_shape[/blue] | op_name | peer_idx""" + if origin_txt: + return "[%d][%s][%s]%s %s:%d" % (self.idx(), self.dtype(), self.format(), + self.shape(), self.name(), self.peer_idx()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s:%d" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.name(), self.peer_idx()) + + +class OutputDesc(Desc): + def __init__(self, name, desc_json, index): + super(OutputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid output name.') + self.op_names = name.split(':') + + def names(self): + return self.op_names + + def summary(self, origin_txt=False): + if origin_txt: + return "[%d][%s][%s]%s %s" % (self.idx(), self.dtype(), self.format(), self.shape(), self.names()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.names()) + + def data_dump_origin_name(self): + return self._get_attr(DATA_DUMP_ORIGIN_NAME, DT_STRING) + + def data_dump_origin_output_index(self): + return self._get_attr(DATA_DUMP_ORIGIN_OUTPUT_INDEX, DT_INT) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py new file mode 100644 index 000000000..cef861741 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py @@ -0,0 +1,111 @@ +# coding=utf-8 +""" +Graph Manager +""" +import os +import collections +from 
..util.constant import Constant +from .npu_graph import NpuGraph +from .tf_graph import TfGraph +from ..util.util import util +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class GraphManager(object): + def __init__(self): + self.log = util.get_log() + self.npu_graphs = collections.OrderedDict() + self.tf_graph = None + + def prepare(self): + # prepare npu graphs + if not os.path.exists(cfg.NPU_DIR): + util.create_dir(cfg.NPU_DIR) + sub_dirs = os.listdir(cfg.NPU_DIR) + if len(sub_dirs) == 0: + # create default dir + sub_dirs = [Constant.DEFAULT_DEBUG_ID] + for sub_dir in sub_dirs: + npu_graph = NpuGraph(sub_dir) + npu_graph.prepare() + self.npu_graphs[sub_dir] = npu_graph + # prepare cpu graph + self.tf_graph = TfGraph(cfg.TF_GRAPH_DIR) + + def check_cast(self): + for graph in self.npu_graphs.values(): + graph.check_cast() + + def check_dtype(self): + for graph in self.npu_graphs.values(): + graph.check_dtype() + + def check_similarity(self): + self._check_npu_graph_similarity() + + def _check_npu_graph_similarity(self): + """Check npu graph similarity""" + if len(self.npu_graphs) < 2: + self.log.debug("Only one npu debug, no need to check npu graph similarity.") + return + left_graphs = self.npu_graphs[Constant.DEFAULT_DEBUG_ID].sub_graphs + right_graphs = self.npu_graphs[Constant.NPU_DEBUG_ID_1].sub_graphs + for left_graph in left_graphs.values(): + for right_graph in right_graphs.values(): + if left_graph.graph_id != right_graph.graph_id: + continue + left_graph.compare(right_graph) + + def get_graphs(self, debug_id): + if debug_id not in self.npu_graphs: + raise PrecisionToolException("Get graphs failed with no debug_id:%s" % debug_id) + return self.npu_graphs[debug_id].build_json_files + + def get_ops(self, op_name, graph_name=None): + """ Get npu/tf ops by op_name + :param op_name: op name + :param graph_name: graph name + :return: npu op dict: debug_id->Op, tf op + """ + npu_ops = collections.OrderedDict() + for debug_id, npu_graph in self.npu_graphs.items(): + npu_ops[debug_id] = npu_graph.get_op(op_name, graph_name) + # tf graph op + return npu_ops, None + + def print_op_list(self, op_type='', op_name='', pass_name='', kernel_name=''): + if op_type == '' and op_name == '' and pass_name == '' and kernel_name == '': + table_list = [] + for debug_id, graph in self.npu_graphs.items(): + table = util.create_table(debug_id, ["OpType", "Count"]) + op_type_counter = collections.Counter() + for op in graph.ops_list: + op_type_counter[op.type()] += 1 + for op_types, count in op_type_counter.items(): + table.add_row(op_types, str(count)) + table_list.append(table) + util.render(util.create_columns(table_list)) + + else: + for debug_id, graph in self.npu_graphs.items(): + ops = graph.list_ops(op_type, op_name, pass_name, kernel_name) + ops_txt = ['[green][%s][/green][yellow][%s][/yellow] %s' % ( + op.type(), op.pass_name(), op.name()) for op in ops] + util.print_panel(Constant.NEW_LINE.join(ops_txt), debug_id) + + @staticmethod + def op_graph_summary(ops, attr_detail=False): + npu_summary = collections.OrderedDict() + for debug_id, op in ops.items(): + npu_summary[debug_id] = collections.OrderedDict() + for op_detail in op: + npu_summary[debug_id][op_detail.graph_name] = op_detail.summary(attr_detail=attr_detail) + return npu_summary, None + + def save_sub_graph(self, ops, deep): + for debug_id, op in ops.items(): + if debug_id in self.npu_graphs: + for op_detail in 
op: + self.npu_graphs[debug_id].save_sub_graph(op_detail, deep) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py new file mode 100644 index 000000000..5a93b9718 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py @@ -0,0 +1,316 @@ +# coding=utf-8 +""" +Graph Manager +""" +import json +import os +import collections +import time +from .op import Op +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + +DANGEROUS_CAST = { + 'DT_FLOAT': ['DT_INT32'] +} + +NO_DIG_OPS = ['AtomicAddrClean', 'NetOutput'] +CKPT_META_SHUFFIX='.meta' + +OP_CAST = 'Cast' + + +class NpuSubGraph(object): + def __init__(self, graph_json, build_file, npu_graph): + self.log = util.get_log() + self.graph_name = graph_json['name'] + self.npu_graph = npu_graph + self.graph = graph_json + self.build_file = build_file + self.ops_list = collections.OrderedDict() + self.ops_type_list = {} + self._prepare() + self.graph_id = self._get_graph_id() + + def _prepare(self): + self.log.debug("Graph %s operator count: %d" % (self.graph_name, len(self.graph['op']))) + for op_json in self.graph['op']: + op_name = op_json['name'] + op_type = op_json['type'] + if op_name not in self.ops_list: + self.ops_list[op_name] = [] + op = Op(op_json, self.ops_list, self.graph['name'], self.npu_graph, self) + if op_type not in self.ops_type_list: + self.ops_type_list[op_type] = {} + self.ops_list[op_name] = op + self.ops_type_list[op_type][op_name] = op + + def _get_graph_id(self): + if 'attr' in self.graph: + for item in self.graph['attr']: + if item['key'] == '_session_graph_id': + return item['value']['s'] + self.log.warning("Unknown sub graph id.") + return "UNKNOWN" + + def compare(self, sub_graph): + """compare with another sub graph""" + if not isinstance(sub_graph, NpuSubGraph): + raise PrecisionToolException("Should compare with another subgraph.") + right_ops_list = sub_graph.ops_list + ignore_ops = ["TransData", "Cast", "Recv", "Send", "Variable", "NetOutput", "NoOp", "Assign", "Constant", + "StreamActive"] + similar_count = 0 + for op_name in self.ops_list: + if self.ops_list[op_name].type() in ignore_ops: + continue + if op_name not in right_ops_list: + self.log.warning("Can not Find [%s] %s in right subgraph.", self.ops_list[op_name].type(), op_name) + continue + result, similar = self.ops_list[op_name].compare(right_ops_list[op_name]) + if not similar: + util.print_panel(result, title=op_name) + else: + similar_count += 1 + for op_name in right_ops_list: + if right_ops_list[op_name].type() in ignore_ops: + continue + if op_name not in self.ops_list: + self.log.warning("Can not Find [%s] %s in left subgraph.", right_ops_list[op_name].type(), op_name) + self.log.info("Compare [%s] [%s], similarity is [%s / %s]", + self.graph_name, sub_graph.graph_name, similar_count, len(self.ops_list)) + + def get_op(self, name): + if name in self.ops_list: + return [self.ops_list[name]] + guess_op_list = [] + for op_detail in self.ops_list.values(): + if name in op_detail.name() or name == str(op_detail.name()).replace('/', '_'): + guess_op_list.append(op_detail) + return guess_op_list + + def 
get_parent_node_by_subgraph_name(self, graph_name): + ops = [] + for op_detail in self.ops_list.values(): + if graph_name in op_detail.subgraph_names(): + ops.append(op_detail) + return ops + + def get_op_by_type(self, op_type): + ops = [] + for op_detail in self.ops_list.values(): + if op_type == op_detail.type(): + ops.append(op_detail) + return ops + + def check_cast(self): + cast_list = {} + danger_cast_list = {} + if OP_CAST in self.ops_type_list: + cast_ops = self.ops_type_list[OP_CAST] + for op in cast_ops.values(): + input_type = '' + output_type = '' + for input_desc in op.inputs(): + input_type = input_desc.dtype() if input_desc.dtype() != '' else input_type + for output_desc in op.outputs(): + output_type = output_desc.dtype() if output_desc.dtype() != '' else output_type + cast_type = "%s -> %s" % (input_type, output_type) + if cast_type not in cast_list: + cast_list[cast_type] = [] + cast_list[cast_type].append(op.name()) + for cast_type in cast_list: + if self._is_dangerous_cast(cast_type): + summary_txt = "[green][Cast][/green][red][%s][/red] %s" % (cast_type, cast_list[cast_type]) + util.print(summary_txt) + + @staticmethod + def _is_dangerous_cast(cast_type): + """Check if cast """ + cast_info = cast_type.split(" -> ") + input_dtype = cast_info[0] + output_dtype = cast_info[1] + if input_dtype in DANGEROUS_CAST: + if output_dtype in DANGEROUS_CAST[input_dtype]: + return True + return False + + +class NpuGraph(object): + def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID): + self.log = util.get_log() + self.build_files = None + self.build_json_files = [] + self.debug_id = debug_id + self.npu_root = os.path.join(cfg.NPU_DIR, debug_id) + self.graph_root = os.path.join(self.npu_root, Constant.GRAPH) + self.sub_graphs = collections.OrderedDict() + self.ops_list = [] + util.create_dir(self.graph_root) + + @catch_tool_exception + def prepare(self): + """prepare""" + self._prepare_npu_graphs() + if self.build_files is not None: + for build_file in self.build_files: + self._parse_ops(build_file) + + def check_cast(self): + """Check cast op type""" + for sub_graph in self.sub_graphs.values(): + sub_graph.check_cast() + + def check_dtype(self): + """Check op input/output dtype""" + for op in self.ops_list: + input_dtype = '' + for input_desc in op.inputs(): + input_dtype += ' ' + input_desc.dtype() + output_dtype = '' + for output_desc in op.outputs(): + output_dtype += ' ' + output_desc.dtype() + util.print('[green][%s][/green] %s\n - Input: %s\n - Output: %s' % ( + op.type(), op.name(), input_dtype, output_dtype)) + + def check_similarity(self): + """Check graph similarity.""" + + @catch_tool_exception + def save_sub_graph(self, op, deep=0, dump_manager=None, compare_manager=None): + """Save sub graph""" + if op is None: + raise PrecisionToolException("Save sub graph failed as root operator is None.") + try: + from graphviz import Digraph + file_name_list = [self.debug_id, op.graph_name, op.type(), op.name().replace('/', '_').replace('.', '_'), + str(deep), 'gv'] + file_name = '.'.join(file_name_list) + path = os.path.join(cfg.OP_GRAPH_DIR, file_name) + dot = Digraph(file_name, filename=path, node_attr={'shape': 'Mrecord'}, format='svg') + dot_list = [] + edge_list = [] + self._gen_sub_graph(dot, op, deep, dot_list, edge_list, 'red', direction='all', + dump_manager=dump_manager, compare_manager=compare_manager) + dot.format = 'svg' + dot.save(path) + self.log.info("Sub graph saved to %s" % os.path.abspath(cfg.OP_GRAPH_DIR)) + try: + dot.view(path) + time.sleep(1) + except 
Exception as err: + raise PrecisionToolException( + "graphviz not install, use [yum/apt-get] install graphviz xdg-utils. %s" % err) + except ImportError as err: + raise PrecisionToolException("Save sub graph failed as import graphviz module failed. %s" % err) + + def _gen_sub_graph(self, dot, op, deep, dot_list, edge_list, color='black', direction='all', + dump_manager=None, compare_manager=None): + """Gen sub graph""" + if deep == 0 or op.type() in NO_DIG_OPS: + return + if op.name() not in dot_list: + dot.node(op.name(), self._gen_sub_graph_label(op), color=color, tooltip=op.summary(True)) + dot_list.append(op.name()) + # add input and output + for desc in op.inputs(): + sub_op = self.get_op(desc.name(), op.graph_name) + if len(sub_op) != 0: + sub_op = sub_op[0] + if direction in ['all', 'input']: + self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='input') + if sub_op.name() in dot_list: + src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx()) + else: + dot.node(sub_op.name(), self._gen_sub_graph_label(sub_op), color=color, tooltip=op.summary(True)) + src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx()) + dst_edge = '%s:i%d' % (op.name(), desc.idx()) + if src_edge + dst_edge not in edge_list: + dot.edge(src_edge, dst_edge) + edge_list.append(src_edge + dst_edge) + # add output + for desc in op.outputs(): + for out_node_name in desc.names(): + sub_op = self.get_op(out_node_name, op.graph_name) + if len(sub_op) != 0 and direction in ['all', 'output']: + sub_op = sub_op[0] + self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='output') + + def _gen_sub_graph_label(self, op): + input_labels = [] + for desc in op.inputs(): + input_labels.append(self._gen_sub_graph_desc(desc, 'i')) + output_labels = [] + for desc in op.outputs(): + output_labels.append(self._gen_sub_graph_desc(desc, 'o')) + str_cell = '|' + return '{{ %s } | [%s] %s | { %s }}' % (str_cell.join(input_labels), op.type(), op.name(), + str_cell.join(output_labels)) + + @staticmethod + def _gen_sub_graph_desc(desc, id_prefix): + desc_str = r'<%s%d> [%d]' % (id_prefix, desc.idx(), desc.idx()) + desc_str = r'%s [%s]' % (desc_str, desc.dtype()) if desc.dtype() != '' else desc_str + desc_str = r'%s\n%s' % (desc_str, desc.shape()) if len(desc.shape()) != 0 else desc_str + return desc_str + + def list_ops(self, op_type='', op_name='', pass_name='', kernel_name=''): + """list ops in graph""" + return filter(lambda op: op_type in op.type() and op_name in op.name() and ( + pass_name == '' or pass_name in op.pass_name()) and kernel_name in op.kernel_name(), self.ops_list) + + def get_op(self, name, graph_name=None): + """get op by name""" + # get op in specific sub graph + if graph_name is not None and graph_name in self.sub_graphs: + return self.sub_graphs[graph_name].get_op(name) + ops = [] + for sub_graph in self.sub_graphs.values(): + ops.extend(sub_graph.get_op(name)) + # check if there is an exact match operation + match_ops = list(filter(lambda x: x.name() == name, ops)) + if len(match_ops) != 0: + return match_ops + # return guess operations by name + self.log.info("Can not find Operator named %s. 
You may mean the operator below.", name) + guess_op_name_list = ['[green][%s][/green] %s' % (x.type(), x.name()) for x in ops] + util.print_panel(Constant.NEW_LINE.join(guess_op_name_list), title='Possible Operators') + return ops + + def get_parent_node_by_subgraph_name(self, graph_name): + ops = [] + for sub_graph in self.sub_graphs.values(): + ops.extend(sub_graph.get_parent_node_by_subgraph_name(graph_name)) + return ops + + def _prepare_npu_graphs(self): + """prepare ge graphs """ + # move graphs to precision data dir + graph_files = util.list_ge_graph_files(self.graph_root) + self.build_files = sorted(filter(lambda x: x.graph_name == cfg.BUILD_JSON_GRAPH_NAME, graph_files.values()), + key=lambda x: x.graph_id) + if len(self.build_files) == 0: + self.log.warning("Can not find any build files in dir: %s", self.graph_root) + self.log.info("Find [%d] GE build files.", len(self.build_files)) + + @catch_tool_exception + def _parse_ops(self, build_file): + """Parse *_Build.txt.json to op objects.""" + build_file_json = build_file.path + '.json' + build_file_json = util.convert_proto_to_json(build_file.path, build_file_json) + if build_file_json is not None: + self.build_json_files.append(build_file_json) + with open(build_file_json, 'r') as f: + graph_json = json.load(f) + if 'graph' not in graph_json: + raise PrecisionToolException("No graph in file: %s" % build_file.file_name) + if len(graph_json['graph']) != 1: + self.log.warning("There is more than one graph in the GE build file, found %d" % len(graph_json['graph'])) + # sub_graphs = [] + for graph in graph_json['graph']: + npu_sub_graph = NpuSubGraph(graph, build_file, self) + self.sub_graphs[graph['name']] = npu_sub_graph + self.ops_list.extend(npu_sub_graph.ops_list.values()) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py new file mode 100644 index 000000000..2dbd5a31b --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py @@ -0,0 +1,255 @@ +# coding=utf-8 +import json +import re +from typing import List +from .desc import InputDesc +from .desc import OutputDesc +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import PrecisionToolException + +NO_INPUT_NODES = ['Data', 'AtomicAddrClean', 'Recv', 'Constant'] +NO_OUTPUT_NODES = ['Send', 'Recv', 'NetOutput', 'PartitionedCall'] + +JSON_KEY_NAME = 'name' +JSON_KEY_ID = 'id' +JSON_KEY_TYPE = 'type' +JSON_KEY_ATTR = 'attr' +JSON_KEY = 'key' +JSON_VALUE = 'value' +JSON_KEY_LIST = 'list' +JSON_KEY_STR = 's' +JSON_KEY_INT = 'i' +JSON_KEY_INPUT_I = 'input_i' +JSON_KEY_OUTPUT_I = 'output_i' +JSON_KEY_PASS_NAME = 'pass_name' +JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES = '_datadump_original_op_names' +JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME = "_ge_attr_op_kernel_lib_name" +JSON_KEY_PARENT_NODE_INDEX = "_parent_node_index" +JSON_KEY_SUBGRAPH_NAME = "subgraph_name" + +KERNEL_NAME_SHUFFIX = '_kernelname' + + +class Op(object): + """ Op class. 
+ name: op name + type: op type + inputs: list of input descs + outputs: list of output descs + """ + def __init__(self, op_json, op_list, graph_name, npu_graph, sub_graph): + """Init""" + self.op_json = op_json + self.op_list = op_list + self.graph_name = graph_name + self.npu_graph = npu_graph + self.sub_graph = sub_graph + self.input_list = None + self.output_list = None + self.log = util.get_log() + + def name(self): + """Get op name""" + return self.op_json[JSON_KEY_NAME] + + def id(self): + """Get op id""" + return self.op_json[JSON_KEY_ID] if JSON_KEY_ID in self.op_json else '' + + def json(self): + return json.dumps(self.op_json, indent=2) + + def type(self): + """Get op type""" + return self.op_json[JSON_KEY_TYPE] + + def subgraph_names(self): + return self.op_json[JSON_KEY_SUBGRAPH_NAME] if JSON_KEY_SUBGRAPH_NAME in self.op_json else [] + + def inputs(self): + """Get the input list""" + if self.input_list is None: + self._parse_inputs() + if len(self.input_list) == 0 and self.type() == 'Data': + # Looking for Real Data + self._looking_for_real_inputs() + return self.input_list + + def input_addr(self): + return self.op_json[JSON_KEY_INPUT_I] + + def outputs(self): + """Get output list""" + if self.output_list is None: + self._parse_outputs() + if len(self.output_list) == 0 and self.type() == 'PartitionedCall': + self._looking_for_real_outputs() + return self.output_list + + def output_addr(self): + return self.op_json[JSON_KEY_OUTPUT_I] + + def pass_name(self): + return self._attr(JSON_KEY_PASS_NAME) + + def kernel_name(self): + return self._attr(self.name() + KERNEL_NAME_SHUFFIX) + + def ge_attr_op_kernel_lib_name(self): + return self._attr(JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME) + + def data_dump_original_op_names(self): + return self._attr(JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES) + + def parent_node_index(self): + return self._attr(JSON_KEY_PARENT_NODE_INDEX) + + def _attr(self, key): + if JSON_KEY_ATTR in self.op_json: + for attr in self.op_json[JSON_KEY_ATTR]: + if key == attr[JSON_KEY]: + if JSON_KEY_STR in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_STR] + elif JSON_KEY_LIST in attr[JSON_VALUE]: + if JSON_KEY_STR in attr[JSON_VALUE][JSON_KEY_LIST]: + return attr[JSON_VALUE][JSON_KEY_LIST][JSON_KEY_STR] + elif JSON_KEY_INT in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_INT] + else: + self.log.warning("Unknown attr format: %s", attr[JSON_VALUE]) + return '' + + def compare(self, right_op): + """Compare with another op""" + if not isinstance(right_op, Op): + raise PrecisionToolException("Should compare with another op.") + res_str = ['LeftOp(Type/Name) : [green][%s][/green] %s' % (self.type(), self.name()), + 'RightOp(Type/Name): [green][%s][/green] %s' % (right_op.type(), right_op.name())] + similar = True + if len(self.inputs()) != len(right_op.inputs()): + res_str.append("Input: [yellow]Input num mismatch.[/yellow]") + else: + res_str.append("Input:") + for left_input in self.inputs(): + for right_input in right_op.inputs(): + if left_input.idx() != right_input.idx(): + continue + txt, input_similar = left_input.compare(right_input) + res_str.append(' - ' + txt) + similar = similar and input_similar + if len(self.outputs()) != len(right_op.outputs()): + res_str.append("Output: [yellow]Output num mismatch.[/yellow]") + else: + res_str.append("Output:") + for left_output in self.outputs(): + for right_output in right_op.outputs(): + if left_output.idx() != right_output.idx(): + continue + txt, output_similar = left_output.compare(right_output) + res_str.append(' 
- ' + txt) + similar = similar and output_similar + return Constant.NEW_LINE.join(res_str), similar + + def _attr_detail(self): + """Gen attr details""" + res_str = [] + if JSON_KEY_ATTR in self.op_json: + res_str = [' ' + str(i) for i in self.op_json[JSON_KEY_ATTR]] + return Constant.NEW_LINE.join(res_str) + + def summary(self, origin_txt=False, attr_detail=False): + """Summary of current op""" + res_str = ['Op(Type/Name): [green][%s][/green] %s' % (self.type(), self.name()), + 'ID: [yellow]%s[/yellow]' % self.id(), + 'KernelName: [yellow]%s[/yellow]' % self.kernel_name(), + 'KernelLibName: [yellow]%s[/yellow]' % self.ge_attr_op_kernel_lib_name(), + 'GraphName: [yellow]%s[/yellow]' % self.graph_name] + pass_name = self.pass_name() + if pass_name != '': + res_str.append('PassName: [yellow]%s[/yellow]' % pass_name) + origin_op = self.data_dump_original_op_names() + if origin_op != '': + res_str.append('OriginalOp: %s' % origin_op) + if attr_detail: + res_str.append(self._attr_detail()) + res_str.append('InputAddr : [yellow]%s[/yellow]' % self.input_addr()) + res_str.append('OutputAddr: [yellow]%s[/yellow]' % self.output_addr()) + res_str.append('Input:%s' % InputDesc.summary.__doc__) + for i in self.inputs(): + res_str.append(' -' + i.summary(origin_txt)) + res_str.append('Output:') + for i in self.outputs(): + res_str.append(' -' + i.summary(origin_txt)) + return Constant.NEW_LINE.join(res_str) + + def _parse_inputs(self): + """ parse input desc in graph """ + self.input_list = [] + if 'input' not in self.op_json: + if self.type() not in NO_INPUT_NODES: + self.log.warning('Parse Op[%s][%s] inputs error.' % (self.type(), self.name())) + return self.input_list + desc_index = 0 + for i in range(len(self.op_json['input'])): + name = self.op_json['input'][i] + if name == '': + # if self.type() not in NO_INPUT_NODES: + # self.log.warning('invalid input name.') + continue + name_info = name.split(':') + if len(name_info) == 2 and int(name_info[1]) == -1: + # control edge + self.input_list.append(InputDesc(name, [], i)) + else: + self.input_list.append(InputDesc(name, self.op_json['input_desc'][desc_index], i)) + desc_index += 1 + self.input_list.sort(key=lambda x: x.index) + return self.input_list + + def _parse_outputs(self): + """ parse output desc in graph """ + self.output_list = [] + if 'dst_index' not in self.op_json: + if self.type() not in NO_OUTPUT_NODES: + self.log.warning('Parse Op[%s][%s] outputs error.' % (self.type(), self.name())) + return self.output_list + desc_index = 0 + for i in range(len(self.op_json['dst_index'])): + dst_name = self.op_json['dst_name'][i] + if self.op_json['dst_index'][i] == -1: + # control edge + self.output_list.append(OutputDesc(dst_name, [], -1)) + else: + self.output_list.append(OutputDesc(dst_name, self.op_json['output_desc'][desc_index], desc_index)) + desc_index += 1 + self.output_list.sort(key=lambda x: x.index) + return self.output_list + + def _looking_for_real_inputs(self): + """Find real inputs of subgraph data node.""" + graph_name = self.graph_name + parent_node_idx = self.parent_node_index() + parent_nodes = self.npu_graph.get_parent_node_by_subgraph_name(graph_name) + self.log.debug("Find %s parent nodes." 
% len(parent_nodes)) + for parent_node in parent_nodes: + inputs = parent_node.inputs() + if len(inputs) <= parent_node_idx: + self.log.warning("Parent node has %d inputs, but need index %d" % (len(inputs), parent_node_idx)) + continue + self.input_list.append(inputs[parent_node_idx]) + + def _looking_for_real_outputs(self): + """Find real outputs of PartitionedCall Node""" + subgraph_names = self.subgraph_names() + for subgraph_name in subgraph_names: + net_output_with_subgraph_name = subgraph_name + '_Node_Output' + net_output_nodes = self.npu_graph.get_op(net_output_with_subgraph_name) + self.log.debug("Find %s net output nodes, just need one." % len(net_output_nodes)) + self.log.info("Note: PartitionedCall output nodes are the nodes that connect to PartitionedCall from the inside.") + for output_node in net_output_nodes: + self.output_list = output_node.inputs() + + + + diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py new file mode 100644 index 000000000..acf8c8920 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py @@ -0,0 +1,45 @@ +# coding=utf-8 +import collections +import logging +import os +from ..util.util import util +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + +CKPT_META_SHUFFIX='.meta' + + +class TfGraph(object): + def __init__(self, graph_root=cfg.TF_GRAPH_DIR): + """""" + self.graph_root = graph_root + self.log = util.get_log() + self.op_list = collections.OrderedDict() + + @catch_tool_exception + def get_op_list(self, ckpt_path=None): + if not self.op_list: + # op_list starts out empty, so build it from the newest ckpt meta file + self._convert_ckpt_to_graph(ckpt_path) + return self.op_list + + def _convert_ckpt_to_graph(self, ckpt_path): + log_level = self.log.level + try: + self.log.setLevel('ERROR') + import tensorflow as tf + self.log.setLevel(log_level) + except ImportError as err: + self.log.setLevel(log_level) + raise PrecisionToolException("Import tensorflow failed. %s" % err) + meta_files = util.list_cpu_graph_files(ckpt_path) + if len(meta_files) == 0: + raise PrecisionToolException("Can not find any ckpt meta files.") + file_list = sorted(meta_files.values(), key=lambda x: x.timestamp) + ckpt_file = file_list[-1] + self.log.info("Find %d tf ckpt meta files, choose [%s]" % (len(meta_files), ckpt_file.file_name)) + self.op_list = collections.OrderedDict() + saver = tf.train.import_meta_graph(ckpt_file.path, clear_devices=True) + graph = tf.get_default_graph() + for op in graph.get_operations(): + self.op_list[op.name] = op -- Gitee From 5e2aeaea3b64d9f24789e29fda0ed9547e15e691 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:46:17 +0000 Subject: [PATCH 31/38] 1 Signed-off-by: huangju1993 --- .../lib/train/train_analysis.py | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py new file mode 100644 index 000000000..b7547d677 --- /dev/null +++ 
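A note on the Op._attr helper in op.py above: every GE attribute in the build-graph JSON is a {key, value} pair, and the value nests its payload under 's' (string), 'i' (integer) or 'list'. The following is a minimal standalone sketch of that lookup; the sample_attrs data is invented for illustration and runs outside the tool:

    # Sketch of the attribute lookup performed by Op._attr (sample data invented).
    def lookup_attr(attrs, key):
        # Scan the GE attribute list for the first entry matching `key`
        # and unwrap the typed payload, mirroring Op._attr above.
        for attr in attrs:
            if attr['key'] != key:
                continue
            value = attr['value']
            if 's' in value:
                return value['s']
            if 'list' in value and 's' in value['list']:
                return value['list']['s']
            if 'i' in value:
                return value['i']
        return ''

    sample_attrs = [
        {'key': 'pass_name', 'value': {'s': 'ConvBnFusionPass'}},
        {'key': '_parent_node_index', 'value': {'i': 0}},
        {'key': '_datadump_original_op_names', 'value': {'list': {'s': ['conv1', 'bn1']}}},
    ]
    assert lookup_attr(sample_attrs, 'pass_name') == 'ConvBnFusionPass'
    assert lookup_attr(sample_attrs, '_parent_node_index') == 0
    assert lookup_attr(sample_attrs, '_datadump_original_op_names') == ['conv1', 'bn1']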
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py @@ -0,0 +1,112 @@ +# coding=utf-8 +import os +import numpy as np +from ..adapter.tf_adapter import TfAdapter +from ..dump.tf_dump import TfDump +from ..util.util import util +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException + + +class TrainAnalysis(object): + def __init__(self): + self.log = util.get_log() + self.tf_adapter = TfAdapter() + + @staticmethod + def gen_feed_file_name(name): + file_name = str(name).replace(':', '_').replace('/', '_') + '.npy' + return os.path.join(cfg.TF_CKPT_INPUT_DIR, file_name) + + def _init_session(self, device='npu', action='dump'): + """""" + import tensorflow as tf + if device == 'npu': + # util.execute_command('source %s', cfg.ASCEND_SET_ENV) + return tf.Session(config=self.tf_adapter.session_dump_config(None, action=action)) + sess = tf.Session(config=tf.ConfigProto()) + return self.tf_adapter.sess_dump(sess) + + def _reset_dropout_rate(self, graph): + import tensorflow as tf + for op in graph.get_operations(): + if 'dropout' in op.name and 'rate' in op.name: + self.log.debug("Find dropout rate node [%s][%s]" % (op.type, op.name)) + # tensor = graph.get_tensor_by_name(op.name) + if op.type != 'Const': + self.log.warning("Dropout op [%s] is not Const, skip resetting its rate. May cause precision differences." % op.name) + continue + op._set_attr('value', tf.AttrValue(tensor=tf.make_tensor_proto(0.0, tf.float32))) + self.log.debug("Set op: %s" % str(op)) + + def _prepare_graph(self, graph): + graph.seed = cfg.DUMP_SEED + self._reset_dropout_rate(graph) + return graph + + def _load_train_graph(self, sess): + import tensorflow as tf + if util.empty_dir(cfg.TF_CKPT_ROOT): + raise PrecisionToolException('checkpoint dir [%s] is empty, can not run train analysis process.' % + cfg.TF_CKPT_ROOT) + checkpoint = tf.train.latest_checkpoint(cfg.TF_CKPT_ROOT) + if checkpoint is None: + raise PrecisionToolException('Load ckpt failed from [%s].' 
% cfg.TF_CKPT_ROOT) + saver = tf.train.import_meta_graph(checkpoint + '.meta') + self._prepare_graph(tf.get_default_graph()) + saver.restore(sess, checkpoint) + return tf.get_default_graph() + + @staticmethod + def _get_input_from_graph(graph): + input_nodes = [] + tensor_index = {} + for op in graph.get_operations(): + if 'Placeholder' == op.type: + if op.name in tensor_index: + tensor_index[op.name] += 1 + else: + tensor_index[op.name] = 0 + node = graph.get_tensor_by_name(op.name + ':' + str(tensor_index[op.name])) + input_nodes.append(node) + return input_nodes + + def _get_input_tensors(self, input_nodes): + feed_map = {} + for node in input_nodes: + file_name = self.gen_feed_file_name(node.name) + if os.path.isfile(file_name): + feed_map[node] = np.load(file_name) + else: + # TD data type + feed_map[node] = np.random.random(node.shape) + return feed_map + + def _build_feed_map(self, graph): + input_nodes = self._get_input_from_graph(graph) + return self._get_input_tensors(input_nodes) + + def _analysis(self, device, action='dump'): + import tensorflow as tf + if device == 'npu': + import npu_bridge.npu_init + sess = self._init_session(device, action=action) + graph = self._load_train_graph(sess) + train_op = tf.get_collection(tf.GraphKeys.TRAIN_OP) + feed_map = self._build_feed_map(graph) + sess.run(train_op, feed_dict=feed_map) + if device == 'cpu': + tf_dump = TfDump() + tf_dump.run_tf_dbg_dump() + + def run(self, device='all', action='dump'): + """ + :param device: all | npu | cpu + :param action: dump | overflow | fusion_switch | fusion_off + :return: + """ + if device == 'all': + self._analysis('cpu', action) + self._analysis('npu', action) + else: + self._analysis(device, action) -- Gitee From 91a77b9e32683bec9f82027a0e0e8bd88449c94d Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:46:32 +0000 Subject: [PATCH 32/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/util/constant.py | 20 + .../precision_tool/lib/util/file_desc.py | 38 ++ .../precision_tool/lib/util/h5_util.py | 190 +++++++ .../precision_tool/lib/util/onnx_builder.py | 0 .../lib/util/precision_tool_exception.py | 24 + .../precision_tool/lib/util/tool_object.py | 10 + .../precision_tool/lib/util/util.py | 536 ++++++++++++++++++ 7 files changed, 818 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/onnx_builder.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py new file mode 100644 index 000000000..8106bb8d4 --- /dev/null +++ 
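A note on the feed construction in train_analysis.py above: each Placeholder is fed either a saved .npy file whose name is derived from the tensor name (':' and '/' mapped to '_'), or random data as a fallback. Below is a self-contained sketch of that naming and fallback; input_dir and the tensor name are assumed example values, not values taken from the tool:

    import os
    import numpy as np

    def feed_file_name(tensor_name, input_dir):
        # Mirrors TrainAnalysis.gen_feed_file_name: 'input/x:0' -> 'input_x_0.npy'
        safe = str(tensor_name).replace(':', '_').replace('/', '_')
        return os.path.join(input_dir, safe + '.npy')

    def build_feed(tensor_name, shape, input_dir='.'):
        path = feed_file_name(tensor_name, input_dir)
        if os.path.isfile(path):
            return np.load(path)  # user-provided input data
        # Fallback as in _get_input_tensors: random values in the placeholder
        # shape. This assumes a fully defined shape and yields float64 in
        # [0, 1); real inputs may need an explicit dtype and value range.
        return np.random.random(shape)

    print(feed_file_name('input/x:0', '/tmp/inputs'))  # /tmp/inputs/input_x_0.npy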
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py @@ -0,0 +1,20 @@ +# coding=utf-8 + + +class Constant(object): + VERSION = "0.1.11" + NEW_LINE = "\n" + TAB_LINE = "\t" + DEFAULT_DEBUG_ID = "debug_0" + NPU_DEBUG_ID_1 = "debug_1" + GRAPH = "graph" + DUMP = "dump" + + class Suffix(object): + JSON = '.json' + CSV = '.csv' + H5 = '.h5' + OM = '.om' + + class Pattern(object): + GE_PROTO_GRAPH_PATTERN = r'^ge_proto_([0-9]+)_(graph_[0-9]+_)*([A-Za-z0-9_-]+)\.txt$' diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py new file mode 100644 index 000000000..0773632b1 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py @@ -0,0 +1,38 @@ +# coding=utf-8 +import os + + +class FileDesc(object): + def __init__(self, file_name, dir_path, timestamp=-1): + self.file_name = file_name + self.dir_path = dir_path + self.path = os.path.join(dir_path, file_name) + self.timestamp = timestamp + self.idx = 0 + if self.timestamp == -1: + self.timestamp = os.path.getmtime(self.path) + + +class BuildGraphFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, graph_id, graph_name): + super(BuildGraphFileDesc, self).__init__(file_name, dir_path, timestamp) + self.graph_id = graph_id + self.graph_name = graph_name + + +class NpuDumpFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, stream_id=0): + super(NpuDumpFileDesc, self).__init__(file_name, dir_path, timestamp) + self.op_name = op_name + self.op_type = op_type + self.task_id = task_id + stream_id = 0 if stream_id is None else int(stream_id) + self.stream_id = stream_id + self.idx = dir_path.split(os.sep)[-1] + + +class DumpDecodeFileDesc(NpuDumpFileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, anchor_type, anchor_idx): + super(DumpDecodeFileDesc, self).__init__(file_name, dir_path, timestamp, op_name, op_type, task_id) + self.type = anchor_type + self.idx = anchor_idx diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py new file mode 100644 index 000000000..1b294ab97 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py @@ -0,0 +1,190 @@ +import collections +import os +import numpy as np + +try: + import h5py +except ImportError as import_err: + h5py = None + print("Failed to import h5py. some function may disable. 
Run 'pip3 install h5py' to fix it.", + import_err) + +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg + + +class IdxType(object): + # /batch_norm/88/input/xxx + OP_TYPE = 'OP_TYPE' + OP_NAME = 'OP_NAME' + OP_ANC = 'OP_ANC' + + +H5_NAME_IDX = [IdxType.OP_TYPE, IdxType.OP_NAME, IdxType.OP_ANC] + + +def gen_h5_data_name(name, prefix='npu'): + return "%s_h5%s.npy" % (prefix, name.replace('/', '_')) + + +class H5Data(object): + def __init__(self, data, prefix='npu'): + self.data = data + self.prefix = prefix + self.name = gen_h5_data_name(self.data.name, self.prefix) + + def np_data(self): + np_data = np.array(self.data) + self._save(np_data) + return np_data + + def _save(self, data): + path = os.path.join(cfg.PT_DUMP_DECODE_DIR, self.name) + np.save(path, data) + + +class H5Op(object): + def __init__(self, name, h5_node, prefix='npu'): + self.log = util.get_log() + self.name = name + self.prefix = prefix + self.h5_node = h5_node + self.inputs = {} + self.outputs = {} + self.group = { + 'grads': {}, + 'tensors': {}, + 'grad_inputs': {}, + 'result': {} + } + ''' + self.input_grad = {} + self.output_grad = {} + self.input_tensor = {} + self.output_tensor = {} + ''' + self._prepare() + + def summary(self): + summary_txt = [] + summary_txt.extend(self._gen_txt(self.inputs, '-Input:')) + summary_txt.extend(self._gen_txt(self.outputs, '-Output:')) + summary_txt.extend(self._gen_txt(self.group['grads'], 'Grads:')) + summary_txt.extend(self._gen_txt(self.group['tensors'], '-Tensors:')) + summary_txt.extend(self._gen_txt(self.group['grad_inputs'], '-GradInputs:')) + summary_txt.extend(self._gen_txt(self.group['result'], '-Result:')) + return Constant.NEW_LINE.join(summary_txt) + + @staticmethod + def _gen_txt(h5_data, name): + if len(h5_data) == 0: + return [] + txt = [name] + for idx, data in enumerate(h5_data.values()): + txt.append(' └─[green][%s][/green] %s' % (idx, data.name)) + txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(data.np_data())) + return txt + + def _parse_group(self, node): + sub_node_type = node.name.split('/')[-1] + if sub_node_type in self.group.keys(): + for item in node: + sub_node = node[item] + if isinstance(sub_node, h5py.Dataset): + self.group[sub_node_type][item] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown sub node: %s" % sub_node) + else: + self.log.warning("Unknown sub node type: %s(%s)" % (sub_node_type, node)) + + def _prepare_input_output(self, node, desc_type): + for desc_name in node: + sub_node = node[desc_name] + if isinstance(sub_node, h5py.Group): + self._parse_group(sub_node) + elif isinstance(sub_node, h5py.Dataset): + update_dict = self.inputs if desc_type == 'input' else self.outputs + update_dict[desc_name] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown type: %s(%s)" % (type(sub_node), sub_node)) + + def _prepare(self): + for desc_type in self.h5_node: + if desc_type in ['input', 'output']: + self._prepare_input_output(self.h5_node[desc_type], desc_type) + else: + self.log.warning("Unknown desc type: %s(%s)" % (desc_type, self.h5_node)) + + +class H5Util(object): + def __init__(self, file_name, prefix): + self.log = util.get_log() + self.file_name = file_name + self.prefix = prefix + self.h5 = None + self.ops = collections.OrderedDict() + self._prepare() + + def __del__(self): + if self.h5 is not None: + self.h5.close() + + def get_op(self, op_id): + if op_id in self.ops: + return self.ops[op_id] + self.log.warning("Can not find any h5 op id: %s" 
% op_id) + return None + + def get_tensor_by_name(self, tensor_name): + if self.h5 is None: + self.log.warning("h5 file is None.") + return None + if tensor_name in self.h5: + return np.array(self.h5[tensor_name]) + return None + + def print_tensor(self, tensor_name): + tensor = self.get_tensor_by_name(tensor_name) + if tensor is None: + self.log.warning("Tensor:%s does not exist." % tensor_name) + return + file_path = self._dump_numpy(tensor_name, tensor) + util.print_npy_summary(os.path.dirname(file_path), os.path.basename(file_path)) + + def _prepare(self): + if not os.path.isfile(self.file_name) or not str(self.file_name).endswith(Constant.Suffix.H5): + self.log.error("File [%s] does not exist or is not an h5 file" % self.file_name) + return + if h5py is None: + self.log.warning("Can not find python module h5py, skip parsing h5 file.") + return + self.h5 = h5py.File(self.file_name, 'r') + self._list_tensors(self.h5) + + def _list_tensors(self, h5, idx=0, name=''): + for item in h5: + item_name = name + '/' + item + if idx == 1: + self.ops[str(item)] = H5Op(item_name, h5[item_name], self.prefix) + continue + self._list_tensors(h5[item], idx+1, item_name) + + def _list_tensors_loop(self, h5, idx=0, name=''): + for item in h5: + if isinstance(h5[item], h5py.Group): + item_name = name + '/' + item + print(item_name) + # check + if H5_NAME_IDX[idx] == IdxType.OP_NAME and item_name not in self.ops: + self.ops[item_name] = H5Op(item) + if H5_NAME_IDX[idx] == IdxType.OP_ANC: + self.ops[item_name] = H5Op(item) + self._list_tensors(h5[item], idx + 1, item_name) + + def _dump_numpy(self, tensor_name, tensor): + if not os.path.exists(cfg.PT_DUMP_DECODE_DIR): + util.create_dir(cfg.PT_DUMP_DECODE_DIR) + file_name = tensor_name.replace('/', '_').strip('_') + '.npy' + file_path = os.path.join(cfg.PT_DUMP_DECODE_DIR, file_name) + self.log.info("Dump file: %s" % file_path) + np.save(file_path, tensor) + return file_path diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/onnx_builder.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/onnx_builder.py new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py new file mode 100644 index 000000000..02084770f --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py @@ -0,0 +1,24 @@ +# coding=utf-8 +import logging + + +class PrecisionToolException(Exception): + """ + Class for PrecisionTool Exception + """ + def __init__(self, error_info): + super(PrecisionToolException, self).__init__() + self.error_info = error_info + + +def catch_tool_exception(func): + def handle(*args, **kwargs): + log = logging.getLogger() + try: + return func(*args, **kwargs) + except PrecisionToolException as pte: + log.warning(pte.error_info) + except SystemExit: + # do not exit + log.debug("Exit") + return handle diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py new file mode 100644 index 000000000..7412b6cee --- /dev/null +++ 
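A usage note for the catch_tool_exception decorator above: it logs a PrecisionToolException as a warning instead of letting it propagate, so a decorated call simply returns None on failure. A hedged sketch, assuming the two definitions above are importable as shown:

    from precision_tool_exception import PrecisionToolException, catch_tool_exception

    @catch_tool_exception
    def parse_build_file(path):
        # Hypothetical parser that always fails, to show the decorator's effect.
        raise PrecisionToolException("No graph in file: %s" % path)

    result = parse_build_file("ge_proto_00000_Build.txt")  # warning is logged
    assert result is None  # the exception was swallowed, not propagated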
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py @@ -0,0 +1,10 @@ +# coding=utf-8 + + +class ToolObject(object): + _instance = None + + def __new__(cls, *args, **kwargs): + if not cls._instance: + cls._instance = super(ToolObject, cls).__new__(cls, *args, **kwargs) + return cls._instance diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py new file mode 100644 index 000000000..88fbe0b00 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py @@ -0,0 +1,536 @@ +# coding=utf-8 +import csv +import re +import sys +import os +import shutil +import numpy as np +import logging +import subprocess +from .constant import Constant +from .precision_tool_exception import PrecisionToolException +from .precision_tool_exception import catch_tool_exception +from .file_desc import * +from ..config import config as cfg + +try: + from rich.traceback import install + from rich.panel import Panel + from rich.table import Table + from rich import print as rich_print + from rich.columns import Columns + install() +except ImportError as import_err: + install = None + Panel = None + Table = None + Columns = None + rich_print = print + print("Failed to import rich. some function may disable. Run 'pip3 install rich' to fix it.", + import_err) + +try: + import readline + readline.parse_and_bind('tab: complete') +except ImportError as import_error: + print("Unable to import module: readline. Run 'pip3 install gnureadline pyreadline' to fix it.") + +# patterns +OFFLINE_DUMP_PATTERN = r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})[.csv]?" +OFFLINE_DUMP_DECODE_PATTERN = \ + r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.?[0-9]?[\.0-9]+?" 
\ + r"\.([a-z]+)\.([0-9]{1,255})\.npy$" +OFFLINE_DUMP_CONVERT_PATTERN = \ + r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" \ + r"\.([a-z]+)\.([0-9]{1,255})(\.[x0-9]+)?\.npy$" +OFFLINE_FILE_NAME = 'op_type.op_name.task_id(.stream_id).timestamp' +OP_DEBUG_NAME = 'OpDebug.Node_OpDebug.taskid.timestamp' +CPU_DUMP_DECODE_PATTERN = r"^([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.npy$" +CPU_FILE_DECODE_NAME = 'op_name.0(.0).timestamp.npy' +OP_DEBUG_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" +OP_DEBUG_DECODE_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})[\.0-9]*\.([a-z]+)\.([0-9]{1,255})\.json" +VECTOR_COMPARE_RESULT_PATTERN = r"result_([0-9]{1,255})\.csv" +TIMESTAMP_DIR_PATTERN = '[0-9]{1,255}' +NUMPY_PATTERN = r".*\.npy$" +H5_PATTERN = r".*\.h5$" +CSV_SHUFFIX = '.csv' +NUMPY_SHUFFIX = '.npy' +CKPT_META_SHUFFIX = r".*.meta$" +MAPPING_CSV = "mapping.csv" + + +class Util(object): + def __init__(self): + self.atc = None + self.ms_accu_cmp = None + logging.basicConfig(level=cfg.LOG_LEVEL, format="%(asctime)s (%(process)d) -[%(levelname)s]%(message)s", + datefmt="%Y-%m-%d %H:%M:%S") + self.log = logging.getLogger() + self.python = sys.executable + + def get_log(self): + return self.log + + def execute_command(self, cmd: str): + """ Execute shell command + :param cmd: command + :return: status code + """ + if cmd is None: + self.log.error("Command is None.") + return -1 + self.log.debug("[Run CMD]: %s", cmd) + complete_process = subprocess.run(cmd, shell=True) + return complete_process.returncode + + @staticmethod + def empty_dir(dir_path: str) -> bool: + """ Check if target dir is empty + :param dir_path: target dir + :return: bool + """ + if not os.path.exists(dir_path): + return True + if len(os.listdir(dir_path)) == 0: + return True + return False + + def convert_proto_to_json(self, src_file, dst_file): + """Convert GE proto graphs to json format. + command: atc --mode=5 --om=ge_proto_Build.txt --json=xxx.json + :param src_file: proto file + :param dst_file: output json file + :return: result json file + """ + if not os.path.exists(src_file): + raise PrecisionToolException("Source proto file %s not exist." % src_file) + # src_file = os.path.join(cfg.GRAPH_DIR_ALL, proto_file) + # json_file = proto_file + '.json' + # dst_file = os.path.join(cfg.GRAPH_DIR_BUILD, json_file) + if os.path.exists(dst_file) and os.path.getmtime(dst_file) > os.path.getmtime(src_file): + self.log.debug("GE graph build json already exist.") + return dst_file + cmd = '%s --mode=5 --om=%s --json=%s' % (self._get_atc(), src_file, dst_file) + self.execute_command(cmd) + if not os.path.isfile(dst_file): + raise PrecisionToolException("Convert GE build graph to json failed. can not find any json file.") + self.log.info('Finish convert [%s] build graph from proto to json format.', src_file) + return dst_file + + def convert_dump_to_npy(self, src_file, dst_path, data_format=None): + """Convert npu dump files to npy format. + :param src_file: src file + :param dst_path: dst path + :param data_format: target data format + :return: status code + """ + self.create_dir(dst_path) + format_cmd = '' if data_format is None else '-f %s' % data_format + cmd = '%s %s convert -d %s -out %s %s' % (self.python, self._get_ms_accu_cmp(), src_file, dst_path, format_cmd) + return self.execute_command(cmd) + + def compare_vector(self, npu_dump_dir, cpu_dump_dir, graph_json, result_path): + """Run compare vector command. 
+ :param npu_dump_dir: npu dump data dir + :param cpu_dump_dir: cpu dump data dir + :param graph_json: graph json + :param result_path: result path + :return: status code + """ + self.create_dir(result_path) + if graph_json is None: + cmd = '%s %s compare -m %s -g %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, result_path) + else: + cmd = '%s %s compare -m %s -g %s -f %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, graph_json, result_path) + return self.execute_command(cmd) + + def list_dump_files(self, path, sub_path=''): + """List npu dump files in npu dump dir. + default only list the newest sub dir ordered by timestamp. set sub_path to specific other sub_path + :param path: dump path + :param sub_path: sub dir + :return: dump_files, parent_dirs + """ + parent_dirs = {} + dump_files = {} + newest_sub_path = self.get_newest_dir(path) if sub_path == '' else sub_path + dump_pattern = re.compile(OFFLINE_DUMP_PATTERN) + for dir_path, dir_names, file_names in os.walk(os.path.join(path, newest_sub_path), followlinks=True): + for name in file_names: + dump_match = dump_pattern.match(name) + if dump_match is None: + continue + dump_files[name] = self._gen_dump_file_info(name, dump_match, dir_path) + if dir_path not in parent_dirs: + parent_dirs[dir_path] = {} + parent_dirs[dir_path][name] = dump_files[name] + return dump_files, parent_dirs + + def parse_mapping_csv(self, path, pattern, extern_pattern=''): + """parse mapping csv in dump path""" + dump_files = {} + re_pattern = re.compile(pattern) + for dir_path, dir_names, file_names in os.walk(path, followlinks=True): + if MAPPING_CSV not in file_names: + continue + mapping = self.read_csv(os.path.join(dir_path, MAPPING_CSV)) + for item in mapping: + src_file = os.path.abspath(os.path.join(dir_path, item[0])) + if not os.path.isfile(src_file): + self.log.warning("Can not find file %s in mapping.csv, dir: %s.", item[0], dir_path) + continue + match = re_pattern.match(item[1]) + if match is None: + self.log.warning("file name [%s] in mapping.csv is invalid.", item[1]) + continue + file_desc = self._gen_dump_file_info(item[0], match, dir_path) + dst_file_name = '.'.join([file_desc.op_type, file_desc.file_name, str(file_desc.task_id), + str(file_desc.stream_id), str(file_desc.timestamp)]) + if item[1].endswith(Constant.Suffix.CSV): + dst_file_name += '.csv' + dst_file = os.path.abspath(os.path.join(dir_path, dst_file_name)) + if not os.path.islink(src_file): + os.rename(src_file, dst_file) + os.symlink(dst_file, src_file) + file_desc.path = dst_file + file_desc.file_name = dst_file_name + dump_files[item[1]] = file_desc + return dump_files + + def list_npu_dump_files(self, path, extern_pattern=''): + npu_dump_files = self._list_file_with_pattern(path, OFFLINE_DUMP_PATTERN, extern_pattern, + self._gen_dump_file_info) + npu_dump_files.update(self.parse_mapping_csv(path, OFFLINE_DUMP_PATTERN, extern_pattern)) + return npu_dump_files + + def list_ge_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, Constant.Pattern.GE_PROTO_GRAPH_PATTERN, extern_pattern, + self._gen_build_graph_file_info) + + def list_npu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_npu_dump_decode_file_info) + + def list_debug_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OP_DEBUG_DECODE_PATTERN, extern_pattern, + 
self._gen_overflow_debug_decode_file_info) + + def list_cpu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CPU_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_cpu_dump_decode_file_info) + + def list_cpu_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CKPT_META_SHUFFIX, extern_pattern, + self._gen_cpu_graph_files_info) + + def list_vector_compare_result_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, VECTOR_COMPARE_RESULT_PATTERN, extern_pattern, + self._gen_vector_compare_result_file_info) + + def list_npu_dump_convert_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_CONVERT_PATTERN, extern_pattern, + self._gen_npu_dump_convert_file_info) + + def list_numpy_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, NUMPY_PATTERN, extern_pattern, + self._gen_numpy_file_info) + + def list_h5_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, H5_PATTERN, extern_pattern, + self._gen_file_info) + + def create_dir(self, path): + """Create dir if not exist + :param path: path + :return: bool + """ + if os.path.exists(path): + return True + try: + os.makedirs(path, mode=0o700) + except OSError as err: + self.log.error("Failed to create %s. %s", path, str(err)) + return False + return True + + def clear_dir(self, path: str, pattern=''): + """Clear dir with pattern (file/path name match pattern will be removed) + :param path: path + :param pattern: pattern + :return: None + """ + if not os.path.exists(path): + return + try: + for f in os.listdir(path): + if not re.match(pattern, f): + continue + file_path = os.path.join(path, f) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except OSError as err: + self.log.error("Failed to remove %s. %s", path, str(err)) + + @staticmethod + def npy_info(source_data): + """Get npy information + :param source_data: npy path + :return: (shape, dtype) + """ + if isinstance(source_data, str): + if not str(source_data).endswith(NUMPY_SHUFFIX): + raise PrecisionToolException("Npy file [%s] is invalid" % source_data) + data = np.load(source_data, allow_pickle=True) + elif isinstance(source_data, np.ndarray): + data = source_data + else: + raise PrecisionToolException("Invalid source data:%s" % source_data) + if data.dtype == 'object': + raise PrecisionToolException("Invalid source data, data is object.") + if np.size(data) == 0: + raise PrecisionToolException("Empty source data:%s" % source_data) + return data.shape, data.dtype, data.max(), data.min(), data.mean() + + @catch_tool_exception + def gen_npy_info_txt(self, source_data): + """ Generate numpy info txt. 
+ :param source_data: source path or np.ndarray + :return: txt + """ + try: + shape, dtype, max_data, min_data, mean = self.npy_info(source_data) + return '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (shape, dtype, max_data, min_data, mean) + except PrecisionToolException: + return '' + + def print_npy_summary(self, path, file_name, is_convert=False, extern_content=''): + """Print summary of npy data + :param path: file path + :param file_name: file name + :param is_convert: if convert to txt file + :param extern_content: extern content append to the summary + :return: None + """ + target_file = os.path.join(path, file_name) + if not os.path.exists(target_file): + raise PrecisionToolException("File [%s] not exist" % target_file) + data = np.load(target_file, allow_pickle=True) + table = self.create_table('', ['Index', 'Data']) + flatten_data = data.flatten() + for i in range(min(16, int(np.ceil(flatten_data.size / 8)))): + last_idx = min(flatten_data.size, i*8+8) + table.add_row(str(i * 8), ' '.join(flatten_data[i*8: last_idx].astype('str').tolist())) + summary = ['[yellow]%s[/yellow]' % self.gen_npy_info_txt(data), 'Path: %s' % target_file] + if is_convert: + summary.append('TxtFile: %s.txt' % target_file) + if extern_content != '': + summary.append('%s' % extern_content) + self.print_panel(self.create_columns([table, Constant.NEW_LINE.join(summary)]), file_name) + if is_convert: + self.save_npy_to_txt(data, target_file + '.txt') + + def save_npy_to_txt(self, src_file, dst_file='', align=0): + """save numpy file to txt file. + default data will be aligned to the last axis of data.shape + :param src_file: src file name + :param dst_file: dst file name + :param align: data align + :return: None + """ + if dst_file == '': + dst_file = src_file + '.txt' + if os.path.exists(dst_file): + self.log.debug("Dst file %s exists, will not save new one.", dst_file) + return + if isinstance(src_file, str): + data = np.load(src_file, allow_pickle=True) + elif isinstance(src_file, np.ndarray): + data = src_file + else: + raise PrecisionToolException("invalid src_file: %s", src_file) + if data.dtype == 'object': + raise PrecisionToolException("Invalid source data, data is object.") + shape = data.shape + data = data.flatten() + if align == 0: + if len(shape) == 0: + align = 1 + else: + align = shape[-1] + elif data.size % align != 0: + pad_array = np.zeros((align - data.size % align,)) + data = np.append(data, pad_array) + np.savetxt(dst_file, data.reshape((-1, align)), delimiter=' ', fmt='%g') + + def read_csv(self, path): + """Read csv file to list. + :param path: csv file path + :return: list + """ + if not str(path).endswith(CSV_SHUFFIX): + self.log.error("csv path [%s] is invalid", path) + return + rows = [] + with open(path) as f: + csv_handle = csv.reader(f) + for row in csv_handle: + rows.append(row) + return rows + + @staticmethod + def print(content): + rich_print(content) + + @staticmethod + def render(content, rich=True): + if rich: + rich_print(content) + else: + print(content) + + @staticmethod + def create_table(title, columns): + if Table is None: + raise PrecisionToolException("No rich module error.") + table = Table(title=title) + for column_name in columns: + table.add_column(column_name, overflow='fold') + return table + + @staticmethod + def create_columns(content): + if Columns is None: + raise PrecisionToolException("No rich module error.") + return Columns(content) + + def print_panel(self, content, title='', fit=True): + """ Print panel. 
+ :param content: content + :param title: title + :param fit: if panel size fit the content + :return:Node + """ + if Panel is None: + print(content) + return + if fit: + self.print(Panel.fit(content, title=title)) + else: + self.print(Panel(content, title=title)) + + @staticmethod + def _detect_file(file_name, root_dir): + """Find file in root dir""" + result = [] + for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True): + for name in file_names: + if re.match(file_name, name): + result.append(os.path.join(dir_path, name)) + return result + + def _detect_file_if_not_exist(self, target_file): + """Find specific file in cmd root path""" + self.log.info("Try to auto detect file with name: %s.", target_file) + res = self._detect_file(target_file, cfg.CMD_ROOT_PATH) + if len(res) == 0: + raise PrecisionToolException("Cannot find any file named %s in dir %s" % (target_file, cfg.CMD_ROOT_PATH)) + self.log.info("Detect [%s] success. %s", target_file, res) + return res[0] + + def _get_atc(self): + if self.atc is None: + self.atc = self._detect_file_if_not_exist('^atc$') + return self.atc + + def _get_ms_accu_cmp(self): + if self.ms_accu_cmp is None: + self.ms_accu_cmp = self._detect_file_if_not_exist(cfg.MS_ACCU_CMP) + return self.ms_accu_cmp + + def get_newest_dir(self, path: str): + """Find the newest subdir in specific path, subdir should named by timestamp.""" + if not os.path.isdir(path): + self.log.warning("Path [%s] not exists", path) + return '' + paths = os.listdir(path) + sub_paths = [] + for p in paths: + if re.match(TIMESTAMP_DIR_PATTERN, p): + sub_paths.append(p) + if len(sub_paths) == 0: + self.log.debug("Path [%s] has no timestamp dirs.", path) + return '' + newest_sub_path = sorted(sub_paths)[-1] + self.log.info("Sub path num:[%d]. Dirs[%s], choose[%s]", len(sub_paths), str(sub_paths), newest_sub_path) + return newest_sub_path + + @staticmethod + def _list_file_with_pattern(path, pattern, extern_pattern, gen_info_func): + if path is None or not os.path.exists(path): + raise PrecisionToolException("Path %s not exist." 
% path) + file_list = {} + re_pattern = re.compile(pattern) + for dir_path, dir_names, file_names in os.walk(path, followlinks=True): + for name in file_names: + match = re_pattern.match(name) + if match is None: + continue + if extern_pattern != '' and not re.match(extern_pattern, name): + continue + file_list[name] = gen_info_func(name, match, dir_path) + return file_list + + @staticmethod + def _gen_numpy_file_info(name, math, dir_path): + return FileDesc(name, dir_path) + + @staticmethod + def _gen_file_info(name, math, dir_path): + return FileDesc(name, dir_path) + + @staticmethod + def _gen_build_graph_file_info(name, match, dir_path): + return BuildGraphFileDesc(name, dir_path, -1, int(match.group(1)), match.groups()[-1]) + + @staticmethod + def _gen_dump_file_info(name, match, dir_path): + return NpuDumpFileDesc(name, dir_path, int(match.groups()[-1]), op_name=match.group(2), op_type=match.group(1), + task_id=int(match.group(3)), stream_id=match.group(4)) + + @staticmethod + def _gen_npu_dump_decode_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name=match.group(2), + op_type=match.group(1), task_id=int(match.group(3)), + anchor_type=match.groups()[-2], anchor_idx=int(match.groups()[-1])) + + @staticmethod + def _gen_cpu_dump_decode_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, -1, op_name=match.group(1), op_type='', task_id=0, + anchor_type='output', anchor_idx=int(match.group(2))) + + @staticmethod + def _gen_cpu_graph_files_info(name, match, dir_path): + return FileDesc(name, dir_path, -1) + + @staticmethod + def _gen_overflow_debug_decode_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name='Node_OpDebug', op_type='Opdebug', + task_id=int(match.group(1)), anchor_type=match.groups()[-2], + anchor_idx=int(match.groups()[-1])) + + @staticmethod + def _gen_vector_compare_result_file_info(name, match, dir_path): + return FileDesc(name, dir_path, int(match.group(1))) + + @staticmethod + def _gen_npu_dump_convert_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-4]), op_name=match.group(2), + op_type=match.group(1), task_id=int(match.group(3)), anchor_type=match.groups()[-3], + anchor_idx=int(match.groups()[-2])) + + +util = Util() -- Gitee From 94c931814638103eea01f614e6fcead0447f07c7 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:46:50 +0000 Subject: [PATCH 33/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/__init__.py | 0 .../precision_tool/lib/cpu_tvm.py | 51 ++++ .../precision_tool/lib/interactive_cli.py | 87 +++++++ .../precision_tool/lib/precision_tool.py | 230 ++++++++++++++++++ 4 files changed, 368 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/__init__.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/__init__.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/__init__.py new file 
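A note on the naming patterns at the top of util.py above: OFFLINE_DUMP_PATTERN encodes the dump file scheme 'op_type.op_name.task_id(.stream_id).timestamp', and _gen_dump_file_info reads the match groups in that order, with a missing stream_id defaulting to 0. A quick standalone check (the file names are invented):

    import re

    OFFLINE_DUMP_PATTERN = r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})[.csv]?"

    for name in ('Conv2D.conv1.3.5.1617072000',   # with stream_id
                 'MatMul.fc1.12.1617072001'):     # stream_id omitted
        m = re.match(OFFLINE_DUMP_PATTERN, name)
        op_type, op_name, task_id, stream_id, timestamp = m.groups()
        # NpuDumpFileDesc treats a missing stream_id as 0.
        stream_id = 0 if stream_id is None else int(stream_id)
        print(op_type, op_name, int(task_id), stream_id, int(timestamp))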
mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py
new file mode 100644
index 000000000..a0906071e
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py
@@ -0,0 +1,51 @@
+import numpy as np
+from tbe import tvm
+
+
+class CpuTvm(object):
+    def __init__(self, json_file, dump_input_files, dump_output_files):
+        self.json_file = json_file
+        self.dump_input_files = dump_input_files
+        self.dump_output_files = dump_output_files
+        self.input_list = []
+        self.output_list = []
+
+    def _load_schedule(self):
+        with open(self.json_file, 'r') as jsonfile:
+            tvm_node = tvm.load_json(jsonfile.read())
+        self.output_list = tvm_node.op.attrs['output_list']
+        self.input_list = tvm_node.op.attrs['input_list']
+        schedule = tvm.create_schedule([res.op for res in self.output_list])
+        return schedule
+
+    def _build_tvm(self, schedule):
+        tensor_list = [ele for ele in self.input_list if ele is not None]
+        for ele in self.output_list:
+            if ele is not None:
+                tensor_list.append(ele)
+        fusion_op = tvm.build(schedule, tensor_list, "c", "llvm")
+        return fusion_op
+
+    def _load_data(self, dump_files):
+        ctx = tvm.cpu(0)
+        data_tvm = []
+        for dump_file in dump_files:
+            data_temp_numpy = np.load(dump_file)
+            data_temp_tvm = tvm.nd.array(data_temp_numpy, ctx)
+            data_tvm.append(data_temp_tvm)
+        return data_tvm
+
+    def run_cpu_tvm(self):
+        # load schedule and build tvm
+        schedule = self._load_schedule()
+        fusion_op = self._build_tvm(schedule)
+
+        # load data and run the fused op on the TVM CPU backend
+        data_tvm_in = self._load_data(self.dump_input_files)
+        data_tvm_out = self._load_data(self.dump_output_files)
+        data_tvm_in.extend(data_tvm_out)
+        fusion_op(*data_tvm_in)
+
+        # convert the TVM output tensors back to numpy arrays
+        data_np_out = [data.asnumpy() for data in data_tvm_out]
+        return data_np_out
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py
new file mode 100644
index 000000000..4e6aedd18
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py
@@ -0,0 +1,87 @@
+# coding=utf-8
+import cmd
+from .util.util import util
+from .util.constant import Constant
+from .precision_tool import PrecisionTool
+
+HEADER = r"""    ____                 _      _            ______            __
+   / __ \________  _____(_)____(_)___  ____/_  __/___  ____  / /
+  / /_/ / ___/ _ \/ ___/ / ___/ / __ \/ __ \/ / / __ \/ __ \/ /
+ / ____/ /  / __/ /__/ (__  ) / /_/ / / / / / / /_/ / /_/ / /
+/_/   /_/   \___/\___/_/____/_/\____/_/ /_/_/  \____/\____/_/  version=%s""" % Constant.VERSION
+
+HELP_AC = "Run auto check function, use [-c] to start vector compare process.\n" \
+          "    usage: ac (-c) \n"
+HELP_RUN = "Run any shell command.\n" \
+           "    usage: (run) vim tensor_name.txt \n"
+HELP_PT = "Print npy tensor, use [-c] to convert and save to txt file.\n" \
+          "    usage: pt (-c) [tensor_name.npy] \n"
+
+
+class InteractiveCli(cmd.Cmd):
+    def __init__(self):
+        cmd.Cmd.__init__(self)
+        self.prompt = "PrecisionTool > "
+        self.precision_tool = None
+        util.print_panel(HEADER)
+        self._prepare()
+
+    def default(self, line=''):
+        util.execute_command(line)
+        return False
+
+    def _prepare(self):
+        self.precision_tool = PrecisionTool()
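+        # prepare() creates the data root dir (cfg.DATA_ROOT_DIR) and initializes the
+        # graph/dump/overflow/fusion/compare managers (see PrecisionTool.prepare below)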
+        self.precision_tool.prepare()
+
+    def do_ac(self, line=''):
+        """Auto check."""
+        self.precision_tool.do_auto_check(self._parse_argv(line))
+
+    def do_run(self, line=''):
+        """Run any shell command"""
+        util.execute_command(line)
+
+    def do_ls(self, line=''):
+        """List ops: \n usage: ls (op(default)/dump) -n [op_name] -t [op_type]"""
+        argv = self._parse_argv(line)
+        if len(argv) > 0 and argv[0] == 'dump':
+            return self.precision_tool.do_list_dump(argv[1:])
+        self.precision_tool.do_list_nodes(argv)
+
+    def do_ni(self, line=''):
+        """Print node info:\n usage: ni (-n) [op_name]"""
+        self.precision_tool.do_node_info(self._parse_argv(line, '-n'))
+
+    def do_dc(self, line=''):
+        """Convert npu dump by op names:\n usage: dc (-n) [npu dump file] -f [target format]"""
+        self.precision_tool.do_convert_npu_dump(self._parse_argv(line, '-n'))
+
+    def do_vc(self, line=''):
+        """Do vector compare: \n usage: vc """
+        self.precision_tool.do_vector_compare(self._parse_argv(line))
+
+    def do_vcs(self, line=''):
+        """Do vector compare summary"""
+        self.precision_tool.do_vector_compare_summary(self._parse_argv(line))
+
+    def do_pt(self, line=''):
+        """Print data info:\n usage: pt (-n) [*.npy] (-c)\n -c: convert and save to txt file"""
+        self.precision_tool.do_print_data(self._parse_argv(line, '-n'))
+
+    def do_cp(self, line=''):
+        """Compare two data files"""
+        self.precision_tool.do_compare_data(self._parse_argv(line, '-n'))
+
+    def do_train(self, line=''):
+        """Train process:\n usage: train -d all -a dump"""
+        self.precision_tool.do_train_analysis(self._parse_argv(line))
+
+    @staticmethod
+    def _parse_argv(line, insert=None):
+        argv = line.split() if line != '' else []
+        if '-h' in argv:
+            return argv
+        if insert is not None and len(argv) > 0 and argv[0] != insert:
+            argv.insert(0, insert)
+        return argv
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py
new file mode 100644
index 000000000..d118b86bc
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py
@@ -0,0 +1,230 @@
+import argparse
+import os
+import time
+
+from .adapter.overflow import Overflow
+from .dump.dump_manager import DumpManager
+from .graph.graph_manager import GraphManager
+from .compare.compare import Compare
+from .adapter.fusion import Fusion
+from .train.train_analysis import TrainAnalysis
+from .util.util import util
+from .util.constant import Constant
+from .config import config as cfg
+from .util.precision_tool_exception import PrecisionToolException
+from .util.precision_tool_exception import catch_tool_exception
+
+
+class PrecisionTool(object):
+    def __init__(self):
+        """init"""
+        self.graph_manager = GraphManager()
+        self.overflow = Overflow()
+        self.dump_manager = DumpManager()
+        self.compare = Compare()
+        self.fusion = Fusion()
+        self.train_analysis = TrainAnalysis()
+        self.log = util.get_log()
+
+    @catch_tool_exception
+    def prepare(self):
+        """prepare"""
+        util.create_dir(cfg.DATA_ROOT_DIR)
+        self.graph_manager.prepare()
+        self.dump_manager.prepare()
+        self.overflow.prepare()
+        self.fusion.prepare()
+        self.compare.prepare()
+
+    @catch_tool_exception
+    def do_auto_check(self, argv):
+        """Auto check"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-c', '--vector_compare', dest='vector_compare', help='Run vector compare process',
action='store_true') + parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args(argv) + # vector compare + if args.vector_compare: + self.do_vector_compare() + self.do_vector_compare_summary() + self.do_check_fusion() + self.do_check_overflow(args.limit) + self.do_check_cast() + self.do_check_graph_similarity() + + @catch_tool_exception + def do_check_overflow(self, limit=3): + """check overflow""" + self.overflow.check(limit) + + @catch_tool_exception + def do_check_cast(self): + self.graph_manager.check_cast() + + @catch_tool_exception + def do_check_dtype(self): + """Check input/output dtype""" + self.graph_manager.check_dtype() + + @catch_tool_exception + def do_check_fusion(self): + """print fusion info summary""" + self.fusion.check() + + @catch_tool_exception + def do_check_graph_similarity(self): + self.graph_manager.check_similarity() + + @catch_tool_exception + def do_vector_compare(self, argv=None): + """do vector compare""" + parser = argparse.ArgumentParser() + parser.add_argument('-lt', '--left', dest='lt', default=None, help='left path(npu dump path)') + parser.add_argument('-rt', '--right', dest='rt', default=None, help='right path(cpu/npu dump path)') + parser.add_argument('-g', '--graph', dest='graph', required=False, default=None, help='graph json file') + args = parser.parse_args() if argv is None else parser.parse_args(argv) + # 1. compare npu_debug0 - tf dump data (auto) + # 2. compare npu_debug0 - npu_debug1 dump data + # 3. compare dir - dir dump data + result_dir = os.path.join(cfg.VECTOR_COMPARE_PATH, time.strftime("%Y%m%d%H%M%S", time.localtime())) + if args.lt is None: + debug_0_dump_root = self.dump_manager.get_dump_root_dir(Constant.DEFAULT_DEBUG_ID) + if util.empty_dir(debug_0_dump_root): + raise PrecisionToolException("NPU debug_0 dump dir is empty, no files to compare.") + if not util.empty_dir(cfg.TF_DUMP_DIR): + self.log.info("Tf dump dir is not empty, will compare npu dump data with tf dump data.") + self.compare.npu_tf_vector_compare(self.graph_manager.get_graphs(Constant.DEFAULT_DEBUG_ID), + debug_0_dump_root, cfg.TF_DUMP_DIR, result_dir) + else: + self.log.warning("Tf dump dir is empty, maybe run [python3 precision_tool/cli.py tf_dump] to decode" + " tf debug data.") + debug_1_dump_root = self.dump_manager.get_dump_root_dir(Constant.NPU_DEBUG_ID_1) + if debug_1_dump_root is not None and not util.empty_dir(debug_1_dump_root): + self.log.info("NPU debug_1 dump dir is not empty, will compare two npu dump data.") + self.compare.npu_vector_compare(debug_0_dump_root, debug_1_dump_root) + else: + lh_path = args.lt + rh_path = args.rt + graph_json = args.graph + self.compare.vector_compare(lh_path, rh_path, result_dir, graph_json) + self.compare.vector_summary(result_dir) + + @catch_tool_exception + def do_vector_compare_summary(self, argv=None): + parser = argparse.ArgumentParser(description="show vector compare result summary.") + parser.add_argument('-f', '--file', dest='file', default=None, required=False, help='compare_result file/path') + parser.add_argument('-c', '--cos_sim', dest='cos_sim', type=float, help='cos_sim_threshold', default=0.98) + parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args() if argv is None else parser.parse_args(argv) + error_ops = self.compare.vector_summary(args.file, args.cos_sim, args.limit) + # parse error_ops + + @catch_tool_exception + def do_print_data(self, argv=None): + """print tensor data""" + parser = 
argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='tensor file name')
+        args = parser.parse_args() if argv is None else parser.parse_args(argv)
+        self.dump_manager.print_tensor(args.name, True)
+
+    @catch_tool_exception
+    def do_list_nodes(self, argv):
+        """list op nodes in graph"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', default='', help='list by op type')
+        parser.add_argument('-n', '--name', dest='name', default='', help='list by op name')
+        parser.add_argument('-f', '--fusion', dest='fusion', default='', help='list by op fusion pass')
+        parser.add_argument('-k', '--kernel_name', dest='kernel_name', default='', help='list by op kernel_name')
+        args = parser.parse_args(argv)
+        self.graph_manager.print_op_list(args.type, args.name, args.fusion, args.kernel_name)
+
+    @catch_tool_exception
+    def do_node_info(self, argv):
+        """Print op node info"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='op name')
+        parser.add_argument('-g', '--graph', dest='graph', help='graph name')
+        parser.add_argument('-a', '--attr', dest='attr', action='store_true', help='show all attr info')
+        parser.add_argument('-c', '--check', dest='check', action='store_true', help='check single op precision')
+        parser.add_argument('-s', '--save', dest='save', type=int, default=0,
+                            help='save subgraph, param gives the depth of the subgraph')
+        args = parser.parse_args(argv)
+        # print graph op info
+        npu_ops, _ = self.graph_manager.get_ops(args.name, args.graph)
+        npu_op_summary, tf_op_summary = self.graph_manager.op_graph_summary(npu_ops, args.attr)
+        npu_dump_summary, tf_dump_summary = self.dump_manager.op_dump_summary(npu_ops)
+        pt_dump_summary = self.dump_manager.pt_dump_summary(args.name)
+        # merge graph/dump/compare info
+        for debug_id, graph_summary in npu_op_summary.items():
+            for graph_name, summary_detail in graph_summary.items():
+                summary_txt = [summary_detail]
+                if debug_id in npu_dump_summary and graph_name in npu_dump_summary[debug_id]:
+                    summary_txt.append(npu_dump_summary[debug_id][graph_name])
+                if tf_dump_summary is not None:
+                    summary_txt.append(tf_dump_summary)
+                title = "[green](%s)[/green] %s" % (debug_id, graph_name)
+                util.print_panel(Constant.NEW_LINE.join(summary_txt), title)
+        if pt_dump_summary != '':
+            util.print_panel(pt_dump_summary, args.name)
+        if args.save != 0:
+            self.graph_manager.save_sub_graph(npu_ops, args.save)
+
+    @catch_tool_exception
+    def do_compare_data(self, argv):
+        """Compare two tensors"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='names', type=str, default=[], help='op name', nargs='+')
+        parser.add_argument('-p', '--print', dest='count', default=20, type=int, help='print err data num')
+        parser.add_argument('-s', '--save', dest='save', action='store_true', help='save data in txt format')
+        parser.add_argument('-al', '--atol', dest='atol', default=0.001, type=float, help='set atol')
+        parser.add_argument('-rl', '--rtol', dest='rtol', default=0.001, type=float, help='set rtol')
+        args = parser.parse_args(argv)
+        if len(args.names) != 2:
+            self.log.error("Compare requires exactly 2 file names.")
+        else:
+            self.compare.compare_data(args.names[0], args.names[1], args.save, args.rtol, args.atol, args.count)
+
+    @catch_tool_exception
+    def do_list_dump(self, argv):
+        """List dump files"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', help='op type')
+        parser.add_argument('-n', '--name', dest='name', help='op name')
+        args = parser.parse_args(argv)
+        self.dump_manager.list_dump(args.type, args.name)
+
+    @catch_tool_exception
+    def do_convert_npu_dump(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', help='op name')
+        parser.add_argument('-f', '--format', dest='format', default=None, required=False, help='target format')
+        parser.add_argument('-o', '--output', dest='output', required=False, default=None, help='output path')
+        args = parser.parse_args(argv)
+        self.dump_manager.convert_npu_dump(args.name, args.format, args.output)
+
+    @catch_tool_exception
+    def do_convert_all_npu_dump(self):
+        self.dump_manager.decode_all_npu_dump()
+
+    @catch_tool_exception
+    def check_graph_similarity(self):
+        """Check graph similarity"""
+
+    @catch_tool_exception
+    def do_train_analysis(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-d', '--device', dest='device', default='all', required=False,
+                            help='train device, support cpu/npu/all')
+        parser.add_argument('-a', '--action', dest='action', default='dump', required=False,
+                            help='action, support dump(-d cpu/npu)[overflow]|fusion_off|fusion_switch(npu)')
+        args = parser.parse_args(argv)
+        self.train_analysis.run(args.device, args.action)
+
+    def single_cmd(self, argv):
+        cmd_func_map = {'compare': self.do_compare_data,
+                        'vector_compare': self.do_vector_compare,
+                        'train': self.do_train_analysis}
+        if argv[1] in cmd_func_map:
+            func = cmd_func_map[argv[1]]
+            return func(argv[2:])
+        raise PrecisionToolException("cmd %s is not supported or cmd should be run in interactive mode." % argv[1])
-- Gitee

From 7f2f3063d0e3865feeb22537d29f6a516a05cd65 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 08:27:14 +0000
Subject: [PATCH 34/38] cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh.
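
Wire conda environment selection and docker adaptation into the 1p
performance script. A sketch of the expected invocation (the dataset
path and env name here are illustrative, not taken from this patch):

    bash test/train_performance_bs256_1p.sh --data_path=/path/to/imagenet \
        --conda_name=my_conda_env --docker_enable=basic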
Signed-off-by: huangju1993
---
 .../test/train_performance_bs256_1p.sh        | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
index 48c689f74..c85da6f03 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
@@ -4,6 +4,8 @@ cur_path=`pwd`
 
 #Collective communication parameters, no need to modify
+source ~/.bashrc
+docker_enable="false"
 export RANK_SIZE=1
 export JOB_ID=99990001
 export RANK_ID=1
@@ -90,6 +92,12 @@ do
         cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
     elif [[ $para == --data_path* ]];then
         data_path=`echo ${para#*=}`
+    elif [[ $para == --conda_name* ]];then
+        conda_name=`echo ${para#*=}`
+        source $cur_path/set_conda.sh
+        source activate $conda_name
+    elif [[ $para == --docker_enable* ]];then
+        docker_enable=`echo ${para#*=}`
     fi
 done
 
@@ -103,6 +111,13 @@ if [[ $data_path == "" ]];then
     exit 1
 fi
 
+#Docker adaptation
+if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then
+    echo "docker_enable basic"
+    export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin
+    export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH
+fi
+
 #Modify parameters
 sed -i "50s|PATH_TO_BE_CONFIGURED|${data_path}|g" $cur_path/../src/configs/res50_256bs_1p.py
 sed -i "107s|PATH_TO_BE_CONFIGURED|${cur_path}/output/0/d\_solution/ckpt0|g" $cur_path/../src/configs/res50_256bs_1p.py
@@ -173,7 +188,11 @@ if [[ ${fp32} == "--fp32" ]];then
 elif [[ ${hf32} == "--hf32" ]];then
     CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
 elif [[ ${ffts} == "--ffts" ]];then
-    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'_'ffts'
+    if [[ $docker_enable == "basic" ]];then
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'overflow'_'perf'_'ffts'
+    else
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'_'ffts'
+    fi
 else
     CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
 fi
@@ -202,3 +221,5 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+rm -rf $cur_path/output/overflow_dump
\ No newline at end of file
-- Gitee

From 86616d0ea9f24155a7a445fed178a3a554200f74 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 08:43:12 +0000
Subject: [PATCH 35/38] cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.
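
Add a dump-enabled variant of the base trainer. It appears to be a copy
of the stock trainer where, in debug mode, the NPURunConfig is built
with precision_tool's estimator_dump_config(action='dump') and a
modify_mixlist of ./src/trainers/ReduceMeanD.json, so per-op dump data
can be collected during training.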
Signed-off-by: huangju1993 --- .../src/trainers/gpu_base_trainer_dump | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump new file mode 100644 index 000000000..2bb7b1854 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump @@ -0,0 +1,243 @@ +# coding=utf-8 +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import tensorflow as tf +import math +import time +from . 
import train_helper +from .train_helper import stage +from utils.logger import rank0log + +#from tensorflow.contrib.offline_train.python.npu.npu_config import NPURunConfig +from npu_bridge.estimator.npu.npu_config import NPURunConfig +#from tensorflow.contrib.offline_train.python.npu.npu_estimator import NPUEstimator +from npu_bridge.estimator.npu.npu_estimator import NPUEstimator +#from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer +from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer + +class GPUBaseTrain(object): + def __init__(self, session, config, data, model, logger): + self.sess = session + self.config = config + self.data = data + self.model = model + self.logger = logger + self.print_logger = self.logger.logger + self.all_preds = [] + self.all_targets = [] + if self.config['accelerator'] == 'gpu': + self.classifier, self.training_hook = self.get_classifier() + else: + # from tensorflow.contrib.offline_train.python.npu.npu_config import NPURunConfig + from npu_bridge.estimator.npu.npu_config import NPURunConfig + # from tensorflow.contrib.offline_train.python.npu.npu_estimator import NPUEstimator + from npu_bridge.estimator.npu.npu_estimator import NPUEstimator +# from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer + from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer + self.classifier, self.training_hook = self.get_npu_classifier() + + + + def get_classifier(self): + classifier = tf.estimator.Estimator( + model_fn=self.model.get_estimator_model_func, + model_dir=self.config['log_dir'], + config = tf.estimator.RunConfig( + session_config=self.sess.get_config(), + save_summary_steps=self.config['save_summary_steps'] if self.config['do_checkpoint'] else None, + save_checkpoints_steps=self.config['save_checkpoints_steps'] if self.config['do_checkpoint'] else None, + keep_checkpoint_max=None + ) + ) + + training_hooks = [train_helper.PrefillStagingAreasHook()] + training_hooks.append(self.logger) + + return classifier, training_hooks + + def get_npu_classifier(self): + session_config = tf.ConfigProto( + inter_op_parallelism_threads=10, + intra_op_parallelism_threads=10, + allow_soft_placement=True,) + + + if self.config['over_dump'] == "True": + print("NPU overflow dump is enabled") + from npu_bridge.npu_init import DumpConfig + dump_config = DumpConfig( + enable_dump_debug=True, dump_path=self.config['over_dump_path'], dump_debug_mode="all") + if self.config['debug'] : + run_config = NPURunConfig(dump_config=dump_config, hcom_parallel=True, precision_mode="allow_mix_precision", enable_data_pre_proc=True, save_checkpoints_steps=112590, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop'], keep_checkpoint_max=5) + else : + run_config = NPURunConfig(dump_config=dump_config, hcom_parallel=True, precision_mode="allow_mix_precision", save_summary_steps=0, log_step_count_steps=None, enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop']) + else: + import precision_tool.tf_config as npu_tf_config + if self.config['debug']: + if self.config['precision_mode'] == 'must_keep_origin_dtype': + run_config = NPURunConfig(hcom_parallel=True, + precision_mode="must_keep_origin_dtype", + enable_data_pre_proc=True, + save_checkpoints_steps=112590, + session_config=session_config, + 
model_dir = self.config['model_dir'], + iterations_per_loop=self.config['iterations_per_loop'], + keep_checkpoint_max=5, + enable_small_channel=1) + else: + dump_config = npu_tf_config.estimator_dump_config(action='dump') + run_config = NPURunConfig(hcom_parallel=True, + dump_config=dump_config, + precision_mode="allow_mix_precision", + enable_data_pre_proc=True, + save_checkpoints_steps=112590, + session_config=session_config, + model_dir=self.config['model_dir'], + iterations_per_loop=self.config['iterations_per_loop'], + keep_checkpoint_max=5, + enable_small_channel=1, + modify_mixlist='./src/trainers/ReduceMeanD.json') + else: + run_config = NPURunConfig(hcom_parallel=True, precision_mode="allow_mix_precision", save_summary_steps=0, log_step_count_steps=None, enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop']) + +# run_config = NPURunConfig(enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir']) + + # classifier = tf.estimator.Estimator( + # model_fn=self.model.get_estimator_model_func, + # model_dir=self.config['log_dir'], + # config = tf.estimator.RunConfig( + # session_config=self.sess.get_config(), + # save_summary_steps=self.config['save_summary_steps'] if self.config['do_checkpoint'] else None, + # save_checkpoints_steps=self.config['save_checkpoints_steps'] if self.config['do_checkpoint'] else None, + # keep_checkpoint_max=None + # ) + # ) + + classifier =NPUEstimator( + model_fn= self.model.get_estimator_model_func, + config= run_config + ) + + training_hooks = [] + if self.config['debug']: + training_hooks = [train_helper.PrefillStagingAreasHook()] + training_hooks.append(self.logger) + + return classifier, training_hooks + + def train(self): + print ('training steps: %d' % self.config['nstep']) + self.classifier.train( input_fn=lambda:self.data.get_train_input_fn(), + max_steps = self.config['nstep'], + hooks = self.training_hook + ) + + + def evaluate(self): + rank0log(self.print_logger, "Evaluating") + rank0log(self.print_logger, "Validation dataset size: {}".format(self.config['num_evaluating_samples'] )) + time.sleep(5) # a little extra margin... 
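+        # presumably lets the newest checkpoint finish flushing to disk before
+        # train_helper.sort_and_load_ckpts lists the checkpoints below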
+ try: + ckpts = train_helper.sort_and_load_ckpts(self.config['model_dir']) + print("=========ckpt==========") + print(ckpts) + print("=========ckpt==========") + for i, c in enumerate(ckpts): + if i < len(ckpts) - 1: + if i % self.config['eval_interval'] != 0: + continue + eval_result = self.classifier.evaluate( + input_fn=lambda: self.data.get_eval_input_fn(), + checkpoint_path=c['path']) + c['epoch'] = math.ceil(c['step'] / (self.config['num_training_samples']/ (self.config['global_batch_size']))) + c['top1'] = eval_result['val-top1acc'] + c['top5'] = eval_result['val-top5acc'] + c['loss'] = eval_result['loss'] + + rank0log(self.print_logger, ' step epoch top1 top5 loss checkpoint_time(UTC)') + for i, c in enumerate(ckpts): + if 'top1' not in c: + continue + rank0log(self.print_logger,'{:5d} {:5.1f} {:5.3f} {:6.2f} {:6.2f} {time}' + .format(c['step'], + c['epoch'], + c['top1'] * 100, + c['top5'] * 100, + c['loss'], + time=time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(c['mtime'])))) + rank0log(self.print_logger, "Finished evaluation") + except KeyboardInterrupt: + self.print_logger.error("Keyboard interrupt") + + def train_and_evaluate(self): + success = False + epochs_between_evals = self.config.get('epochs_between_evals', 4) + + + for i in range(self.config['num_epochs'] // epochs_between_evals): + + rank0log(self.print_logger, "Starting a training cycle") + + self.classifier.train(input_fn=lambda:self.data.get_train_input_fn(), + steps = self.config['nsteps_per_epoch']*epochs_between_evals, + hooks = self.training_hook ) + + rank0log(self.print_logger, "Starting to evaluate") + rank0log(self.print_logger, "Validation dataset size: {}".format(self.config['num_evaluating_samples'] )) + time.sleep(5) # a little extra margin... + + ckpts = train_helper.sort_and_load_ckpts(self.config['model_dir']) + c = ckpts[-1] + eval_result = self.classifier.evaluate( + input_fn=lambda: self.data.get_eval_input_fn(), + checkpoint_path=c['path']) + + c['epoch'] = math.ceil(c['step'] / (self.config['num_training_samples']/ (self.config['global_batch_size']))) + c['top1'] = eval_result['val-top1acc'] + c['top5'] = eval_result['val-top5acc'] + c['loss'] = eval_result['loss'] + + rank0log(self.print_logger, ' step epoch top1 top5 loss checkpoint_time(UTC)') + + rank0log(self.print_logger,'{:5d} {:5.1f} {:5.3f} {:6.2f} {:6.2f} {time}' + .format(c['step'], + c['epoch'], + c['top1'] * 100, + c['top5'] * 100, + c['loss'], + time=time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(c['mtime'])))) + if eval_result['val-top1acc']*100 > self.config.get('stop_threshold', 74.9): + success = True + break + + + -- Gitee From 1ddf1afd9ab3aedc8f170d63d37a48b1518821a3 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 08:47:29 +0000 Subject: [PATCH 36/38] /cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.py. 
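
Add the missing .py extension so the trainer can be imported as a
module; the res50_dump.py entry point added in the next patch pulls it
in with:

    from trainers import gpu_base_trainer_dump as tr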
Signed-off-by: huangju1993 --- .../trainers/{gpu_base_trainer_dump => gpu_base_trainer_dump.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/{gpu_base_trainer_dump => gpu_base_trainer_dump.py} (100%) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.py similarity index 100% rename from TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump rename to TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.py -- Gitee From bdf070fe9b467e66c475a5f17a4751cfa7bd9dd4 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 08:49:17 +0000 Subject: [PATCH 37/38] built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py. Signed-off-by: huangju1993 --- .../src/mains/res50_dump.py | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py new file mode 100644 index 000000000..062fc7757 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py @@ -0,0 +1,148 @@ +# coding=utf-8 +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+import tensorflow as tf
+import sys
+import ast
+import os
+base_path = os.path.split(os.path.realpath(__file__))[0]
+print("#########base_path:", base_path)
+path_1 = base_path + "/../"
+print(path_1)
+path_2 = base_path + "/../models"
+print(path_2)
+path_3 = base_path + "/../../"
+print(path_3)
+
+
+sys.path.insert(1, path_1)
+sys.path.append(base_path + "/../models")
+sys.path.append(base_path + "/../../")
+sys.path.append(base_path + "/../../models")
+
+from utils import create_session as cs
+from utils import logger as lg
+from data_loader.resnet50 import data_loader as dl
+from models.resnet50 import res50_model as ml
+from optimizers import optimizer as op
+from losses import res50_loss as ls
+from trainers import gpu_base_trainer_dump as tr
+# from configs import res50_config as cfg
+from hyper_param import hyper_param as hp
+from layers import layers as ly
+import argparse
+
+def main():
+    #-------------------choose the config file in .sh file-----------
+    cmdline = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    cmdline.add_argument('--config_file', default="",
+                         help="""config file used.""")
+    cmdline.add_argument('--iterations_per_loop', default=1,
+                         help="""iterations per training loop.""")
+    cmdline.add_argument('--max_train_steps', default=200,
+                         help="""max training steps.""")
+    cmdline.add_argument('--debug', default=True, type=ast.literal_eval,
+                         help="""enable debug mode.""")
+    cmdline.add_argument('--eval', default=False, type=ast.literal_eval,
+                         help="""run evaluation after training.""")
+    cmdline.add_argument('--model_dir', default="./model_dir",
+                         help="""directory for model checkpoints.""")
+    cmdline.add_argument('--precision_mode', default='allow_mix_precision', type=str, help='precision_mode')
+
+    # modify for npu overflow start
+    # enable overflow
+    cmdline.add_argument("--over_dump", default="False",
+                         help="whether to enable overflow detection")
+    cmdline.add_argument("--over_dump_path", default="./",
+                         help="path to save overflow dump files")
+    cmdline.add_argument("--data_path", default="", help="path of dataset")
+
+    FLAGS, unknown_args = cmdline.parse_known_args()
+    if len(unknown_args) > 0:
+        for bad_arg in unknown_args:
+            print("ERROR: Unknown command line arg: %s" % bad_arg)
+        raise ValueError("Invalid command line arg(s)")
+
+    cfg_file = FLAGS.config_file
+    configs = 'configs'
+    cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file)
+    #------------------------------------------------------------------
+    '''
+    if FLAGS.precision_mode == "allow_mix_precision":
+        option = {}
+        option["ACL_PRECISION_MODE"] = "allow_mix_precision"
+        torch_npu.npu.set_option(option)
+    '''
+    config = cfg.res50_config()
+    config['iterations_per_loop'] = int(FLAGS.iterations_per_loop)
+    config['max_train_steps'] = int(FLAGS.max_train_steps)
+    config['debug'] = FLAGS.debug
+    config['precision_mode'] = FLAGS.precision_mode
+    config['eval'] = FLAGS.eval
+    config['model_dir'] = FLAGS.model_dir
+    if FLAGS.data_path:
+        config['data_url'] = FLAGS.data_path
+
+    config['over_dump'] = FLAGS.over_dump
+    config['over_dump_path'] = FLAGS.over_dump_path
+
+    print("iterations_per_loop :%d" % (config['iterations_per_loop']))
+    print("max_train_steps     :%d" % (config['max_train_steps']))
+    print("debug               :%s" % (config['debug']))
+    print("precision_mode      :%s" % (config['precision_mode']))
+    print("eval                :%s" % (config['eval']))
+    print("model_dir           :%s" % (config['model_dir']))
+    print("over_dump           :%s" % (config['over_dump']))
print("over_dump_path :%s" %(config['over_dump_path'])) + Session = cs.CreateSession(config) + data = dl.DataLoader(config) + hyper_param = hp.HyperParams(config) + layers = ly.Layers() + optimizer = op.Optimizer(config) + loss = ls.Loss(config) + logger = lg.LogSessionRunHook(config) # add tensorboard summary + + model = ml.Model(config, data, hyper_param,layers, optimizer, loss, logger) # get the model + trainer = tr.GPUBaseTrain(Session, config, data, model, logger) # use Estimator to build training process + + if config['mode'] =='train': + trainer.train() + if config['eval'] : + trainer.evaluate() + elif config['mode'] =='evaluate': + trainer.evaluate() + elif config['mode'] =='train_and_evaluate': + trainer.train_and_evaluate() + else: + raise ValueError('Invalid type of mode') + +if __name__ == '__main__': + main() -- Gitee From 5c179ce96a97d7a280b823a5b80cbdce3887cb7e Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 08:57:10 +0000 Subject: [PATCH 38/38] cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh. Signed-off-by: huangju1993 --- .../test/train_performance_bs256_1p_dump.sh | 225 ++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh new file mode 100644 index 000000000..90233d8a3 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh @@ -0,0 +1,225 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=99990001 +export RANK_ID=1 +export HCCL_CONNECT_TIMEOUT=600 +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" +ffts='None' +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet50_ID0058_for_TensorFlow" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=256 +#训练step +train_steps=2000 +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then 
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --ffts* ]];then
+        ffts=`echo ${para#*=}`
+    elif [[ $para == --autotune* ]];then
+        autotune=`echo ${para#*=}`
+        mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+        mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+        autotune_dump_path=${cur_path}/output/autotune_dump
+        mkdir -p ${autotune_dump_path}/GA
+        mkdir -p ${autotune_dump_path}/rl
+        cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+        cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --conda_name* ]];then
+        conda_name=`echo ${para#*=}`
+        source $cur_path/set_conda.sh
+        source activate $conda_name
+    elif [[ $para == --docker_enable* ]];then
+        docker_enable=`echo ${para#*=}`
+    fi
+done
+
+if [[ ${hf32} == "--hf32" ]];then
+    export ENABLE_HF32_EXECUTION=1
+fi
+
+#Check that data_path was provided, no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#Docker adaptation
+if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then
+    echo "docker_enable basic"
+    export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin
+    export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH
+fi
+
+#Modify parameters
+sed -i "50s|PATH_TO_BE_CONFIGURED|${data_path}|g" $cur_path/../src/configs/res50_256bs_1p.py
+sed -i "107s|PATH_TO_BE_CONFIGURED|${cur_path}/output/0/d\_solution/ckpt0|g" $cur_path/../src/configs/res50_256bs_1p.py
+
+cp data_loader.py $cur_path/../src/data_loader/resnet50/
+
+if [[ ${ffts} == "--ffts" ]];then
+    export ASCEND_ENHANCE_ENABLE=1
+fi
+#Training start time, no need to modify
+start_time=$(date +%s)
+cd $cur_path/../
+#Enter the training script directory, review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables, no need to modify
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+    export DEVICE_INDEX=$RANK_ID
+
+    #Create the DeviceID output directory, no need to modify
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no modification, others should be reviewed per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+    nohup python3.7 ${cur_path}/../src/mains/res50_dump.py --config_file=res50_256bs_1p \
+        --max_train_steps=${train_steps} \
+        --iterations_per_loop=100 \
+        --debug=True \
+        --eval=False \
+        --precision_mode ${precision_mode} \
+        --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time, no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Restore the parameters
+sed -i "50s|${data_path}|PATH_TO_BE_CONFIGURED|g" $cur_path/../src/configs/res50_256bs_1p.py
+sed -i "107s|${cur_path}/output/0/d\_solution/ckpt0|PATH_TO_BE_CONFIGURED|g" $cur_path/../src/configs/res50_256bs_1p.py
+
+#Print results, no need to modify
+echo "------------------ Final result ------------------"
+#Output performance FPS, review and modify per model
+FPS=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "FPS: " | awk -F "FPS: " '{print $2}' | awk -F " loss:" '{print $1}' | tail -n +2 | awk '{sum+=$1} END {print sum/NR}'`
+#Print, no need to modify
+echo "Final Performance images/sec : $FPS"
+
+#Output training accuracy, review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print, no need to modify
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Stability and accuracy monitoring result summary
+#Training case information, no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+if [[ ${fp32} == "--fp32" ]];then
+    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf'
+elif [[ ${hf32} == "--hf32" ]];then
+    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
+elif [[ ${ffts} == "--ffts" ]];then
+    if [[ $docker_enable == "basic" ]];then
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'dump'_'perf'_'ffts'
+    else
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'_'ffts'
+    fi
+else
+    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+fi
+
+
+
+##Get performance data
+#Throughput, no need to modify
+ActualFPS=${FPS}
+#Training time per iteration, no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt, review per model
+grep "FPS: " $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss: " '{print $2}' | awk -F "total" '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss of the last iteration, no need to modify
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log, no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+rm -rf $cur_path/../precision_data
\ No newline at end of file
-- Gitee