From 80784a65df9734be295ff81f886d9d0b9c84f779 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:41:13 +0000 Subject: [PATCH 01/38] add TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh. Signed-off-by: huangju1993 --- .../DeepCTR_Series_for_TensorFlow/test/set_conda.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh new file mode 100644 index 000000000..febb0fa34 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/set_conda.sh @@ -0,0 +1,2 @@ +export PATH=/home/anaconda3/bin:$PATH +export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH \ No newline at end of file -- Gitee From 50abd2b337537d9efcc282fc4c69cae3aec82737 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:41:57 +0000 Subject: [PATCH 02/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20precision=5Ftool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 341cf40c5858a70bb230cb1243e23a6d4744bed3 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:42:21 +0000 Subject: [PATCH 03/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20lib?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../examples/precision_tool/lib/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/.keep diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/.keep b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From 118c35275bbde82f554014655d197e02056da7a8 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:42:49 +0000 Subject: [PATCH 04/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/adapter/fusion.py | 112 ++++++++++ .../lib/adapter/msquickcmp_adapter.py | 87 ++++++++ .../lib/adapter/offline_om_adapter.py | 23 ++ .../precision_tool/lib/adapter/overflow.py | 194 +++++++++++++++++ .../precision_tool/lib/adapter/tf_adapter.py | 200 ++++++++++++++++++ 5 files changed, 616 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py 
create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py new file mode 100644 index 000000000..b440b1055 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/fusion.py @@ -0,0 +1,112 @@ +# coding=utf-8 +import json +import os +import shutil +from ..util.util import util +from ..config import config as cfg + + +FUSION_RESULT_FILE_NAME = 'fusion_result.json' +EFFECT_TIMES_KEY = 'effect_times' +GRAPH_FUSION_KEY = 'graph_fusion' +UB_FUSION_KEY = 'ub_fusion' +GRAPH_ID_KEYS = ['graphId', 'session_and_graph_id'] + + +class FusionResult(object): + def __init__(self, fusion_json): + self.fusion_json = fusion_json + + def get_effect_graph_fusion(self): + """Get effect graph fusion rule""" + if GRAPH_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[GRAPH_FUSION_KEY]) + return {} + + def get_effect_ub_fusion(self): + """Get effect UB fusion rule""" + if UB_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[UB_FUSION_KEY]) + return {} + + def graph_id(self): + """Get graph id""" + for key in GRAPH_ID_KEYS: + if key in self.fusion_json: + return self.fusion_json[key] + return "NONE" + + @staticmethod + def _get_effect_fusion(fusion): + res = {} + for fusion_name in fusion: + effect_times = int(fusion[fusion_name][EFFECT_TIMES_KEY]) + if effect_times > 0: + res[fusion_name] = effect_times + return res + + +class Fusion(object): + def __init__(self): + self.fusion_result = [] + self.log = util.get_log() + + def prepare(self, json_path='./'): + """Prepare fusion rule manager + :param json_path: path to fusion_result.json + :return: None + """ + util.create_dir(cfg.FUSION_DIR) + file_path = os.path.join(json_path, FUSION_RESULT_FILE_NAME) + file_path_local = os.path.join(cfg.FUSION_DIR, FUSION_RESULT_FILE_NAME) + if not os.path.isfile(file_path): + if not os.path.isfile(file_path_local): + self.log.debug("Can not find fusion result json.") + return + else: + shutil.copy(file_path, cfg.FUSION_DIR) + fe_jsons = self._get_result_jsons(file_path_local) + for fe_json in fe_jsons: + self.fusion_result.append(FusionResult(fe_json)) + + def check(self): + """Check fusion rules + :return: None + """ + self.log.info("Check effect fusion rule list.") + for fusion in self.fusion_result: + graph_fusion_table = self._build_table('Graph Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_graph_fusion()) + ub_fusion_table = self._build_table('UB Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_ub_fusion()) + util.print_panel(util.create_columns([graph_fusion_table, ub_fusion_table]), + title='GraphID:' + fusion.graph_id(), fit=True) + + @staticmethod + def _get_result_jsons(file_name): + result_jsons = [] + with open(file_name, 'r') as f: + txt = f.read() + try: + result_jsons = json.loads(txt) + if isinstance(result_jsons, dict): + result_jsons = [result_jsons] + except ValueError: + sk = [] + start = -1 + for i in range(len(txt)): + if txt[i] == '{': + sk.append('{') + if txt[i] == '}': + sk.pop() + if len(sk) == 0: + result_jsons.append(json.loads(txt[start+1: i+1])) + start = i + return result_jsons + + @staticmethod + def _build_table(title, fusion): + 
table = util.create_table(title, ['Fusion Name', 'Effect times']) + for f in fusion: + table.add_row(f, str(fusion[f])) + return table diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py new file mode 100644 index 000000000..525dc8ee5 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/msquickcmp_adapter.py @@ -0,0 +1,87 @@ +# coding=utf-8 +import os +import time +import pathlib +import shutil +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException + + +class MsQuickCmpAdapter(object): + def __init__(self, output_path): + self.output_path = output_path + self.log = util.get_log() + + def run(self): + if self.output_path is None or not os.path.isdir(self.output_path): + raise PrecisionToolException("Invalid output path.") + if os.path.exists(cfg.DATA_ROOT_DIR): + raise PrecisionToolException("Precision data dir exist, can not adapt msquickcmp result.") + + for dir_path, dir_names, file_names in os.walk(self.output_path, followlinks=True): + if 'model' in dir_names: + self._adapt_model(os.path.join(dir_path, 'model')) + if 'dump_data' in dir_names: + self._adapt_dump(os.path.join(dir_path, 'dump_data')) + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.CSV): + self._adapt_vector_compare_result(os.path.join(dir_path, file_name)) + + def _adapt_model(self, path): + file_names = os.listdir(path) + graph_id = 0 + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.JSON): + self.log.info("Find msquickcmp model json: %s", file_name) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + graph_file_name = 'ge_proto_%d_%s.txt' % (graph_id, cfg.BUILD_JSON_GRAPH_NAME) + graph_json_file_name = graph_file_name + Constant.Suffix.JSON + pathlib.Path(os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_file_name)).touch() + src_path = os.path.join(path, file_name) + dst_path = os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_json_file_name) + self.log.info("Copy graph file: %s->%s", src_path, dst_path) + shutil.copy(src_path, dst_path) + time.sleep(3) + pathlib.Path(dst_path).touch() + if not util.empty_dir(cfg.DEFAULT_NPU_GRAPH_DIR): + self.log.info("Adapt model success.") + + def _adapt_dump(self, path): + dir_names = os.listdir(path) + if 'tf' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'tf')) + if 'onnx' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'onnx')) + if 'npu' in dir_names: + self._adapt_npu_dump(os.path.join(path, 'npu')) + + def _adapt_tf_dump(self, path): + if util.empty_dir(path): + return + src_path = os.path.abspath(path) + util.create_dir(cfg.TF_DIR) + dst_path = cfg.TF_DUMP_DIR + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + os.symlink(src_path, dst_path) + self.log.info("Adapt tf dump success.") + + def _adapt_npu_dump(self, path): + sub_dirs = os.listdir(path) + self.log.info("Find npu dump dir:%s", sub_dirs) + sub_dirs = filter(lambda x: str(x).isdigit(), sub_dirs) + for sub_dir in sub_dirs: + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + src_path = os.path.abspath(os.path.join(path, sub_dir)) + dst_path = os.path.join(cfg.DEFAULT_NPU_DUMP_DIR, sub_dir) + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + 
os.symlink(src_path, dst_path)
+            self.log.info("Adapt npu dump success.")
+
+    def _adapt_vector_compare_result(self, path):
+        target_path = os.path.join(cfg.VECTOR_COMPARE_PATH, '0')
+        util.create_dir(target_path)
+        dst_path = os.path.join(target_path, os.path.basename(path))
+        shutil.copy(path, dst_path)
+        self.log.info("Adapt vector compare result.")
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py
new file mode 100644
index 000000000..a6cc8a5fb
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/offline_om_adapter.py
@@ -0,0 +1,23 @@
+# coding=utf-8
+import os
+import time
+import pathlib
+import shutil
+from ..util.util import util
+from ..util.constant import Constant
+from ..config import config as cfg
+from ..util.precision_tool_exception import PrecisionToolException
+
+
+class OfflineOmAdapter(object):
+    """Automatically parse an om file into a GE graph"""
+    def __init__(self, file_name):
+        self.file_name = file_name
+        self.log = util.get_log()
+
+    @staticmethod
+    def validate(file_name):
+        return os.path.isfile(file_name) and str(file_name).endswith(Constant.Suffix.OM)
+
+    def run(self):
+        # self.log is a logger object and is not callable; use its info method
+        self.log.info("To impl")
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py
new file mode 100644
index 000000000..7908c18be
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/overflow.py
@@ -0,0 +1,194 @@
+# coding=utf-8
+import json
+import os
+
+from ..util.util import util
+from ..util.precision_tool_exception import PrecisionToolException
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.constant import Constant
+from ..config import config as cfg
+
+
+AI_CORE_OVERFLOW_STATUS = {
+    '0x8': 'Overflow when negating (NEG) the sign bit of the minimum signed integer',
+    '0x10': 'Overflow in integer add, subtract, multiply or multiply-add',
+    '0x20': 'Overflow in floating-point computation',
+    '0x80': 'Negative input to a float-to-unsigned conversion',
+    '0x100': 'Overflow in FP32-to-FP16 or signed INT32-to-FP16 conversion',
+    '0x400': 'Overflow in CUBE accumulation'
+}
+DHA_ATOMIC_ADD_STATUS = {
+    '0x9': '[atomic overflow] overflow',
+    '0xA': '[atomic underflow] underflow',
+    '0xB': '[atomic src nan] source operand is invalid',
+    '0xC': '[atomic dst nan] destination operand is invalid',
+    '0xD': '[atomic both nan] both source and destination operands are invalid'
+}
+L2_ATOMIC_ADD_STATUS = {
+    '000': '[atomic no error] no error',
+    '001': '[atomic overflow] overflow',
+    '010': '[atomic underflow] underflow',
+    '011': '[atomic src nan] source operand is invalid',
+    '100': '[atomic dst nan] destination operand is invalid',
+    '101': '[atomic both nan] both source and destination operands are invalid'
+}
+
+
+class Overflow(object):
+    def __init__(self):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_files = None
+
+    @catch_tool_exception
+    def prepare(self):
+        """Prepare"""
+        # find right path in DUMP_FILES_NPU_ALL
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        sub_dir = util.get_newest_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        overflow_dump_files = util.list_npu_dump_files(os.path.join(cfg.NPU_OVERFLOW_DUMP_DIR, sub_dir))
+        self.debug_files = [item for item in overflow_dump_files.values() if item.op_type == 'Opdebug']
+        # sort by timestamp
+        self.debug_files = sorted(self.debug_files, key=lambda x: x.timestamp)
+        self.log.info("Find [%d] debug files in overflow dir.", len(self.debug_files))
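+
+    # Usage sketch (illustrative, not part of the original tool flow): callers are
+    # expected to collect the Opdebug files with prepare() before calling check():
+    #   overflow = Overflow()
+    #   overflow.prepare()
+    #   overflow.check(max_num=3)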
+
+    def check(self, max_num=3):
+        """Check overflow info"""
+        if len(self.debug_files) == 0:
+            self.log.info("[Overflow] Check finished. Found [0] overflow nodes!")
+            return
+        self.log.info("[Overflow] Find [%d] overflow debug files. Will show top %d ops.",
+                      len(self.debug_files), max_num)
+        for i, debug_file in enumerate(self.debug_files):
+            debug_decode_files = self._decode_file(debug_file, True)
+            with open(debug_decode_files[0].path, 'r') as f:
+                overflow_json = json.load(f)
+            util.print_panel(self._json_summary(overflow_json, debug_file))
+            if i + 1 >= max_num:
+                break
+
+    def _json_summary(self, json_txt, debug_file):
+        res = []
+        detail = {'task_id': -1, 'stream_id': -1}
+        if 'magic' in json_txt:
+            # version 2
+            detail = json_txt['acc_list']['data']
+            res.append(' - [AccType:%s][Status:%s][TaskId:%s]' % (
+                json_txt['acc_list'], detail['status'], detail['task_id']))
+        if 'AI Core' in json_txt and json_txt['AI Core']['status'] > 0:
+            detail = json_txt['AI Core']
+            res.append(' - [AI Core][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_ai_core_status(detail['status'])))
+        if 'DHA Atomic Add' in json_txt and json_txt['DHA Atomic Add']['status'] > 0:
+            detail = json_txt['DHA Atomic Add']
+            res.append(' - [DHA Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_dha_atomic_add_status(detail['status'])))
+        if 'L2 Atomic Add' in json_txt and json_txt['L2 Atomic Add']['status'] > 0:
+            detail = json_txt['L2 Atomic Add']
+            res.append(' - [L2 Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_l2_atomic_add_status(detail['status'])))
+        # fall back to the ids parsed from the debug file name if the json has none
+        if str(detail.get('task_id', -1)) == '-1':
+            detail['task_id'] = debug_file.task_id
+        if str(detail.get('stream_id', -1)) == '-1':
+            detail['stream_id'] = debug_file.stream_id
+        dump_file_info = self._find_dump_files_by_task_id(detail['task_id'], detail['stream_id'],
+                                                          debug_file.dir_path)
+        res.append(' - First overflow file timestamp [%s] -' % debug_file.timestamp)
+        if dump_file_info is None:
+            self.log.warning("Can not find any dump file for debug file: %s, op task id: %s",
+                             debug_file.file_name, detail['task_id'])
+        else:
+            dump_decode_files = self._decode_file(dump_file_info)
+            # sort by input/output index; sorted() returns a new list, so assign it
+            dump_decode_files = sorted(dump_decode_files, key=lambda x: x.idx)
+            for anchor_type in ['input', 'output']:
+                for dump_decode_file in dump_decode_files:
+                    if dump_decode_file.type != anchor_type:
+                        continue
+                    res.append('   ├─ %s' % dump_decode_file.file_name)
+                    res.append('   └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(dump_decode_file.path))
+            res.insert(0, '[green][%s][%s][/green] %s' % (dump_file_info.op_type, dump_file_info.task_id,
+                                                          dump_file_info.op_name))
+        return Constant.NEW_LINE.join(res)
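+
+    # Worked example (illustrative value): for status 0x30000 the bits [16..18]
+    # are '011', so _decode_l2_atomic_add_status(0x30000) resolves to
+    # L2_ATOMIC_ADD_STATUS['011'], i.e. '[atomic src nan] source operand is invalid'.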
+
+    @staticmethod
+    def _decode_file(file_info, debug=False):
+        file_name = file_info.file_name
+        if debug:
+            decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        else:
+            decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        if len(decode_files) == 0:
+            # decode info file
+            util.convert_dump_to_npy(file_info.path, cfg.OVERFLOW_DECODE_DIR)
+            if debug:
+                decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+            else:
+                decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+            if len(decode_files) == 0:
+                raise PrecisionToolException("Decode overflow debug file: %s failed." % file_name)
+        decode_files = sorted(decode_files.values(), key=lambda x: x.timestamp)
+        return decode_files
+
+    @staticmethod
+    def _find_dump_files_by_task_id(task_id, stream_id, search_dir):
+        dump_files = util.list_npu_dump_files(search_dir)
+        dump_file_list = [item for item in dump_files.values() if item.op_type != 'Opdebug']
+        dump_file_list = sorted(dump_file_list, key=lambda x: x.timestamp)
+        for dump_file in dump_file_list:
+            if dump_file.task_id == int(task_id) and dump_file.stream_id == int(stream_id):
+                return dump_file
+        return None
+
+    def _decode_ai_core_status(self, status):
+        error_code = []
+        if type(status) is not int:
+            return error_code
+        bin_status = ''.join(reversed(bin(status)))
+        prefix = ''
+        self.log.debug('Decode AI Core Overflow status:[%s]', hex(status))
+        for i in range(len(bin_status)):
+            if bin_status[i] == '1':
+                key = hex(int('1' + prefix, 2))
+                if key not in AI_CORE_OVERFLOW_STATUS:
+                    self.log.warning("Unknown AI Core overflow status: [%s]", key)
+                else:
+                    error_code.append(AI_CORE_OVERFLOW_STATUS[key])
+            # prefix must grow on every bit, otherwise later codes are misaligned
+            prefix += '0'
+        return error_code
+
+    def _decode_l2_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        code, _ = self._sub_bin_code(status, 16, 18)
+        if code in L2_ATOMIC_ADD_STATUS:
+            return L2_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    def _decode_dha_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        _, code = self._sub_bin_code(status, 8, 15)
+        if code in DHA_ATOMIC_ADD_STATUS:
+            # index with the extracted code, not the raw status value
+            return DHA_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    @staticmethod
+    def _sub_bin_code(status, start, end):
+        """ Get specific bit code from status in bin format
+        :param status: status num
+        :param start: start bit
+        :param end: end bit
+        :return: result in bin format and hex format
+        """
+        bin_code = bin(status).replace('0b', '')
+        append_num = end + 1 - len(bin_code)
+        if append_num > 0:
+            bin_list = ['0'] * append_num
+            bin_list.append(bin_code)
+            bin_code = ''.join(bin_list)
+        bin_start = len(bin_code) - end - 1
+        bin_end = len(bin_code) - start
+        bin_start = max(0, bin_start)
+        bin_code = bin_code[bin_start: bin_end]
+        return bin_code, hex(int(bin_code, 2))
+
+
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py
new file mode 100644
index 000000000..980b9fe33
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/adapter/tf_adapter.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+from ..util.util import util
+from ..config import config as cfg
+
+FLAG_DUMP_GE_GRAPH = 'DUMP_GE_GRAPH'
+FLAG_DUMP_GRAPH_LEVEL = 'DUMP_GRAPH_LEVEL'
+FLAG_DUMP_GRAPH_PATH = 'DUMP_GRAPH_PATH'
+FLAG_NPU_DUMP_GRAPH = 'NPU_DUMP_GRAPH'
+FUSION_SWITCH_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_switch.cfg')
+FUSION_OFF_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_off.cfg')
+
+
+class TfAdapter(object):
+    def __init__(self):
+        self.log = util.get_log()
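+
+    # The `action` strings recognized by the _is_* helpers at the bottom of this
+    # class are: 'dump', 'dump_stats', 'overflow', 'fusion_off', 'fusion_switch'
+    # and 'prof'; the methods below accept them through their `action` parameter.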
+
+    def sess_dump(self, sess):
+        """Wrap a session with the dumping debug wrapper.
+        In session run mode, use sess = sess_dump(sess).
+        :param sess: origin session
+        :return: Session
+        """
+        from tensorflow.python import debug as tf_debug
+        self._init()
+        return tf_debug.DumpingDebugWrapperSession(sess, cfg.TF_DEBUG_DUMP_DIR)
+
+    def estimator_dump(self):
+        """In estimator mode: estim_spec = tf.estimator.EstimatorSpec(training_hooks=[estimator_dump()])
+        :return: DumpingDebugHook
+        """
+        from tensorflow.python import debug as tf_debug
+        self._init()
+        return tf_debug.DumpingDebugHook(cfg.TF_DEBUG_DUMP_DIR)
+
+    def session_dump_config(self, session_config=None, action=None, dump_layer=None):
+        """
+        In TF session mode, set the dump config in session_config, e.g.:
+            config = session_dump_config()
+            config.[set your own configs]
+            with tf.Session(config=config) as sess:
+                sess.run(_)
+                tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline")
+        :param session_config: original session config
+        :param action: if action is set, there is no need to start the app with a cli wrapper
+        :param dump_layer: op names passed to the dump_layer option
+        :return: config_pb2.ConfigProto
+        """
+        from tensorflow.core.protobuf import config_pb2
+        from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+        if ((not isinstance(session_config, config_pb2.ConfigProto)) and
+                (not issubclass(type(session_config), config_pb2.ConfigProto))):
+            session_config = config_pb2.ConfigProto()
+        custom_op = None
+        for existed_custom_op in session_config.graph_options.rewrite_options.custom_optimizers:
+            if existed_custom_op.name == 'NpuOptimizer':
+                custom_op = existed_custom_op
+        if custom_op is None:
+            custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
+            custom_op.name = 'NpuOptimizer'
+            custom_op.parameter_map['use_off_line'].b = True
+        self.update_custom_op(custom_op, action, dump_layer)
+        session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+        return session_config
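+
+    # Minimal wiring sketch (assumption: running where npu_bridge is available),
+    # combining the two config helpers in an Estimator run:
+    #   adapter = TfAdapter()
+    #   run_config = NPURunConfig(dump_config=adapter.estimator_dump_config('dump'),
+    #                             session_config=adapter.session_dump_config(action='dump'))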
+
+    def estimator_dump_config(self, action=None):
+        """Return DumpConfig.
+        In estimator mode, set dump_config in NPURunConfig(), e.g.:
+            config = NPURunConfig(dump_config=estimator_dump_config(), session_config=session_config)
+        :return: DumpConfig
+        """
+        from npu_bridge.npu_init import DumpConfig
+        self._init()
+        if self._is_overflow(action):
+            config = DumpConfig(enable_dump_debug=True, dump_path=cfg.NPU_OVERFLOW_DUMP_DIR, dump_mode="all")
+        elif self._is_dump(action):
+            config = DumpConfig(enable_dump=True, dump_path=cfg.DEFAULT_NPU_DUMP_DIR, dump_step=cfg.TF_DUMP_STEP,
+                                dump_mode="all")
+        else:
+            config = DumpConfig()
+        return config
+
+    def npu_device_dump_config(self, npu_device, action):
+        """For tf2.x
+        :param npu_device: npu_device
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :return: npu_device
+        """
+        self._init()
+        if self._is_overflow(action):
+            npu_device.global_options().dump_config.enable_dump_debug = True
+            npu_device.global_options().dump_config.dump_path = cfg.NPU_OVERFLOW_DUMP_DIR
+            npu_device.global_options().dump_config.dump_debug_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+        if self._is_dump(action):
+            npu_device.global_options().dump_config.enable_dump = True
+            npu_device.global_options().dump_config.dump_path = cfg.DEFAULT_NPU_DUMP_DIR
+            npu_device.global_options().dump_config.dump_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+            npu_device.global_options().dump_config.dump_step = cfg.TF_DUMP_STEP
+        if self._is_dump_stats(action):
+            npu_device.global_options().dump_config.dump_data = "stats"
+        if self._is_fusion_off(action):
+            npu_device.global_options().fusion_switch_file = FUSION_OFF_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        if self._is_fusion_switch(action):
+            npu_device.global_options().fusion_switch_file = FUSION_SWITCH_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        return npu_device
+
+    def update_custom_op(self, custom_op, action=None, dump_layer=None):
+        """Update custom_op
+        :param custom_op: origin custom op
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :return:
+        """
+        import tensorflow as tf
+        self._init()
+        custom_op.parameter_map['debug_dir'].s = tf.compat.as_bytes(cfg.DEFAULT_OP_DEBUG_DIR)
+        if self._is_overflow(action):
+            custom_op.parameter_map['enable_dump_debug'].b = True
+            custom_op.parameter_map['dump_debug_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.NPU_OVERFLOW_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+        elif self._is_dump(action):
+            custom_op.parameter_map['enable_dump'].b = True
+            custom_op.parameter_map['dump_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.DEFAULT_NPU_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+            custom_op.parameter_map['dump_step'].s = tf.compat.as_bytes(cfg.TF_DUMP_STEP)
+        if self._is_dump_stats(action):
+            custom_op.parameter_map['dump_data'].s = tf.compat.as_bytes("stats")
+        if dump_layer is not None:
+            custom_op.parameter_map['dump_layer'].s = tf.compat.as_bytes(dump_layer)
+        if self._is_fusion_off(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_OFF_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        elif self._is_fusion_switch(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_SWITCH_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        if self._is_prof(action):
+
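+            # profiling_options below is a JSON string consumed by the NPU profiler;
+            # its output directory cfg.PROFILING_DIR is created in _init()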
custom_op.parameter_map["profiling_mode"].b = True + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes('{"output":"%s",\ + "storage_limit": "1000MB","training_trace":"on","l2":"on","hccl":"on","task_trace":"on",\ + "aicpu":"on","fp_point":"", "bp_point":"","aic_metrics":"PipeUtilization","msproftx":"on"}' % + cfg.PROFILING_DIR) + return custom_op + + def _init(self): + util.create_dir(cfg.DEFAULT_OP_DEBUG_DIR) + util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + util.create_dir(cfg.PROFILING_DIR) + self._set_dump_graph_flags() + + @staticmethod + def _set_dump_graph_flags(): + os.environ[FLAG_DUMP_GE_GRAPH] = str(cfg.DUMP_GE_GRAPH_VALUE) + os.environ[FLAG_DUMP_GRAPH_LEVEL] = str(cfg.DUMP_GRAPH_LEVEL_VALUE) + os.environ[FLAG_DUMP_GRAPH_PATH] = cfg.DEFAULT_NPU_GRAPH_DIR + os.environ[FLAG_NPU_DUMP_GRAPH] = 'true' + + @staticmethod + def _is_dump(action): + if action is not None: + return 'dump' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_dump_stats(action): + if action is not None: + return 'dump_stats' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_overflow(action): + if action is not None: + return 'overflow' in action + if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] == 'True': + print("[PrecisionTool] enable npu overflow >======") + return True + return False + + @staticmethod + def _is_fusion_off(action): + return 'fusion_off' in action if action is not None else False + + @staticmethod + def _is_fusion_switch(action): + return ('fusion_switch' in action) if action is not None else False + + @staticmethod + def _is_prof(action): + return ('prof' in action) if action is not None else False + -- Gitee From 9cbb9d1596337655f4503dc28375900248610b99 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:43:15 +0000 Subject: [PATCH 05/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/compare/compare.py | 215 ++++++++++++++++++ .../lib/compare/compare_result.py | 143 ++++++++++++ 2 files changed, 358 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py new file mode 100644 index 000000000..e166f5042 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare.py @@ -0,0 +1,215 @@ +# coding=utf-8 +""" +Compare +""" +import json +import os +import numpy as np +from .compare_result import CompareResult +from ..util.constant import Constant +from ..util.util import util +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException +from ..util.precision_tool_exception import catch_tool_exception + + +class 
Compare(object):
+    def __init__(self):
+        """Init"""
+        super(Compare, self).__init__()
+        self.log = util.get_log()
+        self.vector_compare_results = {}
+
+    @catch_tool_exception
+    def prepare(self):
+        util.create_dir(cfg.VECTOR_COMPARE_PATH)
+
+    def npu_tf_vector_compare(self, graphs, npu_root_dir, tf_root_dir, result_dir):
+        """Compare npu dump data with tf dump data
+        :param graphs: npu ge graph json file list
+        :param npu_root_dir: npu dump root dir
+        :param tf_root_dir: tf dump root dir
+        :param result_dir: result dir
+        :return:
+        """
+        for graph_file in graphs:
+            self.log.info("Compare npu tf with graph %s", graph_file)
+            sub_graphs = self._get_sub_graphs(graph_file)
+            if sub_graphs is None:
+                continue
+            for sub_graph in sub_graphs:
+                npu_dir = self._get_sub_dir_by_sub_graph_name(sub_graph, npu_root_dir)
+                if npu_dir is None:
+                    self.log.warning("Can not find any sub graph dir named %s", sub_graph)
+                    # for some infer case, sub_graph name may not match sub dir name.
+                    npu_dir_0 = self._get_sub_dir_by_sub_graph_name(sub_graph + '_0', npu_root_dir)
+                    if npu_dir_0 is None:
+                        self.log.warning("Can not find any sub graph dir named %s", sub_graph + '_0')
+                        continue
+                    npu_dir = npu_dir_0
+                self.vector_compare(npu_dir, tf_root_dir, result_dir, graph_file)
+
+    @catch_tool_exception
+    def _get_sub_dir_by_sub_graph_name(self, sub_graph, npu_root_dir):
+        sub_graph_dirs = []
+        for dir_path, dir_names, file_names in os.walk(npu_root_dir, followlinks=True):
+            if sub_graph in dir_names:
+                # walk sub graph dir
+                for sub_dir_path, sub_dir_names, sub_file_names in os.walk(os.path.join(dir_path, sub_graph),
+                                                                           followlinks=True):
+                    if len(sub_dir_names) == 0:
+                        sub_graph_dirs.append(sub_dir_path)
+        if len(sub_graph_dirs) == 0:
+            return None
+        self.log.warning("Find [%d] dirs in sub graph dir [%s], %s, will compare the first one.",
+                         len(sub_graph_dirs), sub_graph, sub_graph_dirs)
+        return sub_graph_dirs[0]
+
+    @catch_tool_exception
+    def _get_sub_graphs(self, graph_file):
+        with open(graph_file, 'r') as f:
+            graph_json = json.load(f)
+        if 'graph' not in graph_json:
+            raise PrecisionToolException("No graph in file: %s" % graph_file)
+        sub_graphs = []
+        for graph in graph_json['graph']:
+            sub_graphs.append(graph['name'])
+        return sub_graphs
+
+    '''
+    @staticmethod
+    def _get_ge_default_dirs(self, root_dir):
+        for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True):
+            for dir_name in dir_names:
+    '''
+
+    def npu_vector_compare(self, debug_0_root_dir, debug_1_root_dir):
+        """Compare two npu dump data
+        :param debug_0_root_dir: first npu debug dir
+        :param debug_1_root_dir: second npu debug dir
+        :return:
+        """
+        # debug_0_sub_dirs = self._get_ge_default_dirs(debug_0_root_dir)
+        # debug_1_sub_dirs = self._get_ge_default_dirs(debug_1_root_dir)
+
+    def vector_compare(self, lh_path, rh_path, result_dir, graph_json=None):
+        """Compare all ops"""
+        if lh_path is None or util.empty_dir(lh_path):
+            raise PrecisionToolException("No valid dump file in %s" % lh_path)
+        if rh_path is None or util.empty_dir(rh_path):
+            raise PrecisionToolException("No valid dump file in %s" % rh_path)
+        self.log.info("Start vector compare process...")
+        util.compare_vector(lh_path, rh_path, graph_json, result_dir)
+        self.log.info("Vector compare process finish.")
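+
+    # Usage sketch (illustrative paths): compare one npu dump dir against a tf dump
+    # dir and write result csv files under cfg.VECTOR_COMPARE_PATH:
+    #   cmp = Compare()
+    #   cmp.prepare()
+    #   cmp.vector_compare('precision_data/npu/debug_0/dump', 'precision_data/tf/dump',
+    #                      cfg.VECTOR_COMPARE_PATH)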
+
+    def _get_compare_result_by_file_name(self, file_name):
+        results = []
+        if file_name is None:
+            sub_dir = util.get_newest_dir(cfg.VECTOR_COMPARE_PATH)
+            if sub_dir == '':
+                raise PrecisionToolException("Empty vector compare path:%s" % cfg.VECTOR_COMPARE_PATH)
+            file_name = os.path.join(cfg.VECTOR_COMPARE_PATH, sub_dir)
+        if os.path.isfile(file_name):
+            results.append(CompareResult(file_name))
+        if os.path.isdir(file_name):
+            vector_compare_result_files = util.list_vector_compare_result_files(file_name)
+            if vector_compare_result_files is None or len(vector_compare_result_files) == 0:
+                raise PrecisionToolException("Can not find any vector compare result in dir:%s" % file_name)
+            file_list = sorted(vector_compare_result_files.values(), key=lambda x: x.timestamp)
+            file_names = [x.file_name for x in file_list]
+            self.log.debug("Find %s result files in dir %s", file_names, file_name)
+            for file in file_list:
+                results.append(CompareResult(file.path))
+        return results
+
+    @catch_tool_exception
+    def vector_summary(self, file_name=None, cos_sim_threshold=0.98, limit=1):
+        """Print the ops whose cosine similarity falls below the threshold in a vector compare result"""
+        compare_results = self._get_compare_result_by_file_name(file_name)
+        error_ops = []
+        for compare_result in compare_results:
+            err_ops = compare_result.get_op_by_cosine_sim_threshold(cos_sim_threshold, limit)
+            self.log.info("Find %d ops less than %s in %s", len(err_ops), cos_sim_threshold,
+                          compare_result.file_path)
+            error_ops.extend(err_ops)
+        if len(error_ops) == 0:
+            self.log.info("Can not find any op with cosine similarity below threshold: %s", cos_sim_threshold)
+        else:
+            for i, error_op in enumerate(error_ops):
+                if i < limit:
+                    error_op.summary(cos_sim_threshold)
+        return error_ops
+
+    def compare_data(self, left, right, save_txt=False, rl=0.001, al=0.001, diff_count=20):
+        """Compare data"""
+        left = self._detect_file(left)
+        right = self._detect_file(right)
+        if left is None or right is None:
+            raise PrecisionToolException("invalid input or output")
+        # save to txt
+        if save_txt:
+            util.save_npy_to_txt(left)
+            util.save_npy_to_txt(right)
+        # compare data
+        total_cnt, all_close, cos_sim, err_percent = self._do_compare_data(left, right, rl, al, diff_count)
+        content = ['Left:', ' ├─ NpyFile: %s' % left]
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % left)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(left))
+        content.append('Right:')
+        content.append(' ├─ NpyFile: %s' % right)
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % right)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(right))
+        content.append('NumCnt: %s' % total_cnt)
+        content.append('AllClose: %s' % all_close)
+        content.append('CosSim: %s' % cos_sim)
+        content.append('ErrorPer: %s (rl= %s, al= %s)' % (err_percent, rl, al))
+        util.print_panel(Constant.NEW_LINE.join(content))
+
+    def _do_compare_data(self, left, right, rl=0.001, al=0.001, diff_count=20):
+        data_left = np.load(left).astype(np.float32)
+        data_right = np.load(right).astype(np.float32)
+        shape_left = data_left.shape
+        shape_right = data_right.shape
+        if shape_left != shape_right:
+            self.log.warning("Data shape not equal: %s vs %s", data_left.shape, data_right.shape)
+        data_left = data_left.reshape(-1)
+        data_right = data_right.reshape(-1)
+        if data_left.shape[0] != data_right.shape[0]:
+            self.log.warning("Data size not equal: %s vs %s", data_left.shape, data_right.shape)
+            # zero-pad the shorter array so the element-wise comparison is defined
+            if data_left.shape[0] < data_right.shape[0]:
+                data_left = np.pad(data_left, (0, data_right.shape[0] - data_left.shape[0]), 'constant')
+            else:
+                data_right = np.pad(data_right, (0, data_left.shape[0] - data_right.shape[0]), 'constant')
+        all_close = np.allclose(data_left, data_right, atol=al, rtol=rl)
+        # cos_sim = 1 - spatial.distance.cosine(data_left, data_right)
+        cos_sim = np.dot(data_left, data_right) / (
+
np.sqrt(np.dot(data_left, data_left)) * np.sqrt(np.dot(data_right, data_right))) + err_cnt = 0 + total_cnt = data_left.shape[0] + diff_table_columns = ['Index', 'Left', 'Right', 'Diff'] + err_table = util.create_table("Error Item Table", diff_table_columns) + top_table = util.create_table("Top Item Table", diff_table_columns) + for i in range(total_cnt): + abs_diff = abs(data_left[i] - data_right[i]) + if i < diff_count: + top_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff)) + if abs_diff > (al + rl * abs(data_right[i])): + if err_cnt < diff_count: + err_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff)) + err_cnt += 1 + err_percent = float(err_cnt / total_cnt) + util.print(util.create_columns([err_table, top_table])) + return total_cnt, all_close, cos_sim, err_percent + + def _detect_file(self, file_name): + """Find files in npu/overflow/cpu dump dir""" + if os.path.isfile(file_name): + return file_name + for parent_dir in [cfg.TMP_DIR, cfg.TF_DUMP_DIR]: + file_infos = util.list_numpy_files(parent_dir, file_name) + if len(file_infos) > 0: + self.log.info("Find %s, choose first one.", list(file_infos.keys())) + return list(file_infos.values())[0].path + return None diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py new file mode 100644 index 000000000..5e67dd24d --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/compare/compare_result.py @@ -0,0 +1,143 @@ +import collections +import os +import numpy as np +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import PrecisionToolException +from ..util.precision_tool_exception import catch_tool_exception + + +class RowMap(object): + """ + 'Index': 0, + 'LeftOp': 1, + 'RightOp': 2, + 'TensorIdx': 3, # TensorIndex + 'CosSim': 4, # CosineSimilarity + 'MaxAbs': 5, # MaxAbsoluteError + 'ARE': 6, # AccumulatedRelativeError + 'RED': 7, # RelativeEuclideanDistance + 'KLD': 8, # KullbackLeiblerDivergence + 'StandardDeviation': 9 # StandardDeviation + """ + def __init__(self, item=None): + self.index = 0 + self.left = 1 + self.right = 2 + self.tensor_index = 3 + self.cosine_similarity = 4 + self.max_abs = 5 + if item is not None: + self.update(item) + + def update(self, item): + for i, value in enumerate(item): + self.left = i if value == 'LeftOp' else self.left + self.right = i if value == 'RightOp' else self.right + self.tensor_index = i if value == 'TensorIndex' else self.tensor_index + self.cosine_similarity = i if value == 'CosineSimilarity' else self.cosine_similarity + self.max_abs = i if value == 'MaxAbsoluteError' else self.max_abs + + +class CompareItem(object): + def __init__(self, op_name, item, row_map): + self.row_map = row_map + self.index = int(item[self.row_map.index]) + self.op_name = op_name + self.left = item[self.row_map.left].split(" ") + self.right = item[self.row_map.right].split(" ") + self.input = [] + self.output = [] + + def update(self, item): + tensor_index = item[self.row_map.tensor_index] + if tensor_index not in ['NaN', '*']: + item_detail = tensor_index.split(':') + if len(item_detail) != 3: + raise PrecisionToolException("item:%d tensor index invalid. 
[%s]" % ( + item[self.row_map.index], tensor_index)) + if item_detail[1] == 'input': + self.input.insert(int(item_detail[2]), item) + else: + self.output.insert(int(item_detail[2]), item) + + def is_cosine_sim_over_threshold(self, threshold): + for item in self.output: + if item[self.row_map.cosine_similarity] == 'NaN': + continue + if float(item[self.row_map.cosine_similarity]) <= threshold: + return True + return False + + @staticmethod + def _color_data(data, threshold): + try: + data = float(data) + if np.isnan(data): + raise ValueError + elif data <= threshold: + return "[red]%s[/red]" % data + else: + return "[green]%s[/green]" % data + except ValueError: + return "[yellow]%s[/yellow]" % data + + def summary(self, threshold): + content = ["Left: %s" % self.left, "Right: %s" % self.right, "Input: "] + input_txt = [] + for i, item in enumerate(self.input): + input_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.extend([Constant.TAB_LINE.join(input_txt), "Output:"]) + output_txt = [] + for i, item in enumerate(self.output): + output_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.append(Constant.TAB_LINE.join(output_txt)) + title = "[%d] %s" % (self.index, self.op_name) + util.print_panel(Constant.NEW_LINE.join(content), title=title) + + +class CompareResult(object): + def __init__(self, file_path): + self.file_path = file_path + self.ops = None + self.prepare() + + @catch_tool_exception + def prepare(self): + if not str(self.file_path).endswith(Constant.Suffix.CSV): + raise PrecisionToolException("Compare result file %s not a csv file." % self.file_path) + if not os.path.isfile(self.file_path): + raise PrecisionToolException("Compare result file %s not exist." 
% self.file_path) + items = util.read_csv(self.file_path) + self.ops = collections.OrderedDict() + row_map = RowMap() + for item in items: + if item[row_map.index] == 'Index': + row_map.update(item) + continue + if item[row_map.tensor_index] in ['NaN', '*']: + continue + tensor_index = item[row_map.tensor_index] + op_name = tensor_index.split(":")[0] + if op_name not in self.ops: + self.ops[op_name] = CompareItem(op_name, item, row_map) + op = self.ops[op_name] + op.update(item) + + def get_compare_item_by_op(self, op_name): + if self.ops is None: + self.prepare() + if self.ops is None: + raise PrecisionToolException("Invalid compare result file: %s" % self.file_path) + if op_name in self.ops: + return self.ops[op_name] + return None + + def get_op_by_cosine_sim_threshold(self, threshold, limit=-1): + result = [] + for compare_item in self.ops.values(): + if compare_item.is_cosine_sim_over_threshold(threshold): + result.append(compare_item) + if len(result) == limit: + break + return result -- Gitee From aac2114b7a85409e20b497843443e1beb3c1f7a8 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:43:28 +0000 Subject: [PATCH 06/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/config/config.py | 114 ++++++++++++++++++ .../precision_tool/lib/config/fusion_off.cfg | 10 ++ .../lib/config/fusion_switch.cfg | 6 + 3 files changed, 130 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py new file mode 100644 index 000000000..f9568666c --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/config.py @@ -0,0 +1,114 @@ +# coding=utf-8 +import os + +# Dump config '0|5|10' +TF_DUMP_STEP = '0' + +# path to run package operator cmp compare +# default may be /usr/local/Ascend/ +CMD_ROOT_PATH = '/usr/local/Ascend/' +ASCEND_SET_ENV = os.path.join(CMD_ROOT_PATH, 'bin/setenv.bash') + + +# ASCEND Log Path +ASCEND_LOG_PATH = '/root/ascend/log/plog/' + +# TOOL CONFIG +LOG_LEVEL = "NOTSET" +ROOT_DIR = '' + +# [train/infer] if adapt from msquickcmp result, set net type to infer +NET_TYPE = 'infer' + +''' +precision_data/ +├── npu +│ ├── debug_0 +| | ├── dump +| | | └── 20210510101133 +| │ └── graph +| | └── ge_proto_00000179_PreRunAfterBuild.txt +│ └── debug_1 +├── tf +| ├── tf_debug +| └── dump +├── overflow +├── fusion +└── temp + ├── op_graph + ├── decode + | ├── dump_decode + | ├── overflow_decode + | └── dump_convert + └── vector_compare + ├── 20210510101133 + | ├── result_123456.csv + | └── result_123455.csv + └── 20210510101134 + └── result_123458.csv +''' + +# Static dirs, do not change +DATA_ROOT_DIR = os.path.join(ROOT_DIR, 'precision_data') + +# fusion +FUSION_DIR = os.path.join(DATA_ROOT_DIR, 'fusion') + +# npu dump/graph parent dir +NPU_DIR = os.path.join(DATA_ROOT_DIR, 'npu') +DEFAULT_NPU_DIR = os.path.join(NPU_DIR, 'debug_0') +DEFAULT_NPU_DUMP_DIR = os.path.join(DEFAULT_NPU_DIR, 'dump') +DEFAULT_NPU_GRAPH_DIR = 
os.path.join(DEFAULT_NPU_DIR, 'graph') +PROFILING_DIR = os.path.join(DEFAULT_NPU_DIR, 'prof') +DEFAULT_OP_DEBUG_DIR = DEFAULT_NPU_DIR + +# npu overflow dir +OVERFLOW_DIR = os.path.join(DATA_ROOT_DIR, 'overflow') +NPU_OVERFLOW_DUMP_DIR = os.path.join(OVERFLOW_DIR, 'dump') + +# tf dirs +TF_DIR = os.path.join(DATA_ROOT_DIR, 'tf') +TF_DEBUG_DUMP_DIR = os.path.join(TF_DIR, 'tf_debug') +TF_DUMP_DIR = os.path.join(TF_DIR, 'dump') +TF_GRAPH_DIR = os.path.join(TF_DIR, 'graph') +# tf checkpoints +TF_CKPT_ROOT = os.path.join(TF_DIR, 'checkpoints') +TF_CKPT_FILE = os.path.join(TF_CKPT_ROOT, 'ckpt') +TF_CKPT_INPUT_DIR = os.path.join(TF_CKPT_ROOT, 'input') + +# pytroch dirs +PT_DIR = os.path.join(DATA_ROOT_DIR, 'pt') +PT_NPU_DIR = os.path.join(PT_DIR, 'npu') +PT_GPU_DIR = os.path.join(PT_DIR, 'gpu') + +# tmp dirs +TMP_DIR = os.path.join(DATA_ROOT_DIR, 'temp') +OP_GRAPH_DIR = os.path.join(TMP_DIR, 'op_graph') + +DECODE_DIR = os.path.join(TMP_DIR, 'decode') +OVERFLOW_DECODE_DIR = os.path.join(DECODE_DIR, 'overflow_decode') +DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'dump_decode') +PT_DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'pt') +DUMP_CONVERT_DIR = os.path.join(DECODE_DIR, 'dump_convert') + +VECTOR_COMPARE_PATH = os.path.join(TMP_DIR, 'vector_compare') +TF_TENSOR_NAMES = os.path.join(TMP_DIR, 'tf_tensor_names.txt') +TF_TENSOR_DUMP_CMD = os.path.join(TMP_DIR, 'tf_tensor_cmd.txt') + +# FLAG +PRECISION_TOOL_OVERFLOW_FLAG = 'PRECISION_TOOL_OVERFLOW' +PRECISION_TOOL_DUMP_FLAG = 'PRECISION_TOOL_DUMP' + +# for previous version, set 0 +OP_DEBUG_LEVEL = 4 +# DUMP CONFIG +DUMP_GE_GRAPH_VALUE = 2 +DUMP_GRAPH_LEVEL_VALUE = 3 +DUMP_SEED = 2022 + +# TF_DEBUG +TF_DEBUG_TIMEOUT = 360 + +# MSACCUCMP +MS_ACCU_CMP = r'msaccucmp.py[c]?' +BUILD_JSON_GRAPH_NAME = 'Build' diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg new file mode 100644 index 000000000..823672b74 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_off.cfg @@ -0,0 +1,10 @@ +{ + "Switch": { + "GraphFusion": { + "ALL": "off" + }, + "UBFusion": { + "ALL": "off" + } + } +} \ No newline at end of file diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg new file mode 100644 index 000000000..572ad4271 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/config/fusion_switch.cfg @@ -0,0 +1,6 @@ +{ +"Switch": { + "GraphFusion": {}, + "UBFusion": {} +} +} \ No newline at end of file -- Gitee From 6e4ed2805780821e34bb528d3f300faf8904d4f8 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:43:47 +0000 Subject: [PATCH 07/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/dump/dump_manager.py | 89 ++++++++ .../precision_tool/lib/dump/npu_dump.py | 200 ++++++++++++++++++ .../precision_tool/lib/dump/pt_dump.py | 65 ++++++ .../precision_tool/lib/dump/tf_dump.py | 141 ++++++++++++ 4 files changed, 495 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py create mode 100644 
TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py
new file mode 100644
index 000000000..939f4fa47
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/dump_manager.py
@@ -0,0 +1,89 @@
+# coding=utf-8
+import os
+import collections
+from ..util.util import util
+from ..util.constant import Constant
+from .npu_dump import NpuDump
+from .tf_dump import TfDump
+from .pt_dump import PtDump
+from ..config import config as cfg
+
+
+class DumpManager(object):
+    def __init__(self):
+        self.npu_dumps = collections.OrderedDict()
+        self.pt_dump = PtDump(cfg.PT_DIR)
+        self.tf_dump = TfDump(cfg.TF_DUMP_DIR)
+        self._init_dirs()
+
+    def prepare(self):
+        # 1. prepare npu dump
+        sub_dirs = os.listdir(cfg.NPU_DIR)
+        if len(sub_dirs) == 0:
+            # create default
+            sub_dirs = [Constant.DEFAULT_DEBUG_ID]
+        # sorted() returns a new list; assign it so debug ids are processed in order
+        sub_dirs = sorted(sub_dirs)
+        for sub_dir in sub_dirs:
+            npu_dump = NpuDump(sub_dir)
+            npu_dump.prepare()
+            self.npu_dumps[sub_dir] = npu_dump
+        # 2. prepare tf dump
+        self.tf_dump.prepare()
+        # 3. prepare pt dump
+        self.pt_dump.prepare()
+
+    def get_dump_root_dir(self, debug_id):
+        if debug_id in self.npu_dumps:
+            return self.npu_dumps[debug_id].dump_root
+        return None
+
+    def op_dump_summary(self, ops):
+        npu_result = collections.OrderedDict()
+        for debug_id, op in ops.items():
+            if debug_id in self.npu_dumps:
+                npu_result[debug_id] = collections.OrderedDict()
+                for op_detail in op:
+                    npu_result[debug_id][op_detail.graph_name] = self.npu_dumps[debug_id].op_dump_summary(op_detail)
+        tf_result = None
+        if self.tf_dump is not None and len(ops[Constant.DEFAULT_DEBUG_ID]) != 0:
+            tf_result = self.tf_dump.op_dump_summary(ops[Constant.DEFAULT_DEBUG_ID][0])
+        return npu_result, tf_result
+
+    def pt_dump_summary(self, ir_name):
+        """Pytorch dump summary"""
+        return self.pt_dump.op_dump_summary(ir_name)
+
+    def convert_npu_dump(self, name, data_format=None, dst_path=None):
+        for _, npu_dump in enumerate(self.npu_dumps.values()):
+            npu_dump.convert_npu_dump(name, data_format, dst_path)
+
+    def print_tensor(self, file_name, is_convert):
+        """Print numpy data file"""
+        if os.path.isfile(file_name):
+            return util.print_npy_summary(os.path.dirname(file_name), os.path.basename(file_name), is_convert)
+        # file_name = file_name.replace('/', '_')
+        # npu decode file
+        npu_convert_files = self.npu_dumps[Constant.DEFAULT_DEBUG_ID].get_npu_dump_decode_files_by_name(file_name)
+        self._print_tensors(npu_convert_files, is_convert)
+        # util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name)
+        # tf decode file
+        tf_decode_files = self.tf_dump.get_dump_files_by_name(file_name, True)
+        self._print_tensors(tf_decode_files, is_convert)
+        # pt decode file
+        pt_decode_files = self.pt_dump.get_dump_files_by_name(file_name)
+        self._print_tensors(pt_decode_files, is_convert)
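+
+    # Usage sketch (the file name is illustrative): after a run has produced dump
+    # files under precision_data/, a caller would typically do:
+    #   manager = DumpManager()
+    #   manager.prepare()
+    #   manager.print_tensor('Add.add_1.output.0', is_convert=True)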
+
+    @staticmethod
+    def _print_tensors(file_infos, is_convert):
+        if file_infos is not None:
+            for file_info in file_infos.values():
+                util.print_npy_summary(file_info.dir_path, file_info.file_name, is_convert)
+
+    @staticmethod
+    def _init_dirs():
+        """Create dump file dirs"""
+        util.create_dir(cfg.DUMP_DECODE_DIR)
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        util.create_dir(cfg.OVERFLOW_DECODE_DIR)
+        util.create_dir(cfg.TF_DUMP_DIR)
+        util.create_dir(cfg.PT_DIR)
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py
new file mode 100644
index 000000000..7e3e4a9d8
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/npu_dump.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+import re
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.precision_tool_exception import PrecisionToolException
+from ..config import config as cfg
+
+
+class NpuDumpDecodeFile(object):
+    def __init__(self):
+        self.log = util.get_log()
+        self.input_files = {}
+        self.output_files = {}
+        self.timestamp = -1
+        self.op_name = ''
+        self.op_type = ''
+        self.task_id = -1
+        # self.stream_id = -1
+
+    def update(self, file_info):
+        """Prepare op npu decode file map."""
+        if not self._check(file_info):
+            self.log.warning('Invalid NpuDumpDecodeFile: %s', file_info)
+            return
+        if file_info.type == 'input':
+            self.input_files[file_info.idx] = file_info
+        else:
+            self.output_files[file_info.idx] = file_info
+
+    def summary(self):
+        txt = ['[yellow][%s][TaskID: %d][/yellow][green][%s][/green] %s' % (
+            self.timestamp, self.task_id, self.op_type, self.op_name)]
+        if len(self.input_files) > 0:
+            info = self.input_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Input:  [green][0][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.input_files)):
+                info = self.input_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('           [green][%d][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        if len(self.output_files) > 0:
+            info = self.output_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Output: [green][0][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.output_files)):
+                info = self.output_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('           [green][%d][/green][yellow][%s][%s][Max:%d][Min:%d][Mean:%d][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        return Constant.NEW_LINE.join(txt)
+
+    def _check(self, file_info):
+        if self.timestamp == -1:
+            self.timestamp = file_info.timestamp
+            self.op_name = file_info.op_name
+            self.op_type = file_info.op_type
+            self.task_id = file_info.task_id
+            # self.stream_id = file_info.stream_id
+            return True
+        # file_info is an object, not a dict; compare the timestamp attribute
+        return self.timestamp == file_info.timestamp
+
+
+class NpuDump(object):
+    def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_id = debug_id
+        npu_root = os.path.join(cfg.NPU_DIR, debug_id)
+        self.dump_root = os.path.join(npu_root, Constant.DUMP)
+
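+        # decode_dir holds the npy files converted from this debug_id's raw dump data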
self.decode_dir = os.path.join(cfg.DUMP_DECODE_DIR, debug_id) + self.dump_files = None + self._init_dirs() + + def prepare(self): + """Prepare npu/cpu dump files""" + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get npu dump files by Op""" + npu_files = {} + op_name = op.name().replace('/', '_').replace('.', '_') + match_name = op.type() + '.' + op_name + '\\.' + for f in self.dump_files: + # match op name and graph name, infer dump directory may not has graph + if re.match(match_name, f) and (op.graph_name in self.dump_files[f].path or cfg.NET_TYPE == 'infer'): + npu_files[f] = self.dump_files[f] + return npu_files + + @catch_tool_exception + def op_dump_summary(self, op): + """ print op dump info""" + if op is None: + raise PrecisionToolException("Get None operator") + # search npu dump file by op name + npu_dump_files = self.get_npu_dump_decode_files_by_op(op) + npu_dump_files = sorted(npu_dump_files.values(), key=lambda x: (x.idx, x.timestamp)) + input_txt = ['NpuDumpInput:'] + output_txt = ['NpuDumpOutput:'] + for npu_dump_file in npu_dump_files: + if str(npu_dump_file.file_name).endswith(Constant.Suffix.CSV): + detail = util.read_csv(npu_dump_file.path) + input_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + for item in detail: + item_txt = '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (item[5], item[3], item[6], item[7], item[8]) + if item[0] == 'Input': + input_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + elif item[0] == 'Output': + output_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + continue + if npu_dump_file.type == 'input': + input_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + input_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + else: + output_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + input_txt.extend(output_txt) + return Constant.NEW_LINE.join(input_txt) + + def _init_dirs(self): + util.create_dir(self.dump_root) + util.create_dir(self.decode_dir) + + @catch_tool_exception + def _parse_dump_files(self): + """prepare npu dump, support soft link""" + sub_dir = util.get_newest_dir(self.dump_root) + sub_dir = os.path.join(self.dump_root, sub_dir) if sub_dir != '' else self.dump_root + self.dump_files = util.list_npu_dump_files(sub_dir) + + def list_dump(self, dir_path, file_name): + """list dump""" + + @staticmethod + def get_npu_dump_decode_files_by_name(file_name): + file_name = file_name.replace('/', '_') + return util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name) + + def get_npu_dump_decode_files_by_op(self, op): + """Get npu dump decode files by op""" + dump_files = self.get_dump_files_by_op(op) + result = {} + for dump_file_key in dump_files.keys(): + dump_file = dump_files[dump_file_key] + if str(dump_file.file_name).endswith(Constant.Suffix.CSV): + result.update({dump_file_key: dump_file}) + continue + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + if len(dump_decode_files) == 0: + util.convert_dump_to_npy(dump_file.path, self.decode_dir) + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + result.update(dump_decode_files) + return result + + def 
convert_npu_dump(self, name, data_format=None, dst_path=None): + """Convert npu dump to npy of data_format""" + if os.path.isfile(name): + # absolute path to file + self.log.info("Decode file: %s", name) + file_name = os.path.basename(name) + file_path = name + elif os.path.isdir(name): + # decode all files in path + self.log.info("Decode all files in path: %s", name) + file_name = '' + file_path = name + elif self.dump_files is not None and name in self.dump_files: + self.log.info("Decode npu dump file: %s in default dump path", name) + file_info = self.dump_files[name] + file_name = file_info.file_name + file_path = file_info.path + else: + # maybe op name + file_info = self._get_file_by_op_name(name) + if file_info is None: + raise PrecisionToolException("Can not find any op/dump file named %s" % name) + file_name = file_info.file_name + file_path = file_info.path + dst_path = cfg.DUMP_CONVERT_DIR if dst_path is None else dst_path + util.convert_dump_to_npy(file_path, dst_path, data_format) + dump_convert_files = util.list_npu_dump_convert_files(dst_path, file_name) + # print result info + + summary_txt = ['SrcFile: %s' % name] + for convert_file in dump_convert_files.values(): + summary_txt.append(' - %s' % convert_file.file_name) + util.print_panel(Constant.NEW_LINE.join(summary_txt)) + + def _get_file_by_op_name(self, op_name): + """Get dump file info by op name""" + op_name = op_name.replace('/', '_') + for file_info in self.dump_files.values(): + if file_info.op_name == op_name: + return file_info + return None diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py new file mode 100644 index 000000000..27fd006dc --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/pt_dump.py @@ -0,0 +1,65 @@ +# coding=utf-8 +from ..util.util import util +from ..util.h5_util import H5Util +from ..util.h5_util import gen_h5_data_name +from ..config import config as cfg +from ..util.constant import Constant + + +class PtDump(object): + def __init__(self, data_dir): + self.log = util.get_log() + self.npu = None + self.gpu = None + self.data_dir = data_dir + + def prepare(self): + util.create_dir(cfg.PT_NPU_DIR) + util.create_dir(cfg.PT_GPU_DIR) + util.create_dir(cfg.PT_DUMP_DECODE_DIR) + if not util.empty_dir(cfg.PT_NPU_DIR): + npu_h5_files = util.list_h5_files(cfg.PT_NPU_DIR) + if len(npu_h5_files) != 0: + file_list = sorted(npu_h5_files.values(), key=lambda x: x.timestamp) + self.npu = H5Util(file_list[0].path, prefix='npu') + if not util.empty_dir(cfg.PT_GPU_DIR): + gpu_h5_files = util.list_h5_files(cfg.PT_GPU_DIR) + if len(gpu_h5_files) != 0: + file_list = sorted(gpu_h5_files.values(), key=lambda x: x.timestamp) + self.gpu = H5Util(file_list[0].path, prefix='gpu') + + @staticmethod + def get_dump_files_by_name(file_name): + """Get dump files by name""" + npu_pattern = gen_h5_data_name(file_name, 'npu') if '/' in file_name else file_name + gpu_pattern = gen_h5_data_name(file_name, 'gpu') if '/' in file_name else file_name + files = util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=npu_pattern) + files.update(util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=gpu_pattern)) + return files + + def op_dump_summary(self, ir_name): + summary_list = [] + op_id = self._parse_op_id(ir_name) + if self.npu is not None: + h5_op = self.npu.get_op(op_id) 
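+            # get_op returns None when op_id was not recorded in the H5 dump,
+            # so only append a summary for ops that actually exist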
+ if h5_op is not None: + summary_list.append('NPU:') + summary_list.append(h5_op.summary()) + if self.gpu is not None: + h5_op = self.gpu.get_op(op_id) + if h5_op is not None: + summary_list.append('GPU:') + summary_list.append(h5_op.summary()) + return Constant.NEW_LINE.join(summary_list) + + @staticmethod + def _parse_op_id(ir_name): + op_id = str(ir_name) + if op_id.isnumeric(): + op_id = ir_name + else: + for name in op_id.split('/'): + if name.isnumeric(): + op_id = name + break + return op_id diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py new file mode 100644 index 000000000..bb4230f54 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/dump/tf_dump.py @@ -0,0 +1,141 @@ +# coding=utf-8 +import os +import re +import time +import sys +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class TfDump(object): + def __init__(self, dump_root=cfg.TF_DUMP_DIR): + self.log = util.get_log() + self.dump_root = dump_root + self.dump_files = None + + def prepare(self): + if not os.path.exists(self.dump_root): + util.create_dir(self.dump_root) + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get cpu dump files by op""" + tf_files = {} + for output in op.outputs(): + if output.data_dump_origin_name() != '': + tf_files.update(self.get_dump_files_by_name(output.data_dump_origin_name())) + if len(tf_files) == 0: + tf_files.update(self.get_dump_files_by_name(op.name())) + return tf_files + + def get_dump_files_by_name(self, name, likely=False): + match_name = name.replace('/', '_') + if not likely: + match_name = match_name.replace('.', '_') + '\\.' + tf_files = {} + for f in self.dump_files: + if re.match(match_name, f): + tf_files[f] = self.dump_files[f] + return tf_files + + @catch_tool_exception + def op_dump_summary(self, op): + # cpu dump info + if op is None: + return '' + cpu_dump_txt = ['TfDumpOutput:'] + cpu_dump_files = self.get_dump_files_by_op(op) + cpu_dump_file_list = sorted(cpu_dump_files.values(), key=lambda x: x.timestamp) + for cpu_dump_file in cpu_dump_file_list: + cpu_dump_txt.append(' -[green][%s][/green] %s' % (cpu_dump_file.idx, cpu_dump_file.file_name)) + cpu_dump_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(cpu_dump_file.path)) + return Constant.NEW_LINE.join(cpu_dump_txt) + + def _parse_dump_files(self): + self.dump_files = util.list_cpu_dump_decode_files(self.dump_root) + + def run_tf_dbg_dump(self, cmd_line=None): + """Run tf train script to get dump data.""" + if os.path.exists(cfg.TF_DEBUG_DUMP_DIR) and len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) != 0: + self.log.info("TF offline debug path [%s] is not empty, will analyze it directly." % cfg.TF_DEBUG_DUMP_DIR) + elif cmd_line is not None: + self.log.info("Run command: %s" % cmd_line) + util.execute_command(cmd_line) + self.log.info("Run finish, start analyze TF dump.") + if not os.path.exists(cfg.TF_DEBUG_DUMP_DIR) or len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) == 0: + raise PrecisionToolException("Empty tf debug dir. 
%s" % cfg.TF_DEBUG_DUMP_DIR)
+        run_dirs = os.listdir(cfg.TF_DEBUG_DUMP_DIR)
+        run_dirs.sort()
+        # create dirs
+        util.create_dir(cfg.TF_DUMP_DIR)
+        util.create_dir(cfg.TMP_DIR)
+        # analyze each run dir in timestamp order (the newest is last)
+        for run_dir in run_dirs:
+            time.sleep(1)
+            command = "%s -m tensorflow.python.debug.cli.offline_analyzer --ui_type readline --dump_dir %s" % (
+                util.python, os.path.join(cfg.TF_DEBUG_DUMP_DIR, run_dir))
+            self._do_run_tf_dbg_dump(command, 0)
+
+    @staticmethod
+    def _make_pt_commands(tensor_name_path):
+        pt_command_list = []
+        tensor_count = {}
+        with open(tensor_name_path) as tensor_name_file:
+            # skip the 3 header lines of the tensor name file
+            next(tensor_name_file)
+            next(tensor_name_file)
+            next(tensor_name_file)
+            # start to convert each tensor into a pt command
+            for line in tensor_name_file:
+                new_line = line.strip()
+                tensor_name = new_line[new_line.rfind(' ') + 1:]
+                if tensor_name not in tensor_count:
+                    tensor_count[tensor_name] = 0
+                else:
+                    tensor_count[tensor_name] += 1
+
+                npy_file_name = "%s.%s.npy" % (tensor_name.replace("/", "_").replace(":", "."),
+                                               str(round(time.time() * 1000000)))
+                pt_command_list.append("pt %s -n %d -w %s" %
+                                       (tensor_name, tensor_count[tensor_name],
+                                        os.path.join(cfg.TF_DUMP_DIR, npy_file_name)))
+        return pt_command_list
+
+    def _do_run_tf_dbg_dump(self, cmd_line, run_times=2):
+        """Run tf debug with pexpect, should set tf debug ui_type='readline'"""
+        try:
+            import pexpect
+            import readline
+        except ImportError as import_err:
+            self.log.error("Import failed with err:%s. You can run "
+                           "'pip3 install pexpect gnureadline pyreadline' to fix it.",
+                           import_err)
+            raise PrecisionToolException("Import module error.")
+        self.log.info("======< Auto run tf train process to dump data >======")
+        self.log.info("Send run times: %d", run_times)
+        tf_dbg = pexpect.spawn(cmd_line)
+        # tf_dbg.logfile = open(cfg.DUMP_FILES_CPU_LOG, 'wb')
+        tf_dbg.logfile = sys.stdout.buffer
+        for i in range(run_times):
+            tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT)
+            self.log.info("Process %d tf_debug.run", i + 1)
+            tf_dbg.sendline('run')
+        self.log.info("Generate tensor name file.")
+        tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT)
+        tf_dbg.sendline('lt > %s' % cfg.TF_TENSOR_NAMES)
+        tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT)
+        if not os.path.exists(cfg.TF_TENSOR_NAMES):
+            self.log.error("Failed to get tensor name in tf_debug.")
+            raise PrecisionToolException("Get tensor name in tf_debug failed.")
+        self.log.info("Saved tensor names successfully. Generating tf dump commands from file: %s", cfg.TF_TENSOR_NAMES)
+        pt_commands = self._make_pt_commands(cfg.TF_TENSOR_NAMES)
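+        # each command follows the "pt <tensor_name> -n <hit_index> -w <output.npy>" form
+        # built in _make_pt_commands, e.g. (illustrative tensor name):
+        #   pt dense/kernel/read:0 -n 0 -w <TF_DUMP_DIR>/dense_kernel_read.0.1721192000000000.npy
+        self.log.info("Pt %d tensors." 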
% len(pt_commands)) + for cmd in pt_commands: + self.log.debug(cmd.strip()) + tf_dbg.sendline(cmd.strip()) + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + tf_dbg.sendline('exit') + self.log.info('Finish dump tf data') -- Gitee From 33c590f7622b8efe669dfcb3dc774253772a5a3d Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 05:44:01 +0000 Subject: [PATCH 08/38] 1 Signed-off-by: huangju1993 --- .../examples/precision_tool/lib/graph/desc.py | 154 +++++++++ .../precision_tool/lib/graph/graph_manager.py | 111 ++++++ .../precision_tool/lib/graph/npu_graph.py | 316 ++++++++++++++++++ .../examples/precision_tool/lib/graph/op.py | 255 ++++++++++++++ .../precision_tool/lib/graph/tf_graph.py | 45 +++ 5 files changed, 881 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py new file mode 100644 index 000000000..5ac35dbfd --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/desc.py @@ -0,0 +1,154 @@ +# coding=utf-8 +from ..util.util import util + +ATTR = 'attr' +ATTR_KEY = 'key' +ATTR_VALUE = 'value' +DATA_DUMP_ORIGIN_OUTPUT_INDEX = '_datadump_origin_output_index' +FUSION_ORIGIN_OUTPUT_INDEX = '_fusion_origin_output_index' +DATA_DUMP_ORIGIN_NAME = '_datadump_origin_name' +ORIGIN_FORMAT = 'origin_format' +ORIGIN_SHAPE = 'origin_shape' +VALUE_RANGE = 'value_range' +SHAPE_RANGE = 'shape_range' +DT_STRING = 's' +DT_INT = 'i' +DT_LIST_LIST_INT = 'list_list_int' +DT_LIST_LIST_I = 'list_list_i' +DT_LIST = 'list' +DT_LIST_INT = 'list_i' +DATA_TYPE_DEFAULT_VALUE = { + 'i': 0, + 's': '' +} + + +class Desc(object): + """ Op desc + shape: data shape + dtype: data type + format: data format + npu_file: npu file name/path + cpu_file: cpu file name/path + idx: input idx + """ + def __init__(self, desc_json, index): + self.desc_json = desc_json + self.index = index + self.log = util.get_log() + + def idx(self): + return self.index + + def shape(self): + return self.desc_json['shape']['dim'] if 'shape' in self.desc_json else [] + + def dtype(self): + return self.desc_json['dtype'] if 'dtype' in self.desc_json else '' + + def format(self): + return self.desc_json['layout'] if 'layout' in self.desc_json else [] + + def origin_shape(self): + return self._get_attr_list(ORIGIN_SHAPE, DT_INT) + + def origin_format(self): + return self._get_attr(ORIGIN_FORMAT, DT_STRING) + + def value_range(self): + return self._get_attr_list_list(VALUE_RANGE, DT_LIST_INT) + + def shape_range(self): + return self._get_attr_list_list(SHAPE_RANGE, DT_LIST_INT) + + def _get_attr_list_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST_LIST_INT) + if val is None or DT_LIST_LIST_I not in val: + return [] + res = [] + for 
item in val[DT_LIST_LIST_I]: + if data_type in item: + res.append(item[data_type]) + return res + + def _get_attr_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST) + return val[data_type] if val is not None and data_type in val else [] + + def _get_attr(self, key, data_type): + val = self._get_attr_base(key, data_type) + return val if val is not None else DATA_TYPE_DEFAULT_VALUE[data_type] + + def _get_attr_base(self, key, data_type): + if ATTR in self.desc_json: + for attr in self.desc_json[ATTR]: + if attr[ATTR_KEY] == key: + if attr[ATTR_VALUE] is not None and data_type in attr[ATTR_VALUE]: + return attr[ATTR_VALUE][data_type] + return None + + def compare(self, right_desc): + if self.dtype() == right_desc.dtype() and self.format() == right_desc.format(): + return "[green][%d] [%s][%s] %s[/green]" % (self.idx(), self.dtype(), self.format(), self.shape()), True + else: + return "[yellow][%d] [%s][%s] %s | [%s][%s] %s[/yellow]" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + right_desc.dtype(), right_desc.format(), right_desc.shape()), False + + def data_dump_origin_name(self): + return '' + + +class InputDesc(Desc): + def __init__(self, name, desc_json, index): + super(InputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid input name.') + name_info = name.split(':') + self.op_name = name + self.peer_index = -2 + if len(name_info) == 2: + self.op_name = name_info[0] + self.peer_index = int(name_info[1]) + + def name(self): + return self.op_name + + def peer_idx(self): + return self.peer_index + + def is_control(self): + return self.peer_index == -1 + + def summary(self, origin_txt=False): + """idx | dtype | format | shape | [blue]value_range | shape_range| origin_shape[/blue] | op_name | peer_idx""" + if origin_txt: + return "[%d][%s][%s]%s %s:%d" % (self.idx(), self.dtype(), self.format(), + self.shape(), self.name(), self.peer_idx()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s:%d" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.name(), self.peer_idx()) + + +class OutputDesc(Desc): + def __init__(self, name, desc_json, index): + super(OutputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid output name.') + self.op_names = name.split(':') + + def names(self): + return self.op_names + + def summary(self, origin_txt=False): + if origin_txt: + return "[%d][%s][%s]%s %s" % (self.idx(), self.dtype(), self.format(), self.shape(), self.names()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.names()) + + def data_dump_origin_name(self): + return self._get_attr(DATA_DUMP_ORIGIN_NAME, DT_STRING) + + def data_dump_origin_output_index(self): + return self._get_attr(DATA_DUMP_ORIGIN_OUTPUT_INDEX, DT_INT) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py new file mode 100644 index 000000000..cef861741 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/graph_manager.py @@ -0,0 +1,111 @@ +# coding=utf-8 +""" +Graph Manager +""" +import os +import collections +from 
..util.constant import Constant +from .npu_graph import NpuGraph +from .tf_graph import TfGraph +from ..util.util import util +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class GraphManager(object): + def __init__(self): + self.log = util.get_log() + self.npu_graphs = collections.OrderedDict() + self.tf_graph = None + + def prepare(self): + # prepare npu graphs + if not os.path.exists(cfg.NPU_DIR): + util.create_dir(cfg.NPU_DIR) + sub_dirs = os.listdir(cfg.NPU_DIR) + if len(sub_dirs) == 0: + # create default dir + sub_dirs = [Constant.DEFAULT_DEBUG_ID] + for sub_dir in sub_dirs: + npu_graph = NpuGraph(sub_dir) + npu_graph.prepare() + self.npu_graphs[sub_dir] = npu_graph + # prepare cpu graph + self.tf_graph = TfGraph(cfg.TF_GRAPH_DIR) + + def check_cast(self): + for graph in self.npu_graphs.values(): + graph.check_cast() + + def check_dtype(self): + for graph in self.npu_graphs.values(): + graph.check_dtype() + + def check_similarity(self): + self._check_npu_graph_similarity() + + def _check_npu_graph_similarity(self): + """Check npu graph similarity""" + if len(self.npu_graphs) < 2: + self.log.debug("Only one npu debug, no need to check npu graph similarity.") + return + left_graphs = self.npu_graphs[Constant.DEFAULT_DEBUG_ID].sub_graphs + right_graphs = self.npu_graphs[Constant.NPU_DEBUG_ID_1].sub_graphs + for left_graph in left_graphs.values(): + for right_graph in right_graphs.values(): + if left_graph.graph_id != right_graph.graph_id: + continue + left_graph.compare(right_graph) + + def get_graphs(self, debug_id): + if debug_id not in self.npu_graphs: + raise PrecisionToolException("Get graphs failed with no debug_id:%s" % debug_id) + return self.npu_graphs[debug_id].build_json_files + + def get_ops(self, op_name, graph_name=None): + """ Get npu/tf ops by op_name + :param op_name: op name + :param graph_name: graph name + :return: npu op dict: debug_id->Op, tf op + """ + npu_ops = collections.OrderedDict() + for debug_id, npu_graph in self.npu_graphs.items(): + npu_ops[debug_id] = npu_graph.get_op(op_name, graph_name) + # tf graph op + return npu_ops, None + + def print_op_list(self, op_type='', op_name='', pass_name='', kernel_name=''): + if op_type == '' and op_name == '' and pass_name == '' and kernel_name == '': + table_list = [] + for debug_id, graph in self.npu_graphs.items(): + table = util.create_table(debug_id, ["OpType", "Count"]) + op_type_counter = collections.Counter() + for op in graph.ops_list: + op_type_counter[op.type()] += 1 + for op_types, count in op_type_counter.items(): + table.add_row(op_types, str(count)) + table_list.append(table) + util.render(util.create_columns(table_list)) + + else: + for debug_id, graph in self.npu_graphs.items(): + ops = graph.list_ops(op_type, op_name, pass_name, kernel_name) + ops_txt = ['[green][%s][/green][yellow][%s][/yellow] %s' % ( + op.type(), op.pass_name(), op.name()) for op in ops] + util.print_panel(Constant.NEW_LINE.join(ops_txt), debug_id) + + @staticmethod + def op_graph_summary(ops, attr_detail=False): + npu_summary = collections.OrderedDict() + for debug_id, op in ops.items(): + npu_summary[debug_id] = collections.OrderedDict() + for op_detail in op: + npu_summary[debug_id][op_detail.graph_name] = op_detail.summary(attr_detail=attr_detail) + return npu_summary, None + + def save_sub_graph(self, ops, deep): + for debug_id, op in ops.items(): + if debug_id in self.npu_graphs: + for op_detail in 
op: + self.npu_graphs[debug_id].save_sub_graph(op_detail, deep) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py new file mode 100644 index 000000000..5a93b9718 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/npu_graph.py @@ -0,0 +1,316 @@ +# coding=utf-8 +""" +Graph Manager +""" +import json +import os +import collections +import time +from .op import Op +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + +DANGEROUS_CAST = { + 'DT_FLOAT': ['DT_INT32'] +} + +NO_DIG_OPS = ['AtomicAddrClean', 'NetOutput'] +CKPT_META_SHUFFIX='.meta' + +OP_CAST = 'Cast' + + +class NpuSubGraph(object): + def __init__(self, graph_json, build_file, npu_graph): + self.log = util.get_log() + self.graph_name = graph_json['name'] + self.npu_graph = npu_graph + self.graph = graph_json + self.build_file = build_file + self.ops_list = collections.OrderedDict() + self.ops_type_list = {} + self._prepare() + self.graph_id = self._get_graph_id() + + def _prepare(self): + self.log.debug("Graph %s operator count: %d" % (self.graph_name, len(self.graph['op']))) + for op_json in self.graph['op']: + op_name = op_json['name'] + op_type = op_json['type'] + if op_name not in self.ops_list: + self.ops_list[op_name] = [] + op = Op(op_json, self.ops_list, self.graph['name'], self.npu_graph, self) + if op_type not in self.ops_type_list: + self.ops_type_list[op_type] = {} + self.ops_list[op_name] = op + self.ops_type_list[op_type][op_name] = op + + def _get_graph_id(self): + if 'attr' in self.graph: + for item in self.graph['attr']: + if item['key'] == '_session_graph_id': + return item['value']['s'] + self.log.warning("Unknown sub graph id.") + return "UNKNOWN" + + def compare(self, sub_graph): + """compare with another sub graph""" + if not isinstance(sub_graph, NpuSubGraph): + raise PrecisionToolException("Should compare with another subgraph.") + right_ops_list = sub_graph.ops_list + ignore_ops = ["TransData", "Cast", "Recv", "Send", "Variable", "NetOutput", "NoOp", "Assign", "Constant", + "StreamActive"] + similar_count = 0 + for op_name in self.ops_list: + if self.ops_list[op_name].type() in ignore_ops: + continue + if op_name not in right_ops_list: + self.log.warning("Can not Find [%s] %s in right subgraph.", self.ops_list[op_name].type(), op_name) + continue + result, similar = self.ops_list[op_name].compare(right_ops_list[op_name]) + if not similar: + util.print_panel(result, title=op_name) + else: + similar_count += 1 + for op_name in right_ops_list: + if right_ops_list[op_name].type() in ignore_ops: + continue + if op_name not in self.ops_list: + self.log.warning("Can not Find [%s] %s in left subgraph.", right_ops_list[op_name].type(), op_name) + self.log.info("Compare [%s] [%s], similarity is [%s / %s]", + self.graph_name, sub_graph.graph_name, similar_count, len(self.ops_list)) + + def get_op(self, name): + if name in self.ops_list: + return [self.ops_list[name]] + guess_op_list = [] + for op_detail in self.ops_list.values(): + if name in op_detail.name() or name == str(op_detail.name()).replace('/', '_'): + guess_op_list.append(op_detail) + return guess_op_list + + def 
get_parent_node_by_subgraph_name(self, graph_name): + ops = [] + for op_detail in self.ops_list.values(): + if graph_name in op_detail.subgraph_names(): + ops.append(op_detail) + return ops + + def get_op_by_type(self, op_type): + ops = [] + for op_detail in self.ops_list.values(): + if op_type == op_detail.type(): + ops.append(op_detail) + return ops + + def check_cast(self): + cast_list = {} + danger_cast_list = {} + if OP_CAST in self.ops_type_list: + cast_ops = self.ops_type_list[OP_CAST] + for op in cast_ops.values(): + input_type = '' + output_type = '' + for input_desc in op.inputs(): + input_type = input_desc.dtype() if input_desc.dtype() != '' else input_type + for output_desc in op.outputs(): + output_type = output_desc.dtype() if output_desc.dtype() != '' else output_type + cast_type = "%s -> %s" % (input_type, output_type) + if cast_type not in cast_list: + cast_list[cast_type] = [] + cast_list[cast_type].append(op.name()) + for cast_type in cast_list: + if self._is_dangerous_cast(cast_type): + summary_txt = "[green][Cast][/green][red][%s][/red] %s" % (cast_type, cast_list[cast_type]) + util.print(summary_txt) + + @staticmethod + def _is_dangerous_cast(cast_type): + """Check if cast """ + cast_info = cast_type.split(" -> ") + input_dtype = cast_info[0] + output_dtype = cast_info[1] + if input_dtype in DANGEROUS_CAST: + if output_dtype in DANGEROUS_CAST[input_dtype]: + return True + return False + + +class NpuGraph(object): + def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID): + self.log = util.get_log() + self.build_files = None + self.build_json_files = [] + self.debug_id = debug_id + self.npu_root = os.path.join(cfg.NPU_DIR, debug_id) + self.graph_root = os.path.join(self.npu_root, Constant.GRAPH) + self.sub_graphs = collections.OrderedDict() + self.ops_list = [] + util.create_dir(self.graph_root) + + @catch_tool_exception + def prepare(self): + """prepare""" + self._prepare_npu_graphs() + if self.build_files is not None: + for build_file in self.build_files: + self._parse_ops(build_file) + + def check_cast(self): + """Check cast op type""" + for sub_graph in self.sub_graphs.values(): + sub_graph.check_cast() + + def check_dtype(self): + """Check op input/output dtype""" + for op in self.ops_list: + input_dtype = '' + for input_desc in op.inputs(): + input_dtype += ' ' + input_desc.dtype() + output_dtype = '' + for output_desc in op.outputs(): + output_dtype += ' ' + output_desc.dtype() + util.print('[green][%s][/green] %s\n - Input: %s\n - Output: %s' % ( + op.type(), op.name(), input_dtype, output_dtype)) + + def check_similarity(self): + """Check graph similarity.""" + + @catch_tool_exception + def save_sub_graph(self, op, deep=0, dump_manager=None, compare_manager=None): + """Save sub graph""" + if op is None: + raise PrecisionToolException("Save sub graph failed as root operator is None.") + try: + from graphviz import Digraph + file_name_list = [self.debug_id, op.graph_name, op.type(), op.name().replace('/', '_').replace('.', '_'), + str(deep), 'gv'] + file_name = '.'.join(file_name_list) + path = os.path.join(cfg.OP_GRAPH_DIR, file_name) + dot = Digraph(file_name, filename=path, node_attr={'shape': 'Mrecord'}, format='svg') + dot_list = [] + edge_list = [] + self._gen_sub_graph(dot, op, deep, dot_list, edge_list, 'red', direction='all', + dump_manager=dump_manager, compare_manager=compare_manager) + dot.format = 'svg' + dot.save(path) + self.log.info("Sub graph saved to %s" % os.path.abspath(cfg.OP_GRAPH_DIR)) + try: + dot.view(path) + time.sleep(1) + except 
Exception as err:
+                raise PrecisionToolException(
+                    "graphviz is not installed; install it with [yum/apt-get] install graphviz xdg-utils. %s" % err)
+        except ImportError as err:
+            raise PrecisionToolException("Save sub graph failed as import graphviz module failed. %s" % err)
+
+    def _gen_sub_graph(self, dot, op, deep, dot_list, edge_list, color='black', direction='all',
+                       dump_manager=None, compare_manager=None):
+        """Gen sub graph"""
+        if deep == 0 or op.type() in NO_DIG_OPS:
+            return
+        if op.name() not in dot_list:
+            dot.node(op.name(), self._gen_sub_graph_label(op), color=color, tooltip=op.summary(True))
+            dot_list.append(op.name())
+        # add input and output
+        for desc in op.inputs():
+            sub_op = self.get_op(desc.name(), op.graph_name)
+            if len(sub_op) != 0:
+                sub_op = sub_op[0]
+                if direction in ['all', 'input']:
+                    self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='input')
+                if sub_op.name() in dot_list:
+                    src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx())
+                else:
+                    # the tooltip should describe the node being added, i.e. sub_op
+                    dot.node(sub_op.name(), self._gen_sub_graph_label(sub_op), color=color, tooltip=sub_op.summary(True))
+                    src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx())
+                dst_edge = '%s:i%d' % (op.name(), desc.idx())
+                if src_edge + dst_edge not in edge_list:
+                    dot.edge(src_edge, dst_edge)
+                    edge_list.append(src_edge + dst_edge)
+        # add output
+        for desc in op.outputs():
+            for out_node_name in desc.names():
+                sub_op = self.get_op(out_node_name, op.graph_name)
+                if len(sub_op) != 0 and direction in ['all', 'output']:
+                    sub_op = sub_op[0]
+                    self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='output')
+
+    def _gen_sub_graph_label(self, op):
+        input_labels = []
+        for desc in op.inputs():
+            input_labels.append(self._gen_sub_graph_desc(desc, 'i'))
+        output_labels = []
+        for desc in op.outputs():
+            output_labels.append(self._gen_sub_graph_desc(desc, 'o'))
+        str_cell = '|'
+        return '{{ %s } | [%s] %s | { %s }}' % (str_cell.join(input_labels), op.type(), op.name(),
+                                                str_cell.join(output_labels))
+
+    @staticmethod
+    def _gen_sub_graph_desc(desc, id_prefix):
+        desc_str = r'<%s%d> [%d]' % (id_prefix, desc.idx(), desc.idx())
+        desc_str = r'%s [%s]' % (desc_str, desc.dtype()) if desc.dtype() != '' else desc_str
+        desc_str = r'%s\n%s' % (desc_str, desc.shape()) if len(desc.shape()) != 0 else desc_str
+        return desc_str
+
+    def list_ops(self, op_type='', op_name='', pass_name='', kernel_name=''):
+        """list ops in graph"""
+        return filter(lambda op: op_type in op.type() and op_name in op.name() and (
+                pass_name == '' or pass_name in op.pass_name()) and kernel_name in op.kernel_name(), self.ops_list)
+
+    def get_op(self, name, graph_name=None):
+        """get op by name"""
+        # get op in specific sub graph
+        if graph_name is not None and graph_name in self.sub_graphs:
+            return self.sub_graphs[graph_name].get_op(name)
+        ops = []
+        for sub_graph in self.sub_graphs.values():
+            ops.extend(sub_graph.get_op(name))
+        # check if there is an exact match operation
+        match_ops = list(filter(lambda x: x.name() == name, ops))
+        if len(match_ops) != 0:
+            return match_ops
+        # return guess operations by name
+        self.log.info("Cannot find an operator named %s. 
You may mean one of the operators below.", name)
+        guess_op_name_list = ['[green][%s][/green] %s' % (x.type(), x.name()) for x in ops]
+        util.print_panel(Constant.NEW_LINE.join(guess_op_name_list), title='Possible Operators')
+        return ops
+
+    def get_parent_node_by_subgraph_name(self, graph_name):
+        ops = []
+        for sub_graph in self.sub_graphs.values():
+            ops.extend(sub_graph.get_parent_node_by_subgraph_name(graph_name))
+        return ops
+
+    def _prepare_npu_graphs(self):
+        """prepare ge graphs """
+        # move graphs to precision data dir
+        graph_files = util.list_ge_graph_files(self.graph_root)
+        self.build_files = sorted(filter(lambda x: x.graph_name == cfg.BUILD_JSON_GRAPH_NAME, graph_files.values()),
+                                  key=lambda x: x.graph_id)
+        if len(self.build_files) == 0:
+            self.log.warning("Can not find any build files in dir: %s", self.graph_root)
+        self.log.info("Find [%d] GE build files.", len(self.build_files))
+
+    @catch_tool_exception
+    def _parse_ops(self, build_file):
+        """Parse *_Build.txt.json to op objects."""
+        build_file_json = build_file.path + '.json'
+        build_file_json = util.convert_proto_to_json(build_file.path, build_file_json)
+        if build_file_json is not None:
+            self.build_json_files.append(build_file_json)
+            with open(build_file_json, 'r') as f:
+                graph_json = json.load(f)
+                if 'graph' not in graph_json:
+                    raise PrecisionToolException("No graph in file: %s" % build_file.file_name)
+                if len(graph_json['graph']) != 1:
+                    self.log.warning("There is more than one graph in the GE build file, found %d" % len(
+                        graph_json['graph']))
+                # sub_graphs = []
+                for graph in graph_json['graph']:
+                    npu_sub_graph = NpuSubGraph(graph, build_file, self)
+                    self.sub_graphs[graph['name']] = npu_sub_graph
+                    self.ops_list.extend(npu_sub_graph.ops_list.values())
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py
new file mode 100644
index 000000000..2dbd5a31b
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/op.py
@@ -0,0 +1,255 @@
+# coding=utf-8
+import json
+import re
+from typing import List
+from .desc import InputDesc
+from .desc import OutputDesc
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import PrecisionToolException
+
+NO_INPUT_NODES = ['Data', 'AtomicAddrClean', 'Recv', 'Constant']
+NO_OUTPUT_NODES = ['Send', 'Recv', 'NetOutput', 'PartitionedCall']
+
+JSON_KEY_NAME = 'name'
+JSON_KEY_ID = 'id'
+JSON_KEY_TYPE = 'type'
+JSON_KEY_ATTR = 'attr'
+JSON_KEY = 'key'
+JSON_VALUE = 'value'
+JSON_KEY_LIST = 'list'
+JSON_KEY_STR = 's'
+JSON_KEY_INT = 'i'
+JSON_KEY_INPUT_I = 'input_i'
+JSON_KEY_OUTPUT_I = 'output_i'
+JSON_KEY_PASS_NAME = 'pass_name'
+JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES = '_datadump_original_op_names'
+JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME = "_ge_attr_op_kernel_lib_name"
+JSON_KEY_PARENT_NODE_INDEX = "_parent_node_index"
+JSON_KEY_SUBGRAPH_NAME = "subgraph_name"
+
+KERNEL_NAME_SHUFFIX = '_kernelname'
+
+
+class Op(object):
+    """ Op class.
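+    Wraps one operator node parsed from a GE build-graph JSON. A minimal
+    op_json sketch, reconstructed from the keys this class reads (real
+    graphs carry more attributes):
+        {"name": "add_1", "type": "Add", "id": 3,
+         "input": ["x:0", "y:0"], "input_desc": [...],
+         "dst_name": ["relu_1"], "dst_index": [0], "output_desc": [...]}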
+ name: op name + type: op type + inputs: list of input descs + outputs: list of output descs + """ + def __init__(self, op_json, op_list, graph_name, npu_graph, sub_graph): + """Init""" + self.op_json = op_json + self.op_list = op_list + self.graph_name = graph_name + self.npu_graph = npu_graph + self.sub_graph = sub_graph + self.input_list = None + self.output_list = None + self.log = util.get_log() + + def name(self): + """Get op name""" + return self.op_json[JSON_KEY_NAME] + + def id(self): + """Get op id""" + return self.op_json[JSON_KEY_ID] if JSON_KEY_ID in self.op_json else '' + + def json(self): + return json.dumps(self.op_json, indent=2) + + def type(self): + """Get op type""" + return self.op_json[JSON_KEY_TYPE] + + def subgraph_names(self): + return self.op_json[JSON_KEY_SUBGRAPH_NAME] if JSON_KEY_SUBGRAPH_NAME in self.op_json else [] + + def inputs(self): + """Get the input list""" + if self.input_list is None: + self._parse_inputs() + if len(self.input_list) == 0 and self.type() == 'Data': + # Looking for Real Data + self._looking_for_real_inputs() + return self.input_list + + def input_addr(self): + return self.op_json[JSON_KEY_INPUT_I] + + def outputs(self): + """Get output list""" + if self.output_list is None: + self._parse_outputs() + if len(self.output_list) == 0 and self.type() == 'PartitionedCall': + self._looking_for_real_outputs() + return self.output_list + + def output_addr(self): + return self.op_json[JSON_KEY_OUTPUT_I] + + def pass_name(self): + return self._attr(JSON_KEY_PASS_NAME) + + def kernel_name(self): + return self._attr(self.name() + KERNEL_NAME_SHUFFIX) + + def ge_attr_op_kernel_lib_name(self): + return self._attr(JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME) + + def data_dump_original_op_names(self): + return self._attr(JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES) + + def parent_node_index(self): + return self._attr(JSON_KEY_PARENT_NODE_INDEX) + + def _attr(self, key): + if JSON_KEY_ATTR in self.op_json: + for attr in self.op_json[JSON_KEY_ATTR]: + if key == attr[JSON_KEY]: + if JSON_KEY_STR in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_STR] + elif JSON_KEY_LIST in attr[JSON_VALUE]: + if JSON_KEY_STR in attr[JSON_VALUE][JSON_KEY_LIST]: + return attr[JSON_VALUE][JSON_KEY_LIST][JSON_KEY_STR] + elif JSON_KEY_INT in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_INT] + else: + self.log.warning("Unknown attr format: %s", attr[JSON_VALUE]) + return '' + + def compare(self, right_op): + """Compare with another op""" + if not isinstance(right_op, Op): + raise PrecisionToolException("Should compare with another op.") + res_str = ['LeftOp(Type/Name) : [green][%s][/green] %s' % (self.type(), self.name()), + 'RightOp(Type/Name): [green][%s][/green] %s' % (right_op.type(), right_op.name())] + similar = True + if len(self.inputs()) != len(right_op.inputs()): + res_str.append("Input: [yellow]Input num mismatch.[/yellow]") + else: + res_str.append("Input:") + for left_input in self.inputs(): + for right_input in right_op.inputs(): + if left_input.idx() != right_input.idx(): + continue + txt, input_similar = left_input.compare(right_input) + res_str.append(' - ' + txt) + similar = similar and input_similar + if len(self.outputs()) != len(right_op.outputs()): + res_str.append("Output: [yellow]Output num mismatch.[/yellow]") + else: + res_str.append("Output:") + for left_output in self.outputs(): + for right_output in right_op.outputs(): + if left_output.idx() != right_output.idx(): + continue + txt, output_similar = left_output.compare(right_output) + res_str.append(' 
- ' + txt) + similar = similar and output_similar + return Constant.NEW_LINE.join(res_str), similar + + def _attr_detail(self): + """Gen attr details""" + res_str = [] + if JSON_KEY_ATTR in self.op_json: + res_str = [' ' + str(i) for i in self.op_json[JSON_KEY_ATTR]] + return Constant.NEW_LINE.join(res_str) + + def summary(self, origin_txt=False, attr_detail=False): + """Summary of current op""" + res_str = ['Op(Type/Name): [green][%s][/green] %s' % (self.type(), self.name()), + 'ID: [yellow]%s[/yellow]' % self.id(), + 'KernelName: [yellow]%s[/yellow]' % self.kernel_name(), + 'KernelLibName: [yellow]%s[/yellow]' % self.ge_attr_op_kernel_lib_name(), + 'GraphName: [yellow]%s[/yellow]' % self.graph_name] + pass_name = self.pass_name() + if pass_name != '': + res_str.append('PassName: [yellow]%s[/yellow]' % pass_name) + origin_op = self.data_dump_original_op_names() + if origin_op != '': + res_str.append('OriginalOp: %s' % origin_op) + if attr_detail: + res_str.append(self._attr_detail()) + res_str.append('InputAddr : [yellow]%s[/yellow]' % self.input_addr()) + res_str.append('OutputAddr: [yellow]%s[/yellow]' % self.output_addr()) + res_str.append('Input:%s' % InputDesc.summary.__doc__) + for i in self.inputs(): + res_str.append(' -' + i.summary(origin_txt)) + res_str.append('Output:') + for i in self.outputs(): + res_str.append(' -' + i.summary(origin_txt)) + return Constant.NEW_LINE.join(res_str) + + def _parse_inputs(self): + """ parse input desc in graph """ + self.input_list = [] + if 'input' not in self.op_json: + if self.type() not in NO_INPUT_NODES: + self.log.warning('Parse Op[%s][%s] inputs error.' % (self.type(), self.name())) + return self.input_list + desc_index = 0 + for i in range(len(self.op_json['input'])): + name = self.op_json['input'][i] + if name == '': + # if self.type() not in NO_INPUT_NODES: + # self.log.warning('invalid input name.') + continue + name_info = name.split(':') + if len(name_info) == 2 and int(name_info[1]) == -1: + # control edge + self.input_list.append(InputDesc(name, [], i)) + else: + self.input_list.append(InputDesc(name, self.op_json['input_desc'][desc_index], i)) + desc_index += 1 + self.input_list.sort(key=lambda x: x.index) + return self.input_list + + def _parse_outputs(self): + """ parse output desc in graph """ + self.output_list = [] + if 'dst_index' not in self.op_json: + if self.type() not in NO_OUTPUT_NODES: + self.log.warning('Parse Op[%s][%s] outputs error.' % (self.type(), self.name())) + return self.output_list + desc_index = 0 + for i in range(len(self.op_json['dst_index'])): + dst_name = self.op_json['dst_name'][i] + if self.op_json['dst_index'][i] == -1: + # control edge + self.output_list.append(OutputDesc(dst_name, [], -1)) + else: + self.output_list.append(OutputDesc(dst_name, self.op_json['output_desc'][desc_index], desc_index)) + desc_index += 1 + self.output_list.sort(key=lambda x: x.index) + return self.output_list + + def _looking_for_real_inputs(self): + """Find real inputs of subgraph data node.""" + graph_name = self.graph_name + parent_node_idx = self.parent_node_index() + parent_nodes = self.npu_graph.get_parent_node_by_subgraph_name(graph_name) + self.log.debug("Find %s parent nodes." 
% len(parent_nodes))
+        for parent_node in parent_nodes:
+            inputs = parent_node.inputs()
+            if len(inputs) <= parent_node_idx:
+                self.log.warning("Parent node has %d inputs, but need index %d" % (len(inputs), parent_node_idx))
+                continue
+            self.input_list.append(inputs[parent_node_idx])
+
+    def _looking_for_real_outputs(self):
+        """Find real outputs of PartitionedCall Node"""
+        subgraph_names = self.subgraph_names()
+        for subgraph_name in subgraph_names:
+            net_output_with_subgraph_name = subgraph_name + '_Node_Output'
+            net_output_nodes = self.npu_graph.get_op(net_output_with_subgraph_name)
+            self.log.debug("Find %s net output nodes, just need one." % len(net_output_nodes))
+            self.log.info("Note: PartitionedCall output nodes are the nodes connected to PartitionedCall from the inside.")
+            for output_node in net_output_nodes:
+                self.output_list = output_node.inputs()
+
+
+
+
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py
new file mode 100644
index 000000000..acf8c8920
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/graph/tf_graph.py
@@ -0,0 +1,45 @@
+# coding=utf-8
+import collections
+import logging
+import os
+from ..util.util import util
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.precision_tool_exception import PrecisionToolException
+from ..config import config as cfg
+
+CKPT_META_SHUFFIX = '.meta'
+
+
+class TfGraph(object):
+    def __init__(self, graph_root=cfg.TF_GRAPH_DIR):
+        """"""
+        self.graph_root = graph_root
+        self.log = util.get_log()
+        self.op_list = collections.OrderedDict()
+
+    @catch_tool_exception
+    def get_op_list(self, ckpt_path=None):
+        # the op list starts out empty and is filled lazily from the newest ckpt meta file
+        if not self.op_list:
+            self._convert_ckpt_to_graph(ckpt_path)
+        return self.op_list
+
+    def _convert_ckpt_to_graph(self, ckpt_path):
+        log_level = self.log.level
+        try:
+            self.log.setLevel('ERROR')
+            import tensorflow as tf
+            self.log.setLevel(log_level)
+        except ImportError as err:
+            self.log.setLevel(log_level)
+            raise PrecisionToolException("Import tensorflow failed. %s" % err)
+        meta_files = util.list_cpu_graph_files(ckpt_path)
+        if len(meta_files) == 0:
+            raise PrecisionToolException("Can not find any ckpt meta files.")
+        file_list = sorted(meta_files.values(), key=lambda x: x['timestamp'])
+        ckpt_file = file_list[-1]
+        self.log.info("Find %d tf ckpt meta files, choose [%s]" % (len(meta_files), ckpt_file['file_name']))
+        self.op_list = collections.OrderedDict()
+        saver = tf.train.import_meta_graph(ckpt_file['path'], clear_devices=True)
+        graph = tf.get_default_graph()
+        for op in graph.get_operations():
+            self.op_list[op.name] = op
-- Gitee

From d4748f9f7cc59f5ede07b986677724ddaff272ff Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:44:20 +0000
Subject: [PATCH 09/38] 1

Signed-off-by: huangju1993
---
 .../lib/train/train_analysis.py | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py
new file mode 100644
index 000000000..b7547d677
--- /dev/null
+++ 
b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/train/train_analysis.py
@@ -0,0 +1,112 @@
+# coding=utf-8
+import os
+import numpy as np
+from ..adapter.tf_adapter import TfAdapter
+from ..dump.tf_dump import TfDump
+from ..util.util import util
+from ..config import config as cfg
+from ..util.precision_tool_exception import PrecisionToolException
+
+
+class TrainAnalysis(object):
+    def __init__(self):
+        self.log = util.get_log()
+        self.tf_adapter = TfAdapter()
+
+    @staticmethod
+    def gen_feed_file_name(name):
+        file_name = str(name).replace(':', '_').replace('/', '_') + '.npy'
+        return os.path.join(cfg.TF_CKPT_INPUT_DIR, file_name)
+
+    def _init_session(self, device='npu', action='dump'):
+        """Create a session configured for dump on the target device."""
+        import tensorflow as tf
+        if device == 'npu':
+            # util.execute_command('source %s', cfg.ASCEND_SET_ENV)
+            return tf.Session(config=self.tf_adapter.session_dump_config(None, action=action))
+        sess = tf.Session(config=tf.ConfigProto())
+        return self.tf_adapter.sess_dump(sess)
+
+    def _reset_dropout_rate(self, graph):
+        import tensorflow as tf
+        for op in graph.get_operations():
+            if 'dropout' in op.name and 'rate' in op.name:
+                self.log.debug("Find dropout rate node [%s][%s]" % (op.type, op.name))
+                # tensor = graph.get_tensor_by_name(op.name)
+                if op.type != 'Const':
+                    self.log.warning("Dropout op [%s] is not Const, skip resetting its rate; this may cause a"
+                                     " difference.", op.name)
+                    continue
+                # relies on TF's private Operation._set_attr to force the const rate to 0.0
+                op._set_attr('value', tf.AttrValue(tensor=tf.make_tensor_proto(0.0, tf.float32)))
+                self.log.debug("Set op: %s" % str(op))
+
+    def _prepare_graph(self, graph):
+        graph.seed = cfg.DUMP_SEED
+        self._reset_dropout_rate(graph)
+        return graph
+
+    def _load_train_graph(self, sess):
+        import tensorflow as tf
+        if util.empty_dir(cfg.TF_CKPT_ROOT):
+            raise PrecisionToolException('Checkpoint dir [%s] is empty, cannot run the train analysis process.' %
+                                         cfg.TF_CKPT_ROOT)
+        checkpoint = tf.train.latest_checkpoint(cfg.TF_CKPT_ROOT)
+        if checkpoint is None:
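+            # tf.train.latest_checkpoint returns None when the directory holds no checkpoint state
+            raise PrecisionToolException('Load ckpt failed from [%s].' 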
% cfg.TF_CKPT_ROOT)
+        saver = tf.train.import_meta_graph(checkpoint + '.meta')
+        self._prepare_graph(tf.get_default_graph())
+        saver.restore(sess, checkpoint)
+        return tf.get_default_graph()
+
+    @staticmethod
+    def _get_input_from_graph(graph):
+        input_nodes = []
+        tensor_index = {}
+        for op in graph.get_operations():
+            if 'Placeholder' == op.type:
+                if op.name in tensor_index:
+                    tensor_index[op.name] += 1
+                else:
+                    tensor_index[op.name] = 0
+                node = graph.get_tensor_by_name(op.name + ':' + str(tensor_index[op.name]))
+                input_nodes.append(node)
+        return input_nodes
+
+    def _get_input_tensors(self, input_nodes):
+        feed_map = {}
+        for node in input_nodes:
+            file_name = self.gen_feed_file_name(node.name)
+            if os.path.isfile(file_name):
+                feed_map[node] = np.load(file_name)
+            else:
+                # TODO: no recorded feed file for this placeholder; fall back to
+                # random data (the tensor's dtype is not honored yet)
+                feed_map[node] = np.random.random(node.shape)
+        return feed_map
+
+    def _build_feed_map(self, graph):
+        input_nodes = self._get_input_from_graph(graph)
+        return self._get_input_tensors(input_nodes)
+
+    def _analysis(self, device, action='dump'):
+        import tensorflow as tf
+        if device == 'npu':
+            # side-effect import: makes the NPU custom ops available to TF
+            import npu_bridge.npu_init
+        sess = self._init_session(device, action=action)
+        graph = self._load_train_graph(sess)
+        train_op = tf.get_collection(tf.GraphKeys.TRAIN_OP)
+        feed_map = self._build_feed_map(graph)
+        sess.run(train_op, feed_dict=feed_map)
+        if device == 'cpu':
+            tf_dump = TfDump()
+            tf_dump.run_tf_dbg_dump()
+
+    def run(self, device='all', action='dump'):
+        """
+        :param device: all | npu | cpu
+        :param action: dump | overflow | fusion_switch | fusion_off
+        :return:
+        """
+        if device == 'all':
+            self._analysis('cpu', action)
+            self._analysis('npu', action)
+        else:
+            self._analysis(device, action)
-- Gitee

From e5fa0797422c176c1040ee8577d57d9618ef964d Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:44:39 +0000
Subject: [PATCH 10/38] 1

Signed-off-by: huangju1993
---
 .../precision_tool/lib/util/constant.py      |  20 +
 .../precision_tool/lib/util/file_desc.py     |  38 ++
 .../precision_tool/lib/util/h5_util.py       | 190 +++++++
 .../precision_tool/lib/util/onnx_builder.py  |   0
 .../lib/util/precision_tool_exception.py     |  24 +
 .../precision_tool/lib/util/tool_object.py   |  10 +
 .../examples/precision_tool/lib/util/util.py | 536 ++++++++++++++++++
 7 files changed, 818 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/onnx_builder.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py
new file mode 100644
index 000000000..8106bb8d4
--- /dev/null
+++ 
b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/constant.py @@ -0,0 +1,20 @@ +# coding=utf-8 + + +class Constant(object): + VERSION = "0.1.11" + NEW_LINE = "\n" + TAB_LINE = "\t" + DEFAULT_DEBUG_ID = "debug_0" + NPU_DEBUG_ID_1 = "debug_1" + GRAPH = "graph" + DUMP = "dump" + + class Suffix(object): + JSON = '.json' + CSV = '.csv' + H5 = '.h5' + OM = '.om' + + class Pattern(object): + GE_PROTO_GRAPH_PATTERN = r'^ge_proto_([0-9]+)_(graph_[0-9]+_)*([A-Za-z0-9_-]+)\.txt$' diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py new file mode 100644 index 000000000..0773632b1 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/file_desc.py @@ -0,0 +1,38 @@ +# coding=utf-8 +import os + + +class FileDesc(object): + def __init__(self, file_name, dir_path, timestamp=-1): + self.file_name = file_name + self.dir_path = dir_path + self.path = os.path.join(dir_path, file_name) + self.timestamp = timestamp + self.idx = 0 + if self.timestamp == -1: + self.timestamp = os.path.getmtime(self.path) + + +class BuildGraphFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, graph_id, graph_name): + super(BuildGraphFileDesc, self).__init__(file_name, dir_path, timestamp) + self.graph_id = graph_id + self.graph_name = graph_name + + +class NpuDumpFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, stream_id=0): + super(NpuDumpFileDesc, self).__init__(file_name, dir_path, timestamp) + self.op_name = op_name + self.op_type = op_type + self.task_id = task_id + stream_id = 0 if stream_id is None else int(stream_id) + self.stream_id = stream_id + self.idx = dir_path.split(os.sep)[-1] + + +class DumpDecodeFileDesc(NpuDumpFileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, anchor_type, anchor_idx): + super(DumpDecodeFileDesc, self).__init__(file_name, dir_path, timestamp, op_name, op_type, task_id) + self.type = anchor_type + self.idx = anchor_idx diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py new file mode 100644 index 000000000..1b294ab97 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/h5_util.py @@ -0,0 +1,190 @@ +import collections +import os +import numpy as np + +try: + import h5py +except ImportError as import_err: + h5py = None + print("Failed to import h5py. some function may disable. 
Run 'pip3 install h5py' to fix it.", + import_err) + +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg + + +class IdxType(object): + # /batch_norm/88/input/xxx + OP_TYPE = 'OP_TYPE' + OP_NAME = 'OP_NAME' + OP_ANC = 'OP_ANC' + + +H5_NAME_IDX = [IdxType.OP_TYPE, IdxType.OP_NAME, IdxType.OP_ANC] + + +def gen_h5_data_name(name, prefix='npu'): + return "%s_h5%s.npy" % (prefix, name.replace('/', '_')) + + +class H5Data(object): + def __init__(self, data, prefix='npu'): + self.data = data + self.prefix = prefix + self.name = gen_h5_data_name(self.data.name, self.prefix) + + def np_data(self): + np_data = np.array(self.data) + self._save(np_data) + return np_data + + def _save(self, data): + path = os.path.join(cfg.PT_DUMP_DECODE_DIR, self.name) + np.save(path, data) + + +class H5Op(object): + def __init__(self, name, h5_node, prefix='npu'): + self.log = util.get_log() + self.name = name + self.prefix = prefix + self.h5_node = h5_node + self.inputs = {} + self.outputs = {} + self.group = { + 'grads': {}, + 'tensors': {}, + 'grad_inputs': {}, + 'result': {} + } + ''' + self.input_grad = {} + self.output_grad = {} + self.input_tensor = {} + self.output_tensor = {} + ''' + self._prepare() + + def summary(self): + summary_txt = [] + summary_txt.extend(self._gen_txt(self.inputs, '-Input:')) + summary_txt.extend(self._gen_txt(self.outputs, '-Output:')) + summary_txt.extend(self._gen_txt(self.group['grads'], 'Grads:')) + summary_txt.extend(self._gen_txt(self.group['tensors'], '-Tensors:')) + summary_txt.extend(self._gen_txt(self.group['grad_inputs'], '-GradInputs:')) + summary_txt.extend(self._gen_txt(self.group['result'], '-Result:')) + return Constant.NEW_LINE.join(summary_txt) + + @staticmethod + def _gen_txt(h5_data, name): + if len(h5_data) == 0: + return [] + txt = [name] + for idx, data in enumerate(h5_data.values()): + txt.append(' └─[green][%s][/green] %s' % (idx, data.name)) + txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(data.np_data())) + return txt + + def _parse_group(self, node): + sub_node_type = node.name.split('/')[-1] + if sub_node_type in self.group.keys(): + for item in node: + sub_node = node[item] + if isinstance(sub_node, h5py.Dataset): + self.group[sub_node_type][item] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown sub node: %s" % sub_node) + else: + self.log.warning("Unknown sub node type: %s(%s)" % (sub_node_type, node)) + + def _prepare_input_output(self, node, desc_type): + for desc_name in node: + sub_node = node[desc_name] + if isinstance(sub_node, h5py.Group): + self._parse_group(sub_node) + elif isinstance(sub_node, h5py.Dataset): + update_dict = self.inputs if desc_type == 'input' else self.outputs + update_dict[desc_name] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown type: %s(%s)" % (type(sub_node), sub_node)) + + def _prepare(self): + for desc_type in self.h5_node: + if desc_type in ['input', 'output']: + self._prepare_input_output(self.h5_node[desc_type], desc_type) + else: + self.log.warning("Unknown desc type: %s(%s)" % (desc_type, self.h5_node)) + + +class H5Util(object): + def __init__(self, file_name, prefix): + self.log = util.get_log() + self.file_name = file_name + self.prefix = prefix + self.h5 = None + self.ops = collections.OrderedDict() + self._prepare() + + def __del__(self): + if self.h5 is not None: + self.h5.close() + + def get_op(self, op_id): + if op_id in self.ops: + return self.ops[op_id] + self.log.warning("Can not find any h5 op id: %s" 
% op_id)
+        return None
+
+    def get_tensor_by_name(self, tensor_name):
+        if self.h5 is None:
+            self.log.warning("h5 file is None.")
+            return None
+        if tensor_name in self.h5:
+            return np.array(self.h5[tensor_name])
+        return None
+
+    def print_tensor(self, tensor_name):
+        tensor = self.get_tensor_by_name(tensor_name)
+        if tensor is None:
+            self.log.warning("Tensor:%s does not exist." % tensor_name)
+            return
+        file_path = self._dump_numpy(tensor_name, tensor)
+        util.print_npy_summary(os.path.dirname(file_path), os.path.basename(file_path))
+
+    def _prepare(self):
+        if not os.path.isfile(self.file_name) or not str(self.file_name).endswith(Constant.Suffix.H5):
+            self.log.error("File [%s] does not exist or is not an h5 file." % self.file_name)
+        if h5py is None:
+            # h5py is unavailable; bail out instead of crashing on h5py.File below
+            self.log.warning("Can not find python module h5py.")
+            return
+        self.h5 = h5py.File(self.file_name, 'r')
+        self._list_tensors(self.h5)
+
+    def _list_tensors(self, h5, idx=0, name=''):
+        for item in h5:
+            item_name = name + '/' + item
+            if idx == 1:
+                self.ops[str(item)] = H5Op(item_name, h5[item_name], self.prefix)
+                continue
+            self._list_tensors(h5[item], idx+1, item_name)
+
+    def _list_tensors_loop(self, h5, idx=0, name=''):
+        for item in h5:
+            if isinstance(h5[item], h5py.Group):
+                item_name = name + '/' + item
+                print(item_name)
+                # check
+                if H5_NAME_IDX[idx] == IdxType.OP_NAME and item_name not in self.ops:
+                    self.ops[item_name] = H5Op(item)
+                if H5_NAME_IDX[idx] == IdxType.OP_ANC:
+                    self.ops[item_name] = H5Op(item)
+                self._list_tensors(h5[item], idx + 1, item_name)
+
+    def _dump_numpy(self, tensor_name, tensor):
+        if not os.path.exists(cfg.PT_DUMP_DECODE_DIR):
+            util.create_dir(cfg.PT_DUMP_DECODE_DIR)
+        file_name = tensor_name.replace('/', '_').strip('_') + '.npy'
+        file_path = os.path.join(cfg.PT_DUMP_DECODE_DIR, file_name)
+        self.log.info("Dump file: %s" % file_path)
+        np.save(file_path, tensor)
+        return file_path
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/onnx_builder.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/onnx_builder.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py
new file mode 100644
index 000000000..02084770f
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/precision_tool_exception.py
@@ -0,0 +1,24 @@
+# coding=utf-8
+import logging
+
+
+class PrecisionToolException(Exception):
+    """
+    Class for PrecisionTool Exception
+    """
+    def __init__(self, error_info):
+        super(PrecisionToolException, self).__init__()
+        self.error_info = error_info
+
+
+def catch_tool_exception(func):
+    def handle(*args, **kwargs):
+        log = logging.getLogger()
+        try:
+            return func(*args, **kwargs)
+        except PrecisionToolException as pte:
+            log.warning(pte.error_info)
+        except SystemExit:
+            # do not exit
+            log.debug("Exit")
+    return handle
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
new file mode 100644
index 000000000..7412b6cee
--- /dev/null
+++ 
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
new file mode 100644
index 000000000..7412b6cee
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/tool_object.py
@@ -0,0 +1,10 @@
+# coding=utf-8
+
+
+class ToolObject(object):
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        if not cls._instance:
+            # object.__new__() rejects extra arguments in Python 3,
+            # so do not forward *args/**kwargs here
+            cls._instance = super(ToolObject, cls).__new__(cls)
+        return cls._instance
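A minimal sketch of the intended singleton behavior:

```python
# Every instantiation returns the same shared object.
a = ToolObject()
b = ToolObject()
assert a is b
```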
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py
new file mode 100644
index 000000000..88fbe0b00
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/util/util.py
@@ -0,0 +1,536 @@
+# coding=utf-8
+import csv
+import re
+import sys
+import os
+import shutil
+import numpy as np
+import logging
+import subprocess
+from .constant import Constant
+from .precision_tool_exception import PrecisionToolException
+from .precision_tool_exception import catch_tool_exception
+from .file_desc import *
+from ..config import config as cfg
+
+try:
+    from rich.traceback import install
+    from rich.panel import Panel
+    from rich.table import Table
+    from rich import print as rich_print
+    from rich.columns import Columns
+    install()
+except ImportError as import_err:
+    install = None
+    Panel = None
+    Table = None
+    Columns = None
+    rich_print = print
+    print("Failed to import rich. Some functions may be disabled. Run 'pip3 install rich' to fix it.",
+          import_err)
+
+try:
+    import readline
+    readline.parse_and_bind('tab: complete')
+except ImportError as import_error:
+    print("Unable to import module: readline. Run 'pip3 install gnureadline pyreadline' to fix it.")
+
+# patterns
+OFFLINE_DUMP_PATTERN = r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})(?:\.csv)?"
+OFFLINE_DUMP_DECODE_PATTERN = \
+    r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.?[0-9]?[\.0-9]+?" \
+    r"\.([a-z]+)\.([0-9]{1,255})\.npy$"
+OFFLINE_DUMP_CONVERT_PATTERN = \
+    r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" \
+    r"\.([a-z]+)\.([0-9]{1,255})(\.[x0-9]+)?\.npy$"
+OFFLINE_FILE_NAME = 'op_type.op_name.task_id(.stream_id).timestamp'
+OP_DEBUG_NAME = 'OpDebug.Node_OpDebug.taskid.timestamp'
+CPU_DUMP_DECODE_PATTERN = r"^([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.npy$"
+CPU_FILE_DECODE_NAME = 'op_name.0(.0).timestamp.npy'
+OP_DEBUG_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})"
+OP_DEBUG_DECODE_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})[\.0-9]*\.([a-z]+)\.([0-9]{1,255})\.json"
+VECTOR_COMPARE_RESULT_PATTERN = r"result_([0-9]{1,255})\.csv"
+TIMESTAMP_DIR_PATTERN = '[0-9]{1,255}'
+NUMPY_PATTERN = r".*\.npy$"
+H5_PATTERN = r".*\.h5$"
+CSV_SHUFFIX = '.csv'
+NUMPY_SHUFFIX = '.npy'
+CKPT_META_SHUFFIX = r".*.meta$"
+MAPPING_CSV = "mapping.csv"
+
+
+class Util(object):
+    def __init__(self):
+        self.atc = None
+        self.ms_accu_cmp = None
+        logging.basicConfig(level=cfg.LOG_LEVEL, format="%(asctime)s (%(process)d) -[%(levelname)s]%(message)s",
+                            datefmt="%Y-%m-%d %H:%M:%S")
+        self.log = logging.getLogger()
+        self.python = sys.executable
+
+    def get_log(self):
+        return self.log
+
+    def execute_command(self, cmd: str):
+        """ Execute shell command
+        :param cmd: command
+        :return: status code
+        """
+        if cmd is None:
+            self.log.error("Command is None.")
+            return -1
+        self.log.debug("[Run CMD]: %s", cmd)
+        complete_process = subprocess.run(cmd, shell=True)
+        return complete_process.returncode
+
+    @staticmethod
+    def empty_dir(dir_path: str) -> bool:
+        """ Check if target dir is empty
+        :param dir_path: target dir
+        :return: bool
+        """
+        if not os.path.exists(dir_path):
+            return True
+        if len(os.listdir(dir_path)) == 0:
+            return True
+        return False
+
+    def convert_proto_to_json(self, src_file, dst_file):
+        """Convert GE proto graphs to json format.
+        command: atc --mode=5 --om=ge_proto_Build.txt --json=xxx.json
+        :param src_file: proto file
+        :param dst_file: output json file
+        :return: result json file
+        """
+        if not os.path.exists(src_file):
+            raise PrecisionToolException("Source proto file %s not exist." % src_file)
+        if os.path.exists(dst_file) and os.path.getmtime(dst_file) > os.path.getmtime(src_file):
+            self.log.debug("GE graph build json already exist.")
+            return dst_file
+        cmd = '%s --mode=5 --om=%s --json=%s' % (self._get_atc(), src_file, dst_file)
+        self.execute_command(cmd)
+        if not os.path.isfile(dst_file):
+            raise PrecisionToolException("Convert GE build graph to json failed. can not find any json file.")
+        self.log.info('Finish convert [%s] build graph from proto to json format.', src_file)
+        return dst_file
+
+    def convert_dump_to_npy(self, src_file, dst_path, data_format=None):
+        """Convert npu dump files to npy format.
+        :param src_file: src file
+        :param dst_path: dst path
+        :param data_format: target data format
+        :return: status code
+        """
+        self.create_dir(dst_path)
+        format_cmd = '' if data_format is None else '-f %s' % data_format
+        cmd = '%s %s convert -d %s -out %s %s' % (self.python, self._get_ms_accu_cmp(), src_file, dst_path, format_cmd)
+        return self.execute_command(cmd)
+
+    def compare_vector(self, npu_dump_dir, cpu_dump_dir, graph_json, result_path):
+        """Run compare vector command.
+ :param npu_dump_dir: npu dump data dir + :param cpu_dump_dir: cpu dump data dir + :param graph_json: graph json + :param result_path: result path + :return: status code + """ + self.create_dir(result_path) + if graph_json is None: + cmd = '%s %s compare -m %s -g %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, result_path) + else: + cmd = '%s %s compare -m %s -g %s -f %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, graph_json, result_path) + return self.execute_command(cmd) + + def list_dump_files(self, path, sub_path=''): + """List npu dump files in npu dump dir. + default only list the newest sub dir ordered by timestamp. set sub_path to specific other sub_path + :param path: dump path + :param sub_path: sub dir + :return: dump_files, parent_dirs + """ + parent_dirs = {} + dump_files = {} + newest_sub_path = self.get_newest_dir(path) if sub_path == '' else sub_path + dump_pattern = re.compile(OFFLINE_DUMP_PATTERN) + for dir_path, dir_names, file_names in os.walk(os.path.join(path, newest_sub_path), followlinks=True): + for name in file_names: + dump_match = dump_pattern.match(name) + if dump_match is None: + continue + dump_files[name] = self._gen_dump_file_info(name, dump_match, dir_path) + if dir_path not in parent_dirs: + parent_dirs[dir_path] = {} + parent_dirs[dir_path][name] = dump_files[name] + return dump_files, parent_dirs + + def parse_mapping_csv(self, path, pattern, extern_pattern=''): + """parse mapping csv in dump path""" + dump_files = {} + re_pattern = re.compile(pattern) + for dir_path, dir_names, file_names in os.walk(path, followlinks=True): + if MAPPING_CSV not in file_names: + continue + mapping = self.read_csv(os.path.join(dir_path, MAPPING_CSV)) + for item in mapping: + src_file = os.path.abspath(os.path.join(dir_path, item[0])) + if not os.path.isfile(src_file): + self.log.warning("Can not find file %s in mapping.csv, dir: %s.", item[0], dir_path) + continue + match = re_pattern.match(item[1]) + if match is None: + self.log.warning("file name [%s] in mapping.csv is invalid.", item[1]) + continue + file_desc = self._gen_dump_file_info(item[0], match, dir_path) + dst_file_name = '.'.join([file_desc.op_type, file_desc.file_name, str(file_desc.task_id), + str(file_desc.stream_id), str(file_desc.timestamp)]) + if item[1].endswith(Constant.Suffix.CSV): + dst_file_name += '.csv' + dst_file = os.path.abspath(os.path.join(dir_path, dst_file_name)) + if not os.path.islink(src_file): + os.rename(src_file, dst_file) + os.symlink(dst_file, src_file) + file_desc.path = dst_file + file_desc.file_name = dst_file_name + dump_files[item[1]] = file_desc + return dump_files + + def list_npu_dump_files(self, path, extern_pattern=''): + npu_dump_files = self._list_file_with_pattern(path, OFFLINE_DUMP_PATTERN, extern_pattern, + self._gen_dump_file_info) + npu_dump_files.update(self.parse_mapping_csv(path, OFFLINE_DUMP_PATTERN, extern_pattern)) + return npu_dump_files + + def list_ge_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, Constant.Pattern.GE_PROTO_GRAPH_PATTERN, extern_pattern, + self._gen_build_graph_file_info) + + def list_npu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_npu_dump_decode_file_info) + + def list_debug_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OP_DEBUG_DECODE_PATTERN, extern_pattern, + 
self._gen_overflow_debug_decode_file_info) + + def list_cpu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CPU_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_cpu_dump_decode_file_info) + + def list_cpu_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CKPT_META_SHUFFIX, extern_pattern, + self._gen_cpu_graph_files_info) + + def list_vector_compare_result_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, VECTOR_COMPARE_RESULT_PATTERN, extern_pattern, + self._gen_vector_compare_result_file_info) + + def list_npu_dump_convert_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_CONVERT_PATTERN, extern_pattern, + self._gen_npu_dump_convert_file_info) + + def list_numpy_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, NUMPY_PATTERN, extern_pattern, + self._gen_numpy_file_info) + + def list_h5_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, H5_PATTERN, extern_pattern, + self._gen_file_info) + + def create_dir(self, path): + """Create dir if not exist + :param path: path + :return: bool + """ + if os.path.exists(path): + return True + try: + os.makedirs(path, mode=0o700) + except OSError as err: + self.log.error("Failed to create %s. %s", path, str(err)) + return False + return True + + def clear_dir(self, path: str, pattern=''): + """Clear dir with pattern (file/path name match pattern will be removed) + :param path: path + :param pattern: pattern + :return: None + """ + if not os.path.exists(path): + return + try: + for f in os.listdir(path): + if not re.match(pattern, f): + continue + file_path = os.path.join(path, f) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except OSError as err: + self.log.error("Failed to remove %s. %s", path, str(err)) + + @staticmethod + def npy_info(source_data): + """Get npy information + :param source_data: npy path + :return: (shape, dtype) + """ + if isinstance(source_data, str): + if not str(source_data).endswith(NUMPY_SHUFFIX): + raise PrecisionToolException("Npy file [%s] is invalid" % source_data) + data = np.load(source_data, allow_pickle=True) + elif isinstance(source_data, np.ndarray): + data = source_data + else: + raise PrecisionToolException("Invalid source data:%s" % source_data) + if data.dtype == 'object': + raise PrecisionToolException("Invalid source data, data is object.") + if np.size(data) == 0: + raise PrecisionToolException("Empty source data:%s" % source_data) + return data.shape, data.dtype, data.max(), data.min(), data.mean() + + @catch_tool_exception + def gen_npy_info_txt(self, source_data): + """ Generate numpy info txt. 
+        :param source_data: source path or np.ndarray
+        :return: txt
+        """
+        try:
+            shape, dtype, max_data, min_data, mean = self.npy_info(source_data)
+            return '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (shape, dtype, max_data, min_data, mean)
+        except PrecisionToolException:
+            return ''
+
+    def print_npy_summary(self, path, file_name, is_convert=False, extern_content=''):
+        """Print summary of npy data
+        :param path: file path
+        :param file_name: file name
+        :param is_convert: if convert to txt file
+        :param extern_content: extern content append to the summary
+        :return: None
+        """
+        target_file = os.path.join(path, file_name)
+        if not os.path.exists(target_file):
+            raise PrecisionToolException("File [%s] not exist" % target_file)
+        data = np.load(target_file, allow_pickle=True)
+        table = self.create_table('', ['Index', 'Data'])
+        flatten_data = data.flatten()
+        for i in range(min(16, int(np.ceil(flatten_data.size / 8)))):
+            last_idx = min(flatten_data.size, i*8+8)
+            table.add_row(str(i * 8), ' '.join(flatten_data[i*8: last_idx].astype('str').tolist()))
+        summary = ['[yellow]%s[/yellow]' % self.gen_npy_info_txt(data), 'Path: %s' % target_file]
+        if is_convert:
+            summary.append('TxtFile: %s.txt' % target_file)
+        if extern_content != '':
+            summary.append('%s' % extern_content)
+        self.print_panel(self.create_columns([table, Constant.NEW_LINE.join(summary)]), file_name)
+        if is_convert:
+            self.save_npy_to_txt(data, target_file + '.txt')
+
+    def save_npy_to_txt(self, src_file, dst_file='', align=0):
+        """save numpy file to txt file.
+        default data will be aligned to the last axis of data.shape
+        :param src_file: src file name
+        :param dst_file: dst file name
+        :param align: data align
+        :return: None
+        """
+        if dst_file == '':
+            dst_file = src_file + '.txt'
+        if os.path.exists(dst_file):
+            self.log.debug("Dst file %s exists, will not save new one.", dst_file)
+            return
+        if isinstance(src_file, str):
+            data = np.load(src_file, allow_pickle=True)
+        elif isinstance(src_file, np.ndarray):
+            data = src_file
+        else:
+            raise PrecisionToolException("invalid src_file: %s" % src_file)
+        if data.dtype == 'object':
+            raise PrecisionToolException("Invalid source data, data is object.")
+        shape = data.shape
+        data = data.flatten()
+        if align == 0:
+            if len(shape) == 0:
+                align = 1
+            else:
+                align = shape[-1]
+        elif data.size % align != 0:
+            pad_array = np.zeros((align - data.size % align,))
+            data = np.append(data, pad_array)
+        np.savetxt(dst_file, data.reshape((-1, align)), delimiter=' ', fmt='%g')
+
+    def read_csv(self, path):
+        """Read csv file to list.
+        :param path: csv file path
+        :return: list
+        """
+        if not str(path).endswith(CSV_SHUFFIX):
+            self.log.error("csv path [%s] is invalid", path)
+            return []
+        rows = []
+        with open(path) as f:
+            csv_handle = csv.reader(f)
+            for row in csv_handle:
+                rows.append(row)
+        return rows
+
+    @staticmethod
+    def print(content):
+        rich_print(content)
+
+    @staticmethod
+    def render(content, rich=True):
+        if rich:
+            rich_print(content)
+        else:
+            print(content)
+
+    @staticmethod
+    def create_table(title, columns):
+        if Table is None:
+            raise PrecisionToolException("No rich module error.")
+        table = Table(title=title)
+        for column_name in columns:
+            table.add_column(column_name, overflow='fold')
+        return table
+
+    @staticmethod
+    def create_columns(content):
+        if Columns is None:
+            raise PrecisionToolException("No rich module error.")
+        return Columns(content)
+    def print_panel(self, content, title='', fit=True):
+        """ Print panel.
+        :param content: content
+        :param title: title
+        :param fit: if panel size fit the content
+        :return: None
+        """
+        if Panel is None:
+            print(content)
+            return
+        if fit:
+            self.print(Panel.fit(content, title=title))
+        else:
+            self.print(Panel(content, title=title))
+
+    @staticmethod
+    def _detect_file(file_name, root_dir):
+        """Find file in root dir"""
+        result = []
+        for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True):
+            for name in file_names:
+                if re.match(file_name, name):
+                    result.append(os.path.join(dir_path, name))
+        return result
+
+    def _detect_file_if_not_exist(self, target_file):
+        """Find specific file in cmd root path"""
+        self.log.info("Try to auto detect file with name: %s.", target_file)
+        res = self._detect_file(target_file, cfg.CMD_ROOT_PATH)
+        if len(res) == 0:
+            raise PrecisionToolException("Cannot find any file named %s in dir %s" % (target_file, cfg.CMD_ROOT_PATH))
+        self.log.info("Detect [%s] success. %s", target_file, res)
+        return res[0]
+
+    def _get_atc(self):
+        if self.atc is None:
+            self.atc = self._detect_file_if_not_exist('^atc$')
+        return self.atc
+
+    def _get_ms_accu_cmp(self):
+        if self.ms_accu_cmp is None:
+            self.ms_accu_cmp = self._detect_file_if_not_exist(cfg.MS_ACCU_CMP)
+        return self.ms_accu_cmp
+
+    def get_newest_dir(self, path: str):
+        """Find the newest subdir in specific path; subdirs should be named by timestamp."""
+        if not os.path.isdir(path):
+            self.log.warning("Path [%s] not exists", path)
+            return ''
+        paths = os.listdir(path)
+        sub_paths = []
+        for p in paths:
+            if re.match(TIMESTAMP_DIR_PATTERN, p):
+                sub_paths.append(p)
+        if len(sub_paths) == 0:
+            self.log.debug("Path [%s] has no timestamp dirs.", path)
+            return ''
+        newest_sub_path = sorted(sub_paths)[-1]
+        self.log.info("Sub path num:[%d]. Dirs[%s], choose[%s]", len(sub_paths), str(sub_paths), newest_sub_path)
+        return newest_sub_path
+
+    @staticmethod
+    def _list_file_with_pattern(path, pattern, extern_pattern, gen_info_func):
+        if path is None or not os.path.exists(path):
+            raise PrecisionToolException("Path %s not exist." % path)
+        file_list = {}
+        re_pattern = re.compile(pattern)
+        for dir_path, dir_names, file_names in os.walk(path, followlinks=True):
+            for name in file_names:
+                match = re_pattern.match(name)
+                if match is None:
+                    continue
+                if extern_pattern != '' and not re.match(extern_pattern, name):
+                    continue
+                file_list[name] = gen_info_func(name, match, dir_path)
+        return file_list
+
+    @staticmethod
+    def _gen_numpy_file_info(name, match, dir_path):
+        return FileDesc(name, dir_path)
+
+    @staticmethod
+    def _gen_file_info(name, match, dir_path):
+        return FileDesc(name, dir_path)
+
+    @staticmethod
+    def _gen_build_graph_file_info(name, match, dir_path):
+        return BuildGraphFileDesc(name, dir_path, -1, int(match.group(1)), match.groups()[-1])
+
+    @staticmethod
+    def _gen_dump_file_info(name, match, dir_path):
+        return NpuDumpFileDesc(name, dir_path, int(match.groups()[-1]), op_name=match.group(2), op_type=match.group(1),
+                               task_id=int(match.group(3)), stream_id=match.group(4))
+
+    @staticmethod
+    def _gen_npu_dump_decode_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name=match.group(2),
+                                  op_type=match.group(1), task_id=int(match.group(3)),
+                                  anchor_type=match.groups()[-2], anchor_idx=int(match.groups()[-1]))
+
+    @staticmethod
+    def _gen_cpu_dump_decode_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, -1, op_name=match.group(1), op_type='', task_id=0,
+                                  anchor_type='output', anchor_idx=int(match.group(2)))
+
+    @staticmethod
+    def _gen_cpu_graph_files_info(name, match, dir_path):
+        return FileDesc(name, dir_path, -1)
+
+    @staticmethod
+    def _gen_overflow_debug_decode_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name='Node_OpDebug', op_type='Opdebug',
+                                  task_id=int(match.group(1)), anchor_type=match.groups()[-2],
+                                  anchor_idx=int(match.groups()[-1]))
+
+    @staticmethod
+    def _gen_vector_compare_result_file_info(name, match, dir_path):
+        return FileDesc(name, dir_path, int(match.group(1)))
+
+    @staticmethod
+    def _gen_npu_dump_convert_file_info(name, match, dir_path):
+        return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-4]), op_name=match.group(2),
+                                  op_type=match.group(1), task_id=int(match.group(3)), anchor_type=match.groups()[-3],
+                                  anchor_idx=int(match.groups()[-2]))
+
+
+util = Util()
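Since the listing helpers above hinge on OFFLINE_DUMP_PATTERN, a quick sanity check of how a dump file name decomposes may help; the file name below is fabricated, but follows the documented 'op_type.op_name.task_id(.stream_id).timestamp' layout:

```python
# Hypothetical check of OFFLINE_DUMP_PATTERN against a made-up dump file name.
import re

pattern = re.compile(r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})(?:\.csv)?")
m = pattern.match('Add.add_1.323.5.1619494134703053')
print(m.groups())
# -> ('Add', 'add_1', '323', '5', '1619494134703053')
#    op_type, op_name, task_id, stream_id, timestamp
```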
-- Gitee
From 73c43849e7cde2ce7493bf386d180ebc60d82813 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:44:55 +0000
Subject: [PATCH 11/38] 1

Signed-off-by: huangju1993
---
 .../examples/precision_tool/lib/__init__.py        |   0
 .../examples/precision_tool/lib/cpu_tvm.py         |  51 ++++
 .../precision_tool/lib/interactive_cli.py          |  87 +++++++
 .../precision_tool/lib/precision_tool.py           | 230 ++++++++++++++++++
 4 files changed, 368 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/__init__.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/__init__.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py
new file mode 100644
index 000000000..a0906071e
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/cpu_tvm.py
@@ -0,0 +1,51 @@
+import numpy as np
+from tbe import tvm
+
+
+class CpuTvm(object):
+    def __init__(self, json_file, dump_input_files, dump_output_files):
+        self.json_file = json_file
+        self.dump_input_files = dump_input_files
+        self.dump_output_files = dump_output_files
+        self.input_list = []
+        self.output_list = []
+
+    def _load_schedule(self):
+        with open(self.json_file, 'r') as jsonfile:
+            tvm_node = tvm.load_json(jsonfile.read())
+            self.output_list = tvm_node.op.attrs['output_list']
+            self.input_list = tvm_node.op.attrs['input_list']
+            schedule = tvm.create_schedule([res.op for res in self.output_list])
+        return schedule
+
+    def _build_tvm(self, schedule):
+        tensor_list = [ele for ele in self.input_list if ele is not None]
+        for ele in self.output_list:
+            if ele is not None:
+                tensor_list.append(ele)
+        fusion_op = tvm.build(schedule, tensor_list, "c", "llvm")
+        return fusion_op
+
+    def _load_data(self, dump_files):
+        ctx = tvm.cpu(0)
+        data_tvm = []
+        for dump_file in dump_files:
+            data_temp_numpy = np.load(dump_file)
+            data_temp_tvm = tvm.nd.array(data_temp_numpy, ctx)
+            data_tvm.append(data_temp_tvm)
+        return data_tvm
+
+    def run_cpu_tvm(self):
+        # load schedule and build tvm
+        schedule = self._load_schedule()
+        fusion_op = self._build_tvm(schedule)
+
+        # load data and run the fused op on cpu
+        data_tvm_in = self._load_data(self.dump_input_files)
+        data_tvm_out = self._load_data(self.dump_output_files)
+        data_tvm_in.extend(data_tvm_out)
+        fusion_op(*data_tvm_in)
+
+        # convert tvm NDArray back to numpy
+        data_np_out = [data.asnumpy() for data in data_tvm_out]
+        return data_np_out
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py
new file mode 100644
index 000000000..4e6aedd18
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/interactive_cli.py
@@ -0,0 +1,87 @@
+# coding=utf-8
+import cmd
+from .util.util import util
+from .util.constant import Constant
+from .precision_tool import PrecisionTool
+
+HEADER = r"""    ____                    _      _           ______            __
+   / __ \________  _____(_)____(_)___  ____/_  __/___  ____  / /
+  / /_/ / ___/ _ \/ ___/ / ___/ / __ \/ __ \/ / / __ \/ __ \/ /
+ / ____/ /  / __/ /__/ (__  ) / /_/ / / / / / / /_/ / /_/ / /
+/_/   /_/   \___/\___/_/____/_/\____/_/ /_/_/  \____/\____/_/  version=%s""" % Constant.VERSION
+
+HELP_AC = "Run auto check function, use [-c] to start vector compare process.\n" \
+          "   usage: ac (-c) \n"
+HELP_RUN = "Run any shell command.\n" \
+           "   usage: (run) vim tensor_name.txt \n"
+HELP_PT = "Print npy tensor, use [-c] to convert and save to txt file.\n" \
+          "   usage: pt (-c) [tensor_name.npy] \n"
+
+
+class InteractiveCli(cmd.Cmd):
+    def __init__(self):
+        cmd.Cmd.__init__(self)
+        self.prompt = "PrecisionTool > "
+        self.precision_tool = None
+        util.print_panel(HEADER)
+        self._prepare()
+
+    def default(self, line=''):
+        util.execute_command(line)
+        return False
+
+    def _prepare(self):
+        self.precision_tool = PrecisionTool()
+ self.precision_tool.prepare() + + def do_ac(self, line=''): + """Auto check.""" + self.precision_tool.do_auto_check(self._parse_argv(line)) + + def do_run(self, line=''): + """Run any shell command""" + util.execute_command(line) + + def do_ls(self, line=''): + """List ops: \n usage: ls (op(default)/dump) -n [op_name] -t [op_type]""" + argv = self._parse_argv(line) + if len(argv) > 0 and argv[0] == 'dump': + return self.precision_tool.do_list_dump(argv[1:]) + self.precision_tool.do_list_nodes(argv) + + def do_ni(self, line=''): + """Print node info:\n usage: ni (-n) [op_name]""" + self.precision_tool.do_node_info(self._parse_argv(line, '-n')) + + def do_dc(self, line=''): + """Convert npu dump by op names:\n usage: dc (-n) [npu dump file] -f [target format]""" + self.precision_tool.do_convert_npu_dump(self._parse_argv(line, '-n')) + + def do_vc(self, line=''): + """Do vector compare: \n usage: vc """ + self.precision_tool.do_vector_compare(self._parse_argv(line)) + + def do_vcs(self, line=''): + """Do vector compare summary""" + self.precision_tool.do_vector_compare_summary(self._parse_argv(line)) + + def do_pt(self, line=''): + """Print data info:\n usage: pt (-n) [*.npy] (-c)\n -c: convert and save to txt file""" + self.precision_tool.do_print_data(self._parse_argv(line, '-n')) + + def do_cp(self, line=''): + """Compare two data file """ + self.precision_tool.do_compare_data(self._parse_argv(line, '-n')) + + def do_train(self, line=''): + """Train process:\n usage: train -d all -a dump""" + self.precision_tool.do_train_analysis(self._parse_argv(line)) + + @staticmethod + def _parse_argv(line, insert=None): + argv = line.split() if line != '' else [] + if '-h' in argv: + return argv + if insert is not None and len(argv) > 0 and argv[0] != insert: + argv.insert(0, insert) + return argv diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py new file mode 100644 index 000000000..d118b86bc --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/lib/precision_tool.py @@ -0,0 +1,230 @@ +import argparse +import os +import time + +from .adapter.overflow import Overflow +from .dump.dump_manager import DumpManager +from .graph.graph_manager import GraphManager +from .compare.compare import Compare +from .adapter.fusion import Fusion +from .train.train_analysis import TrainAnalysis +from .util.util import util +from .util.constant import Constant +from .config import config as cfg +from .util.precision_tool_exception import PrecisionToolException +from .util.precision_tool_exception import catch_tool_exception + + +class PrecisionTool(object): + def __init__(self): + """init""" + self.graph_manager = GraphManager() + self.overflow = Overflow() + self.dump_manager = DumpManager() + self.compare = Compare() + self.fusion = Fusion() + self.train_analysis = TrainAnalysis() + self.log = util.get_log() + + @catch_tool_exception + def prepare(self): + """prepare""" + util.create_dir(cfg.DATA_ROOT_DIR) + self.graph_manager.prepare() + self.dump_manager.prepare() + self.overflow.prepare() + self.fusion.prepare() + self.compare.prepare() + + @catch_tool_exception + def do_auto_check(self, argv): + """Auto check""" + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--vector_compare', dest='vector_compare', help='Run vector compare process', + action='store_true') 
+ parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args(argv) + # vector compare + if args.vector_compare: + self.do_vector_compare() + self.do_vector_compare_summary() + self.do_check_fusion() + self.do_check_overflow(args.limit) + self.do_check_cast() + self.do_check_graph_similarity() + + @catch_tool_exception + def do_check_overflow(self, limit=3): + """check overflow""" + self.overflow.check(limit) + + @catch_tool_exception + def do_check_cast(self): + self.graph_manager.check_cast() + + @catch_tool_exception + def do_check_dtype(self): + """Check input/output dtype""" + self.graph_manager.check_dtype() + + @catch_tool_exception + def do_check_fusion(self): + """print fusion info summary""" + self.fusion.check() + + @catch_tool_exception + def do_check_graph_similarity(self): + self.graph_manager.check_similarity() + + @catch_tool_exception + def do_vector_compare(self, argv=None): + """do vector compare""" + parser = argparse.ArgumentParser() + parser.add_argument('-lt', '--left', dest='lt', default=None, help='left path(npu dump path)') + parser.add_argument('-rt', '--right', dest='rt', default=None, help='right path(cpu/npu dump path)') + parser.add_argument('-g', '--graph', dest='graph', required=False, default=None, help='graph json file') + args = parser.parse_args() if argv is None else parser.parse_args(argv) + # 1. compare npu_debug0 - tf dump data (auto) + # 2. compare npu_debug0 - npu_debug1 dump data + # 3. compare dir - dir dump data + result_dir = os.path.join(cfg.VECTOR_COMPARE_PATH, time.strftime("%Y%m%d%H%M%S", time.localtime())) + if args.lt is None: + debug_0_dump_root = self.dump_manager.get_dump_root_dir(Constant.DEFAULT_DEBUG_ID) + if util.empty_dir(debug_0_dump_root): + raise PrecisionToolException("NPU debug_0 dump dir is empty, no files to compare.") + if not util.empty_dir(cfg.TF_DUMP_DIR): + self.log.info("Tf dump dir is not empty, will compare npu dump data with tf dump data.") + self.compare.npu_tf_vector_compare(self.graph_manager.get_graphs(Constant.DEFAULT_DEBUG_ID), + debug_0_dump_root, cfg.TF_DUMP_DIR, result_dir) + else: + self.log.warning("Tf dump dir is empty, maybe run [python3 precision_tool/cli.py tf_dump] to decode" + " tf debug data.") + debug_1_dump_root = self.dump_manager.get_dump_root_dir(Constant.NPU_DEBUG_ID_1) + if debug_1_dump_root is not None and not util.empty_dir(debug_1_dump_root): + self.log.info("NPU debug_1 dump dir is not empty, will compare two npu dump data.") + self.compare.npu_vector_compare(debug_0_dump_root, debug_1_dump_root) + else: + lh_path = args.lt + rh_path = args.rt + graph_json = args.graph + self.compare.vector_compare(lh_path, rh_path, result_dir, graph_json) + self.compare.vector_summary(result_dir) + + @catch_tool_exception + def do_vector_compare_summary(self, argv=None): + parser = argparse.ArgumentParser(description="show vector compare result summary.") + parser.add_argument('-f', '--file', dest='file', default=None, required=False, help='compare_result file/path') + parser.add_argument('-c', '--cos_sim', dest='cos_sim', type=float, help='cos_sim_threshold', default=0.98) + parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args() if argv is None else parser.parse_args(argv) + error_ops = self.compare.vector_summary(args.file, args.cos_sim, args.limit) + # parse error_ops + + @catch_tool_exception + def do_print_data(self, argv=None): + """print tensor data""" + parser = 
argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='list by op name')
+        args = parser.parse_args() if argv is None else parser.parse_args(argv)
+        self.dump_manager.print_tensor(args.name, True)
+
+    @catch_tool_exception
+    def do_list_nodes(self, argv):
+        """list op nodes in graph"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', default='', help='list by op type')
+        parser.add_argument('-n', '--name', dest='name', default='', help='list by op name')
+        parser.add_argument('-f', '--fusion', dest='fusion', default='', help='list by op fusion pass')
+        parser.add_argument('-k', '--kernel_name', dest='kernel_name', default='', help='list by op kernel_name')
+        args = parser.parse_args(argv)
+        self.graph_manager.print_op_list(args.type, args.name, args.fusion, args.kernel_name)
+
+    @catch_tool_exception
+    def do_node_info(self, argv):
+        """Print op node info"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='op name')
+        parser.add_argument('-g', '--graph', dest='graph', help='graph name')
+        parser.add_argument('-a', '--attr', dest='attr', action='store_true', help='show all attr info')
+        parser.add_argument('-c', '--check', dest='check', action='store_true', help='check single op precision')
+        parser.add_argument('-s', '--save', dest='save', type=int, default=0,
+                            help='save subgraph, param gives the deep of subgraph')
+        args = parser.parse_args(argv)
+        # print graph op info
+        npu_ops, _ = self.graph_manager.get_ops(args.name, args.graph)
+        npu_op_summary, tf_op_summary = self.graph_manager.op_graph_summary(npu_ops, args.attr)
+        npu_dump_summary, tf_dump_summary = self.dump_manager.op_dump_summary(npu_ops)
+        pt_dump_summary = self.dump_manager.pt_dump_summary(args.name)
+        # merge graph/dump/compare info
+        for debug_id, graph_summary in npu_op_summary.items():
+            for graph_name, summary_detail in graph_summary.items():
+                summary_txt = [summary_detail]
+                if debug_id in npu_dump_summary and graph_name in npu_dump_summary[debug_id]:
+                    summary_txt.append(npu_dump_summary[debug_id][graph_name])
+                if tf_dump_summary is not None:
+                    summary_txt.append(tf_dump_summary)
+                title = "[green](%s)[/green] %s" % (debug_id, graph_name)
+                util.print_panel(Constant.NEW_LINE.join(summary_txt), title)
+        if pt_dump_summary != '':
+            util.print_panel(pt_dump_summary, args.name)
+        if args.save != 0:
+            self.graph_manager.save_sub_graph(npu_ops, args.save)
+
+    @catch_tool_exception
+    def do_compare_data(self, argv):
+        """compare two tensor"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='names', type=str, default=[], help='op name', nargs='+')
+        parser.add_argument('-p', '--print', dest='count', default=20, type=int, help='print err data num')
+        parser.add_argument('-s', '--save', dest='save', action='store_true', help='save data in txt format')
+        parser.add_argument('-al', '--atol', dest='atol', default=0.001, type=float, help='set atol')
+        parser.add_argument('-rl', '--rtol', dest='rtol', default=0.001, type=float, help='set rtol')
+        args = parser.parse_args(argv)
+        if len(args.names) != 2:
+            self.log.error("compare files should be 2.")
+        else:
+            self.compare.compare_data(args.names[0], args.names[1], args.save, args.rtol, args.atol, args.count)
+
+    @catch_tool_exception
+    def do_list_dump(self, argv):
+        """List dump files"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', default='', help='dump dir type')
+        parser.add_argument('-n', '--name', dest='name', default='', help='op name')
+        args = parser.parse_args(argv)
+        self.dump_manager.list_dump(args.type, args.name)
+
+    @catch_tool_exception
+    def do_convert_npu_dump(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', help='op name')
+        parser.add_argument('-f', '--format', dest='format', default=None, required=False, help='target format')
+        parser.add_argument('-o', '--output', dest='output', required=False, default=None, help='output path')
+        args = parser.parse_args(argv)
+        self.dump_manager.convert_npu_dump(args.name, args.format, args.output)
+
+    @catch_tool_exception
+    def do_convert_all_npu_dump(self):
+        self.dump_manager.decode_all_npu_dump()
+
+    @catch_tool_exception
+    def check_graph_similarity(self):
+        """ Check graph similarity """
+
+    @catch_tool_exception
+    def do_train_analysis(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-d', '--device', dest='device', default='all', required=False,
+                            help='train device, support cpu/npu/all')
+        parser.add_argument('-a', '--action', dest='action', default='dump', required=False,
+                            help='action, support dump(-d cpu/npu)[overflow]|fusion_off|fusion_switch(npu)')
+        args = parser.parse_args(argv)
+        self.train_analysis.run(args.device, args.action)
+
+    def single_cmd(self, argv):
+        cmd_func_map = {'compare': self.do_compare_data,
+                        'vector_compare': self.do_vector_compare,
+                        'train': self.do_train_analysis}
+        if argv[1] in cmd_func_map:
+            func = cmd_func_map[argv[1]]
+            return func(argv[2:])
+        raise PrecisionToolException("cmd %s is not supported or cmd should be run in interactive mode." % argv[1])
-- Gitee
From 2fdf42531701b1ff7fca425d2af3fc9627cefb80 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 05:45:25 +0000
Subject: [PATCH 12/38] 1

Signed-off-by: huangju1993
---
 .../examples/precision_tool/README.md         | 518 ++++++++++++++++++
 .../examples/precision_tool/RELEASE.md        |  66 +++
 .../examples/precision_tool/__init__.py       |   0
 .../examples/precision_tool/caffe_dump.py     | 132 +++++
 .../examples/precision_tool/cli.py            | 129 +++++
 .../examples/precision_tool/tf_config.py      | 118 ++++
 .../examples/precision_tool/tf_session.py     |  60 ++
 7 files changed, 1023 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/__init__.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py
 create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py

diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md
new file mode 100644
index 000000000..3f291afff
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/README.md
@@ -0,0 +1,518 @@
+# Precision Issue Analysis Tool
+
+## Features
+### Overview
+This toolkit provides the functions commonly needed for precision comparison. It currently targets TensorFlow training scenarios.
+
+Most common functions are one-command operations, and an interactive entry point is provided for querying and working with dump data and graph information.
+
+For inference scenarios, use the [one-click end-to-end inference precision comparison](https://gitee.com/ascend/tools/tree/master/msquickcmp) tool directly.
+### Main functions
+#### Implemented
+1. Simplified script modification [manual/semi-automatic]
+2. TF golden (reference) data generation [automatic/semi-automatic]
+3. Operator overflow detection and analysis [automatic]
+4. GE graph dump and graph parsing [automatic]
+5. Data dump and full-network comparison [automatic]
+6. Operator list / node info query [manual]
+7. Dump data query and decoding [manual]
+8. Data comparison [manual]
+### Getting the tool
+1. Download as an archive:
+   download https://gitee.com/ascend/tools as a zip archive
+2. Or fetch it with git
+3. Move the tools/precision_tool subdirectory into your training working directory
+### Installing python3 dependencies
+```shell
+pip3 install rich gnureadline pexpect graphviz
+# ubuntu/Debian
+sudo apt-get install graphviz
+# fedora/Centos
+sudo yum install graphviz
+```
+### Runtime prerequisites
+* The script is normally deployed on the NPU training environment, where both the CPU and NPU training scripts can run.
+* For dump data comparison, first check for and remove any randomness used inside the training script; otherwise mismatched inputs make the comparison result unusable.
+  ```python
+  # For tf.random / np.random / (python) random, fix the inputs by fixing the seeds.
+  # Importing tf_config.py sets all three seeds by default, but depending on import order
+  # it may not reach all related code; set them manually at a suitable place in the code.
+  seed = 987654
+  random.seed(seed)
+  tf.random.set_random_seed(seed)
+  np.random.seed(seed)
+
+  # Set tf_random_seed in RunConfig/NPURunConfig to fix the network-level random factor.
+  # A seed set via tf.random does not take effect globally under Estimator;
+  # use the following instead:
+  run_config = tf.estimator.RunConfig(tf_random_seed=1, ...)
+  run_config = NPURunConfig(tf_random_seed=1, ...)
+  ```
+  * **In theory the approaches above fix most randomness in a network; the steps below are usually unnecessary.**
+  ```python
+  # 1. Random parameter initialization
+  # Loading a checkpoint fixes most initial parameters
+  saver.restore(sess, saver_dir)
+
+  # 2. Random input processing (e.g. shuffling the input data)
+  dataset = tf.data.TFRecordDataset(tf_data)
+  dataset = dataset.shuffle(batch_size*10)  # simply comment out this line
+
+  # 3. Randomness inside the model (e.g. dropout)
+  net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')  # comment out this line
+
+  # 4. Random image preprocessing (fix the seed as appropriate, or replace with deterministic preprocessing)
+  # 4.1 Random rotate
+  random_angle = tf.random_uniform([], - self.degree * 3.141592 / 180, self.degree * 3.141592 / 180)
+  image = tf.contrib.image.rotate(image, random_angle, interpolation='BILINEAR')
+  depth_gt = tf.contrib.image.rotate(depth_gt, random_angle, interpolation='NEAREST')
+
+  # 4.2 Random flipping
+  do_flip = tf.random_uniform([], 0, 1)
+  image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(image), lambda: image)
+  depth_gt = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(depth_gt), lambda: depth_gt)
+
+  # 4.3 Random crop
+  image_depth = tf.concat([image, depth_gt], 2)
+  image_depth_cropped = tf.random_crop(image_depth, [self.params.height, self.params.width, 4])
+
+  # others ......
+  ```
+* The tool parses and analyzes the **NPU computation graph**, **NPU dump data**, **NPU overflow detection data**, **TF computation graph meta file**, and **TF dump data**.
+These inputs can be obtained as described below (you only need the inputs required by the features you actually use):
+#### 1. NPU computation graph
+  ```
+  Note: NPU dump data and the computation graph are correlated and must be collected together.
+  Avoid unsetting the DUMP GRAPH related environment variables in custom training scripts.
+  ```
+* [Recommended] Method 1: configure NPU data dump or overflow detection as in dependencies 2 and 3 below; the GE graph dump environment variables are then set automatically.
+
+* [Not recommended] Method 2: apply the changes from the migration guide, run the NPU script, and move the collected graphs into the precision_data graph directory:
+  ```shell
+  export DUMP_GE_GRAPH=2
+  export DUMP_GRAPH_LEVEL=3
+  export DUMP_GRAPH_PATH=./precision_data/npu/debug_0/graph
+  # If DUMP_GRAPH_PATH is not set, the graph files are written to the working directory and can be moved into precision_data directly
+  mkdir -p ./precision_data/npu/debug_0/graph && mv ge_proto_*.txt ./precision_data/npu/debug_0/graph
+  ```
+#### 2. NPU dump data
+* [Recommended] Method 1: **import precision_tool.tf_config** in the training script and run it through the helper CLI provided by precision_tool:
+  ```python
+  # Both NPU dump data and overflow detection data can be collected with the change below.
+  # Note: the action parameter can be 'dump' or 'overflow'.
+  # import precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # If NPU is enabled through Estimator's NPURunConfig:
+  dump_config = npu_tf_config.estimator_dump_config(action='dump')  # new line
+  npu_config = NPURunConfig(dump_config=dump_config)
+
+  # If NPU is enabled through session.run, or through a tf.ConfigProto session_config
+  # passed into tf.estimator.RunConfig:
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump')  # new line
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+
+  # If custom_op is used:
+  config = tf.ConfigProto()
+  custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+  custom_op.name = "NpuOptimizer"
+  custom_op.parameter_map["use_off_line"].b = True
+  custom_op = npu_tf_config.update_custom_op(custom_op, action='dump')  # new line
+  ```
+
+* [Not recommended] Method 2: modify the training script following the [precision comparison tool guide](https://www.hiascend.com/document?tag=community-developer),
+  run it, and copy the dump data into the [precision_data/dump/npu/] directory.
+#### 3. NPU overflow detection data (without it, overflow results cannot be shown)
+* [Recommended] Method 1: **import precision_tool.tf_config** in the training script, apply the changes from [2. NPU dump data], and run through the helper CLI:
+  ```python
+  # set action to 'overflow'
+  # import precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+  dump_config = npu_tf_config.estimator_dump_config(action='overflow')  # new line
+  ```
+* [Not recommended] Method 2: modify the training script following [analyzing operator overflow with the overflow detection tool](https://www.hiascend.com/document?tag=community-developer)
+  and copy the overflow data into the [precision_tool/dump/overflow/] directory.
+
+#### 4. TF dump data (required for data comparison; applies to TF 1.15, see tfdbg_ascend for TF2.x)
+* [Recommended] Method 1: add tf_debug code to the CPU/GPU training script, then generate the golden dump data with the helper CLI:
+  ```python
+  import precision_tool.tf_config as npu_tf_config
+
+  # For Estimator, add training_hooks to EstimatorSpec
+  estim_specs = tf.estimator.EstimatorSpec(training_hooks=[npu_tf_config.estimator_dump()])
+
+  # For session.run, the following wraps the session with tf_debug:
+  sess = npu_tf_config.sess_dump(sess=sess)
+  ```
+  ```shell
+  # 1. Run the script
+  # 2. Decode the tf debug dump files into per-op output tensor files
+  # Note: TF dump is implemented with tf_debug's print_tensor (pt) command. Because run() is a very
+  # flexible interface, the script cannot know which run() phase holds the tensors you want, so modify
+  # the training code to exit right after the relevant run(). For example, run only one training step;
+  # depending on the number of run() calls, 1..N offline tf_debug dump directories are produced.
+  # The precision_tool script automatically extracts all tensors of the last run phase as golden data.
+  python3.7.5 precision_tool/cli.py tf_dump
+
+  # The extracted tensors are stored under precision_data/tf/dump/
+  # If they are not what you expected, inspect precision_data/dump/cpu_debug/ and keep only the
+  # tf_debug offline data of the intended run phase, then regenerate:
+  rm -rf precision_data/tf/dump/* && python3.7.5 precision_tool/cli.py tf_dump
+  ```
+* [Not recommended] Method 2: follow [preparing npy data generated on GPU/CPU](https://www.hiascend.com/document?tag=community-developer)
+  to collect the CPU/GPU TF data and copy it into the [precision/dump/cpu/] directory.
+#### 5. TF graph meta file (optional)
+* Save a checkpoint through a saver:
+  ```python
+  # modify the CPU/NPU script
+  with tf.Session() as sess:
+      # do session.run()
+      saver = tf.train.Saver()
+      # save ckpt
+      saver.save(sess, saver_dir)
+  ```
+#### 6. Disabling NPU fusion (enable as needed)
+* The NPU fuses operators in the computation graph to improve network performance. Since most fusions are recognized
+  automatically, unforeseen cases can cause precision issues; disabling fusion helps determine whether a problem is fusion-related.
+  ```python
+  # Disabling fusion can be combined with overflow detection / data dump; it is enabled similarly.
+  # import precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # If NPU is enabled through Estimator's NPURunConfig:
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_OFF_FILE)  # changed line
+  # To disable only specific fusion rules, edit precision_tool/fusion_switch.cfg and use:
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_SWITCH_FILE)  # changed line for specific fusions
+
+  # If NPU is enabled through session.run or a tf.ConfigProto session_config passed into
+  # tf.estimator.RunConfig, the following enables data dump and fusion-off together:
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_off')  # new line
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_switch')  # new line for specific fusions
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+  # With custom_op, the following also works:
+  custom_op = npu_tf_config.update_custom_op(custom_op=custom_op, action='dump | fusion_off')
+  ```
+## Usage
+1. Configuration file precision_tool/config.py (defaults are usually fine)
+   ```python
+   # To dump data of specific steps, change the following item.
+   # Comparing the first step is usually enough.
+   # Dump config '0|5|10'
+   TF_DUMP_STEP = '0'
+
+   # Fusion switch file; individual fusion rules can be enabled/disabled here, see:
+   # https://support.huaweicloud.com/tensorflowdevg-cann330alphaXtraining/atlastfadapi_07_0005.html
+   FUSION_SWITCH_FILE = './precision_tool/fusion_switch.cfg'
+
+   # Depends on atc and msaccucmp.pyc from the run package; point this at their parent directory.
+   # The default run package install location is /usr/local/Ascend, so this usually needs no change.
+   # parent dir path of msaccucmp.pyc and atc, usually run package dir
+   CMD_ROOT_PATH = '/usr/local/'
+
+   # ASCEND Log Path
+   ASCEND_LOG_PATH = '/root/ascend/log/plog/'
+
+   # log level and analysis root directory
+   # TOOL CONFIG
+   LOG_LEVEL = "NOTSET"
+   # On ModelArts, point the data root at a custom directory as needed and download it completely afterwards
+   ROOT_DIR = './'
+   ```
+2. Start the script (interactive CLI)
+   ```shell
+   python3 ./precision_tool/cli.py
+   ```
+
+### Interactive commands
+1. ac -l [limit_num] -c
+   ```shell
+   # auto check: lists fusion info and decodes operator overflow data
+   # -c optional, also run the full-network comparison
+   # -l optional, limit the number of printed results (decoded overflow entries etc.)
+   PrecisionTool > ac -c
+   ╭──────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ [TransData][327] trans_TransData_1170                                                             │
+   │  - [AI Core][Status:32][TaskId:327] ['浮点计算有溢出']                                            │
+   │  - First overflow file timestamp [1619347786532995] -                                             │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.input.0.npy                              │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11950836181640626]  │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.output.0.npy                             │
+   │   |- [Shape: (32, 20, 8, 8, 16)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.07781982421875]  │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+2. run [command]
+   ```shell
+   # Run a shell command without leaving the interactive CLI; commands that do not clash with
+   # built-in commands can be run directly, otherwise prefix them with run
+   PrecisionTool > run vim cli.py
+   PrecisionTool > vim cli.py
+   ```
+
+3. ls -n [op_name] -t [op_type] -f [fusion_pass] -k [kernel_name]
+   ```shell
+   # Query operators in the network by [op name]/[op type], fuzzy match
+   # -n op node name
+   # -t op type
+   # -f fusion type
+   # -k kernel_name
+   PrecisionTool > ls -t Mul -n mul_3 -f TbeMulti
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5b/Branch_1/mul_3
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5c/Branch_1/mul_3
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5d/Branch_1/mul_3
+   [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_6b/Branch_1/mul_3
+   ```
+
+4. ni (-n) [op_name] -s [save sub graph deep]
+   ```shell
+   # Query node info by [op name]
+   # -n node name
+   # -g graph name
+   # -a show attr info
+   # -s save a subgraph rooted at this node, with depth given by the parameter value
+   PrecisionTool > ni gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual -s 3
+   ╭─────────────────── [GreaterEqual]gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual ────────────────────╮
+   │ [GreaterEqual] gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual                                        │
+   │ Input:                                                                                                                                │
+   │  -[0][DT_FLOAT][NHWC][32, 8, 8, 320] InceptionV3/InceptionV3/Mixed_7a/Branch_0/add_3:0                                                │
+   │  -[1][DT_FLOAT][NHWC][1, 8, 1, 1] InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3tau:0                                                    │
+   │  -[2][][[]][] atomic_addr_clean0_21:-1                                                                                                │
+   │ Output:                                                                                                                               │
+   │  -[0][DT_BOOL][NHWC][32, 8, 8, 320] ['trans_TransData_1170']                                                                          │
+   │ NpuDumpInput:                                                                                                                         │
+   │  -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.0.npy   │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556]                                   │
+   │  -[1] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.1.npy   │
+   │   |- [Shape: (1, 8, 1, 1)] [Dtype: float32] [Max: 0.0] [Min: 0.0] [Mean: 0.0]                                                         │
+   │ NpuDumpOutput:                                                                                                                        │
+   │  -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.output.0.npy  │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.1176300048828125]                                       │
+   │ CpuDumpOutput:                                                                                                                        │
+   │  -[0] gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.0.1619492699305998.npy                          │
+   │   |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11764373779296874]                                      │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+   2021-04-27 14:39:55 (15178) -[DEBUG]write 14953 bytes to './precision_data/dump/temp/op_graph/GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.3.gv'
+   2021-04-27 14:39:55 (15178) -[INFO]Sub graph saved to /root/sym/inception/precision_data/dump/temp/op_graph
+   ```
+
+5. pt (-n) [*.npy]
+   ```shell
+   # Show the data info of one dump block
+   # -n optional, the data file to inspect
+   # the data is saved as txt by default
+   PrecisionTool > pt TransData.trans_TransData_1170.327.1619347786532995.input.0.npy
+   ╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ Shape: (32, 8, 8, 320)                                                                                                    │
+   │ Dtype: bool                                                                                                               │
+   │ Max: True                                                                                                                 │
+   │ Min: False                                                                                                                │
+   │ Mean: 0.11950836181640626                                                                                                 │
+   │ Path: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy          │
+   │ TxtFile: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy.txt   │
+   ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+
+6. cp (-n) [left *.npy] [right *.npy] -p [print num] -al [atol] -rl [rtol]
+   ```shell
+   # Compare the data of two tensors
+   # -n the two numpy files to compare
+   # -p number of error items and top items to print
+   # -al/rl tolerance parameters, used in two places:
+   # -s save as txt file, on by default
+   # 1. np.allclose(left, right, atol=al, rtol=rl)
+   # 2. err_cnt += 1 if abs(data_left[i] - data_right[i]) > (al + rl * abs(data_right[i]))
+   PrecisionTool > cp Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy -p 10 -s -al 0.002 -rl 0.005
+                    Error Item Table                                                     Top Item Table
+   ┏━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+   ┃ Index ┃ Left          ┃ Right        ┃ Diff         ┃ ┃ Index ┃ Left        ┃ Right       ┃ Diff          ┃
+   ┡━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ ┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+   │ 155   │ 0.024600908   │ 0.022271132  │ 0.002329776  │ │ 0     │ -0.9206961  │ -0.9222216  │ 0.0015255213  │
+   │ 247   │ 0.015752593   │ 0.017937578  │ 0.0021849852 │ │ 1     │ -0.6416973  │ -0.64051837 │ 0.0011789203  │
+   │ 282   │ -0.0101207765 │ -0.007852031 │ 0.0022687456 │ │ 2     │ -0.35383835 │ -0.35433492 │ 0.0004965663  │
+   │ 292   │ 0.019581757   │ 0.02240482   │ 0.0028230622 │ │ 3     │ -0.18851271 │ -0.18883198 │ 0.00031927228 │
+   │ 640   │ -0.06593232   │ -0.06874806  │ 0.0028157383 │ │ 4     │ -0.43508735 │ -0.43534422 │ 0.00025686622 │
+   │ 1420  │ 0.09293677    │ 0.09586689   │ 0.0029301196 │ │ 5     │ 1.4447614   │ 1.4466647   │ 0.0019032955  │
+   │ 1462  │ -0.085207745  │ -0.088047795 │ 0.0028400496 │ │ 6     │ -0.3455438  │ -0.3444429  │ 0.0011008978  │
+   │ 1891  │ -0.03433288   │ -0.036525503 │ 0.002192624  │ │ 7     │ -0.6560242  │ -0.6564579  │ 0.0004336834  │
+   │ 2033  │ 0.06828873    │ 0.07139922   │ 0.0031104907 │ │ 8     │ -2.6964858  │ -2.6975214  │ 0.0010356903  │
+   │ 2246  │ -0.06376442   │ -0.06121233  │ 0.002552092  │ │ 9     │ -0.73746175 │ -0.73650354 │ 0.00095820427 │
+   └───────┴───────────────┴──────────────┴──────────────┘ └───────┴─────────────┴─────────────┴───────────────┘
+   ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ Left:                                                                                                                                      │
+   │  |- NpyFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy      │
+   │  |- TxtFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy.txt  │
+   │  |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556]                               │
+   │ DstFile:                                                                                                                                   │
+   │  |- NpyFile: ./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy                             │
+   │  |- TxtFile: ./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy.txt                         │
+   │  |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.8425903] [Min: -8.374472] [Mean: -0.7256237]                               │
+   │ NumCnt: 655360                                                                                                                             │
+   │ AllClose: False                                                                                                                            │
+   │ CosSim: 0.99999493                                                                                                                         │
+   │ ErrorPer: 0.023504638671875 (rl= 0.005, al= 0.002)                                                                                         │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+
+7. vc -lt [left_path] -rt [right_path] -g [graph]
+   ```python
+   # Manually compare two directories across the whole network
+   # -lt required, one of the two dump directories
+   # -rt required, the other directory, usually the golden one
+   # -g optional; with -g the tool parses the mapping inside the graph for the comparison
+   #    (needed for NPU vs TF comparison; NPU vs NPU comparison matches by op name directly)
+   # Point the paths at the directory level that contains the dump data, e.g.
+   # precision_data/npu/debug_0/dump/20220217095546/3/ge_default_20220217095547_1/1/0/
+   ```
+8. vcs -f [file_name] -c [cos_sim_threshold] -l [limit]
+   ```python
+   # Show a summary of the comparison results, filtering out ops below a cosine-similarity threshold
+   # -f (--file) optional csv file; by default all csvs in the newest directory under
+   #    precision_data/temp/vector_compare/ are scanned
+   # -c (--cos_sim) optional cosine-similarity threshold used for filtering, default 0.98
+   # -l (--limit) optional number of results to print, default 3
+   PrecisionTool > vcs -c 0.98 -l 2
+   2021-05-31 14:48:56 (2344298) -[INFO]Sub path num:[1]. Dirs[['20210529145750']], choose[20210529145750]
+   2021-05-31 14:48:56 (2344298) -[DEBUG]Find ['result_20210529145751.csv', 'result_20210529145836.csv', 'result_20210529145837.csv', 'result_20210529145849.csv', 'result_20210529150404.csv', 'result_20210529151102.csv'] result files in dir precision_data/temp/vector_compare/20210529145750
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145751.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145836.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 1 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145837.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145849.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529150404.csv
+   2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529151102.csv
+   ╭── [578] pixel_cls_loss/cond_1/TopKV2 ───╮
+   │ Left: ['pixel_cls_loss/cond_1/TopKV2']  │
+   │ Right: ['pixel_cls_loss/cond_1/TopKV2'] │
+   │ Input:                                  │
+   │  - [0]1.0 - [1]nan                      │
+   │ Output:                                 │
+   │  - [0]0.999999 - [1]0.978459            │
+   ╰─────────────────────────────────────────╯
+   ╭── [490] gradients/AddN_5 ───╮
+   │ Left: ['gradients/AddN_5']  │
+   │ Right: ['gradients/AddN_5'] │
+   │ Input:                      │
+   │  - [0]nan - [1]1.0          │
+   │ Output:                     │
+   │  - [0]0.05469               │
+   ╰─────────────────────────────╯
+   ```
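+A typical vc/vcs sequence looks like the following; the paths and the graph json name are purely illustrative:
+```shell
+# Hypothetical invocation: compare an NPU dump dir against the TF golden dump,
+# then summarize ops whose cosine similarity falls below 0.95.
+PrecisionTool > vc -lt precision_data/npu/debug_0/dump/20220217095546/3/ge_default_20220217095547_1/1/0/ -rt precision_data/tf/dump -g build_graph.json
+PrecisionTool > vcs -c 0.95 -l 5
+```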
+### Precision_data目录结构
+```
+precision_data/
+├── npu
+│   ├── debug_0
+│   │   ├── dump
+│   │   │   └── 20210510101133
+│   │   └── graph
+│   │       └── ge_proto_00000179_PreRunAfterBuild.txt
+│   └── debug_1
+├── tf
+│   ├── tf_debug
+│   └── dump
+├── overflow
+├── fusion
+└── temp
+    ├── op_graph
+    ├── decode
+    │   ├── dump_decode
+    │   ├── overflow_decode
+    │   └── dump_convert
+    └── vector_compare
+        ├── 20210510101133
+        │   ├── result_123456.csv
+        │   └── result_123455.csv
+        └── 20210510101134
+            └── result_123458.csv
+```
+### 配合msquickcmp一键式推理精度比对工具使用
+- msquickcmp会将中间dump数据和图自动保存在一个时间戳命名的目录内, 可以使用precision_tool工具直接对该目录进行分析
+```
+output-path/timestamp
+├── dump_data
+├── input
+├── model
+├── result_2021211214657.csv
+└── tmp
+```
+- 修改配置
+```python
+# file precision_tool/config.py
+# [train/infer] if adapt from msquickcmp result, set net type to infer
+NET_TYPE = 'infer'
+```
+- 执行以下命令
+```shell
+# 前提条件:
+# 当前目录没有precision_data目录(导入过程会新创建一个precision_data,用于保存导入数据)
+# 只有第一次需要使用infer子命令导入,后续直接执行 python3 precision_tool/cli.py 即可
+python3 precision_tool/cli.py infer output-path/timestamp
+```
+
+### 基于checkpoint进行训练精度分析
+#### 获取checkpoint和网络输入数据
+```python
+from precision_tool.tf_session import PrecisionTfSession
+with PrecisionTfSession() as sess:
+    sess.run()
+# 执行完成后,将在precision_data/tf/checkpoint 目录生成一个checkpoint
+# 在precision_data/tf/checkpoint/inputs目录保存[input_tensor_name].npy的输入数据
+```
+
+#### 使用【train】命令进行cpu和npu dump数据的获取
+```shell
+# train -d [all/npu/cpu] -a [dump|fusion_off|overflow]
+python3 precision_tool/cli.py train -d all -a dump
+```
+
+### TF脚本修改参考
+
+```python
+# 打印动态Scale的Loss值
+loss_scale_manager = ExponentialUpdateLossScaleManager()
+scale_v = sess.run([loss_scale_manager.get_loss_scale()])
+print(">>> Current Loss Scale >>> ", scale_v)
+
+
+with tf.Session() as sess:
+    # do session.run()
+    saver = tf.train.Saver()
+    # 保存ckpt
+    saver.save(sess, saver_dir)
+    # ...
+    # 从ckpt恢复
+    saver.restore(sess, saver_dir)
+    # ...
+    # 保存Tensorboard
+    summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph)
+
+```
+
+### FAQ
+1. 安装gnureadline报错找不到lncurses
+   ```shell
+   /usr/bin/ld: cannot find -lncurses
+   collect2: error: ld returned 1 exit status
+   error: command 'gcc' failed with exit status 1
+   ```
+   ```shell
+   # 先尝试在本地查找libncurses.so*
+   find / -name libncurses.so*
+   # 如果能找到以下文件,直接创建一个libncurses.so指向libncurses.so.5.9即可,否则需要用包管理工具安装ncurses
+   /usr/lib64/libncurses.so.5
+   /usr/lib64/libncurses.so.5.9
+   /usr/lib64/libncursesw.so.5
+   # 创建软链接
+   ln -s /usr/lib64/libncurses.so.5.9 /usr/lib64/libncurses.so
+   ```
+#### 参与贡献
+
+1. Fork 本仓库
+2. 新建 Feat_xxx 分支
+3. 提交代码
+4. 
新建 Pull Request \ No newline at end of file diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md new file mode 100644 index 000000000..6ebb752c4 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/RELEASE.md @@ -0,0 +1,66 @@ +#Release 0.1.11 +## Update +* 兼容csv mapping解析summary dump数据 +* 新增打印输入输出地址偏移 +* TF2.x dump时默认保存输入和输出 + +#Release 0.1.10 +## Update +* 兼容新的opdebug dump格式 + + +#Release 0.1.9 +## Update +* 兼容新的summary dump的csv格式 + +#Release 0.1.8 +## Update +* 兼容新的fusion_result格式 + +#Release 0.1.7 +## Update +* 兼容新平台溢出检测数据解析 + +#Release 0.1.6 +## Update +* 支持获取profiling数据 +* 兼容一些溢出监测dump数据的修改 +* 修复溢出检测数据解析问题 + +# Release 0.1.5 +## Update +* 修复PT命令重复打屏的问题 +* 模糊匹配溢出检测算子名 +* 修复PT Dump的H5文件解析的一些问题 + +# Release 0.1.4 +## Update +* 完善训练随机固定场景 + + +# Release 0.1.3 +## Update +* 支持解析Torch Dump的H5数据 + +# Release 0.1.2 +## Update +* 适配部分dump数据格式 + +# Release 0.1.1 +## Features +* 新增NpuPrintLossScaleCallBack,用于TF2.x下打印scale值 +* 新增自动查找子图Data节点真实输入节点功能 + +## Update +* 优化部分推理场景自动对比目录名和graph名不匹配的场景识别逻辑 + +## Bugfix +* 溢出错误码解析崩溃bugfix + + +# Release 0.1.0 +## Feature +* 新增基于Checkpoint加载执行网络精度对比的能力 + +## Update +* 优化目录组织结构 \ No newline at end of file diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/__init__.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py new file mode 100644 index 000000000..3c1b5982d --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/caffe_dump.py @@ -0,0 +1,132 @@ +# coding=utf-8 +""" +Source code: + https://bbs.huaweicloud.com/blogs/181056 +Example: + python3.7 caffe_dump.py -m resnet50.prototxt -w resnet50.caffemodel -i test.bin -n 'data:0' -o ./output_dir + +Guide for setting up Caffe/Tensorflow precision golden data generation environments: + https://bbs.huaweicloud.com/blogs/181059 +""" +import caffe +import sys +import argparse +import os +import caffe.proto.caffe_pb2 as caffe_pb2 +import google.protobuf.text_format +import json +import numpy as np +import time + +TIME_LENGTH = 1000 +FILE_PERMISSION_FLAG = 0o600 + + +class CaffeProcess: + def __init__(self): + parse = argparse.ArgumentParser() + parse.add_argument("-w", dest="weight_file_path", + help=" the caffe weight file path", + required=True) + parse.add_argument("-m", dest="model_file_path", + help=" the caffe model file path", + required=True) + parse.add_argument("-o", dest="output_path", help=" the output path", + required=True) + parse.add_argument("-i", "--input_bins", dest="input_bins", help="input_bins bins. e.g. './a.bin;./c.bin'", + required=True) + parse.add_argument("-n", "--input_names", dest="input_names", + help="input nodes name. e.g. 
'graph_input_0:0;graph_input_0:1'", + required=True) + args, _ = parse.parse_known_args(sys.argv[1:]) + self.weight_file_path = os.path.realpath(args.weight_file_path) + self.model_file_path = os.path.realpath(args.model_file_path) + self.input_bins = args.input_bins.split(";") + self.input_names = args.input_names.split(";") + self.output_path = os.path.realpath(args.output_path) + self.net_param = None + self.cur_layer_idx = -1 + + @staticmethod + def _check_file_valid(path, is_file): + if not os.path.exists(path): + print('Error: The path "' + path + '" does not exist.') + exit(-1) + if is_file: + if not os.path.isfile(path): + print('Error: The path "' + path + '" is not a file.') + exit(-1) + else: + if not os.path.isdir(path): + print('Error: The path "' + path + '" is not a directory.') + exit(-1) + + def _check_arguments_valid(self): + self._check_file_valid(self.model_file_path, True) + self._check_file_valid(self.weight_file_path, True) + self._check_file_valid(self.output_path, False) + for input_file in self.input_bins: + self._check_file_valid(input_file, True) + + @staticmethod + def calDataSize(shape): + dataSize = 1 + for dim in shape: + dataSize *= dim + return dataSize + + def _load_inputs(self, net): + inputs_map = {} + for layer_name, blob in net.blobs.items(): + if layer_name in self.input_names: + input_bin = np.fromfile( + self.input_bins[self.input_names.index(layer_name)], np.float32) + input_bin_shape = blob.data.shape + if self.calDataSize(input_bin_shape) == self.calDataSize(input_bin.shape): + input_bin = input_bin.reshape(input_bin_shape) + else: + print("Error: input node data size %d not match with input bin data size %d.", self.calDataSize( + input_bin_shape), self.calDataSize(input_bin.shape)) + exit(-1) + inputs_map[layer_name] = input_bin + return inputs_map + + def process(self): + """ + Function Description: + process the caffe net, save result as dump data + """ + # check path valid + self._check_arguments_valid() + + # load model and weight file + net = caffe.Net(self.model_file_path, self.weight_file_path, + caffe.TEST) + inputs_map = self._load_inputs(net) + for key, value in inputs_map.items(): + net.blobs[key].data[...] = value + # process + net.forward() + + # read prototxt file + net_param = caffe_pb2.NetParameter() + with open(self.model_file_path, 'rb') as model_file: + google.protobuf.text_format.Parse(model_file.read(), net_param) + for layer in net_param.layer: + name = layer.name.replace("/", "_").replace(".", "_") + index = 0 + for top in layer.top: + data = net.blobs[top].data[...] + file_name = name + "." + str(index) + "." 
+                output_dump_path = os.path.join(self.output_path, file_name)
+                np.save(output_dump_path, data)
+                os.chmod(output_dump_path, FILE_PERMISSION_FLAG)
+                print('The dump data of "' + layer.name +
+                      '" has been saved to "' + output_dump_path + '".')
+                index += 1
+
+
+if __name__ == "__main__":
+    caffe_process = CaffeProcess()
+    caffe_process.process()
\ No newline at end of file
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py
new file mode 100644
index 000000000..f46368dee
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/cli.py
@@ -0,0 +1,129 @@
+# coding=utf-8
+"""
+cli
+"""
+import os
+import sys
+
+from lib.precision_tool import PrecisionTool
+from lib.train.train_analysis import TrainAnalysis
+from lib.interactive_cli import InteractiveCli
+from lib.util.precision_tool_exception import PrecisionToolException
+from lib.util.util import util
+from lib.dump.tf_dump import TfDump
+from lib.adapter.msquickcmp_adapter import MsQuickCmpAdapter
+from lib.adapter.offline_om_adapter import OfflineOmAdapter
+from lib.config import config as cfg
+
+INTRODUCE_DOC = \
+    "===============================\n" \
+    "Usage:\n" \
+    "   Single mode:\n" \
+    "       Exp:\n" \
+    "           Dump TF data:\n" \
+    "           > python3.7.5 precision_tool/cli.py tf_dump \n" \
+    "           Adapt msquickcmp data:\n" \
+    "           > python3.7.5 precision_tool/cli.py infer [data path of msquickcmp output] \n" \
+    "   Interactive mode:\n" \
+    "       Exp:\n" \
+    "           Start command line:\n" \
+    "           > python3.7.5 precision_tool/cli.py\n"
+
+
+def _run_tf_dbg_dump(cmdline):
+    """ Generate tf dump files with tf debug files."""
+    tf_dump = TfDump()
+    tf_dump.run_tf_dbg_dump(cmdline)
+
+
+def _unset_flags():
+    if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ:
+        del os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG]
+    if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ:
+        del os.environ[cfg.PRECISION_TOOL_DUMP_FLAG]
+
+
+def _run_npu_dump(cmd):
+    """Deprecated function."""
+    _unset_flags()
+    log = util.get_log()
+    os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] = 'True'
+    log.info("Start run NPU script with dump data.")
+    ret = util.execute_command(cmd)
+    log.info("Finish run NPU script with dump data. ret [%s]", ret)
+    _unset_flags()
+
+
+def _run_npu_overflow(cmd):
+    """Deprecated function."""
+    _unset_flags()
+    log = util.get_log()
+    os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] = 'True'
+    log.info("Start run NPU script with overflow check process....")
+    ret = util.execute_command(cmd)
+    log.info("Finish run NPU script with overflow check process. 
ret [%s]", ret) + precision_tool = PrecisionTool() + precision_tool.prepare() + precision_tool.do_check_overflow() + _unset_flags() + + +def _run_infer_adapter(output_path): + """ Run precision_tool with msquickcmp output data + :param output_path: msquickcmp output path + :return: None + """ + if OfflineOmAdapter.validate(output_path): + adapter = OfflineOmAdapter(output_path) + else: + adapter = MsQuickCmpAdapter(output_path) + adapter.run() + _run_interactive_cli() + + +def _run_interactive_cli(cli=None): + """ Run precision_tool in interactive mode + :param cli: + :return: + """ + util.get_log().info("Interactive command mode.") + if cli is None: + cli = InteractiveCli() + try: + cli.cmdloop(intro="Enjoy!") + except KeyboardInterrupt: + util.get_log().info("Bye.......") + + +def _run_cli_with_data(data_path): + """ Run precision with specific data path, default is precision_data.""" + cfg.DATA_ROOT_DIR = data_path + _run_interactive_cli() + + +function_list = { + 'tf_dump': _run_tf_dbg_dump, + 'npu_dump': _run_npu_dump, + 'npu_overflow': _run_npu_overflow, + 'infer': _run_infer_adapter, + 'data': _run_cli_with_data +} + + +def main(): + while len(sys.argv) > 1: + util.get_log().info("Single command mode.") + function_key = sys.argv[1] + cmd_line = sys.argv[2] if len(sys.argv) > 2 else None + if function_key in function_list: + return function_list[function_key](cmd_line) + precision_tool = PrecisionTool() + return precision_tool.single_cmd(sys.argv) + _run_interactive_cli() + + +if __name__ == '__main__': + try: + main() + except PrecisionToolException as pte: + util.get_log().error(pte.error_info) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py new file mode 100644 index 000000000..b159b0e99 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_config.py @@ -0,0 +1,118 @@ +# coding=utf-8 +import os +import random +import tensorflow as tf +from .lib.adapter.tf_adapter import TfAdapter +from .lib.config import config as cfg + + +adapter = TfAdapter() + + +def seed_everything(seed=cfg.DUMP_SEED): + """ set random seed + :param seed: random seed + :return: None + """ + os.environ['PYTHONHASHSEED'] = str(seed) + random.seed(seed) + if hasattr(tf.random, 'set_seed'): + tf.random.set_seed(seed) + elif hasattr(tf.random, 'set_random_seed'): + tf.random.set_random_seed(seed) + print("[PrecisionTool] Set Tensorflow random seed to %d success." % seed) + try: + import numpy as np + np.random.seed(seed) + print("[PrecisionTool] Set numpy random seed to %d success." % seed) + except ImportError as err: + np = None + print("[PrecisionTool] No numpy module.", err) + try: + from tfdeterminism import patch + patch() + print("[PrecisionTool] patch tf determinism success.") + except Exception as err: + print("[PrecisionTool] No tfdeterminism module. Install it by pip3 install tfdeterminism.", err) + + +# set global random seed +seed_everything() + + +def sess_dump(sess): + """wrapper session with dumping debug wrapper. + In session run mode. Use sess=sess_dump(sess) + :param sess: origin session + :return: Session + """ + return adapter.sess_dump(sess) + + +def estimator_dump(): + """In estimator mode. 
estim_spec = tf.estimator.EstimatorSpec(training_hooks=[estimator_dump()])
+    :return:
+    """
+    return adapter.estimator_dump()
+
+
+def npu_device_dump_config(npu_device, action):
+    """For tf2.x
+    :param npu_device: npu_device
+    :param action: dump | overflow | fusion_off | fusion_switch
+    :return: npu_device
+    """
+    return adapter.npu_device_dump_config(npu_device, action)
+
+
+def estimator_dump_config(action=None):
+    """return DumpConfig.
+    In estimator mode. set dump_config in NPURunConfig().
+    exp. config = NPURunConfig(dump_config=estimator_dump_config(), session_config=session_config)
+    :return: DumpConfig
+    """
+    return adapter.estimator_dump_config(action)
+
+
+def session_dump_config(session_config=None, action=None, dump_layer=None):
+    """
+    In TF session mode. set dump_config in session_config.
+    exp. config = session_dump_config()
+         config.[set your own configs]
+         with tf.Session(config=config) as sess:
+            sess.run(_)
+            tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline")
+    :param session_config: original session config
+    :param action: if set action, no need to start app with cli wrapper
+    :return: config_pb2.ConfigProto
+    """
+    return adapter.session_dump_config(session_config, action, dump_layer)
+
+
+def update_custom_op(custom_op, action=None, dump_layer=None):
+    """Update custom_op
+    :param custom_op: origin custom op
+    :param action: dump | overflow | fusion_off | fusion_switch
+    :param dump_layer: layers to dump, split by space
+    :return:
+    """
+    return adapter.update_custom_op(custom_op, action, dump_layer)
+
+
+class NpuPrintLossScaleCallBack(tf.keras.callbacks.Callback):
+    """
+    For TF2.x callbacks. Usage:
+        callbacks = []
+        # append other callbacks.
+        callbacks.append(NpuPrintLossScaleCallBack(opt))
+        model.fit(xx, xx, callbacks=callbacks)
+    """
+    def __init__(self, optimizer, loss=None):
+        super(NpuPrintLossScaleCallBack, self).__init__()
+        self.optimizer = optimizer
+        self.loss = loss
+
+    def on_train_batch_begin(self, batch, logs=None):
+        print("PrecisionTool: Train steps {}, loss_scale={:.3f} / not_overflow_status={}".format(
+            batch, self.optimizer.loss_scale.numpy(), self.optimizer.last_step_finite.numpy()
+        ), flush=True)
diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py
new file mode 100644
index 000000000..73cd8f7ab
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/precision_tool/tf_session.py
@@ -0,0 +1,60 @@
+# coding=utf-8
+import tensorflow as tf
+import numpy as np
+from .lib.util.util import util
+from .lib.train.train_analysis import TrainAnalysis
+from .lib.config import config as cfg
+
+
+class PrecisionTfSession(tf.Session):
+    def __init__(self, target='', graph=None, config=None):
+        super().__init__(target, graph, config)
+        self.log = util.get_log()
+        self._create_dir()
+        self.running = False
+
+    def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
+        """ wrapper super.run() """
+        run_before_after = False
+        if not self.running:
+            self.running = True
+            run_before_after = True
+        if run_before_after:
+            self._before_run(feed_dict)
+        res = super(tf.Session, self).run(fetches, feed_dict, options, run_metadata)
+        if run_before_after:
+            # saver will call run func.
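+            # Note: tf.train.Saver.save() in _after_run() itself triggers another
+            # session.run(), so the `running` flag set above keeps that nested
+            # run() from re-entering these before/after hooks.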
+ self._after_run() + self.running = False + return res + + @staticmethod + def _create_dir(): + util.create_dir(cfg.TF_CKPT_ROOT) + util.create_dir(cfg.TF_CKPT_INPUT_DIR) + + def _save_data(self, feed, feed_val): + self.log.info('Save: %s', feed) + file_name = TrainAnalysis.gen_feed_file_name(feed.name) + np.save(file_name, feed_val) + + def _before_run(self, feed_dict): + """ + save feed dict tensors + :return: None + """ + if feed_dict is not None: + self.log.info('Session run with feed_dict, will save feed dict.') + for feed, feed_val in feed_dict.items(): + if not isinstance(feed, tf.Tensor): + return + self._save_data(feed, feed_val) + # Iterator case + + def _after_run(self): + """ + save checkpoint for dump and + :return: + """ + saver = tf.train.Saver() + saver.save(self, cfg.TF_CKPT_FILE) -- Gitee From 5d1aeb1663395b7333137bf5be3cc9f5dcffc71a Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:04:03 +0000 Subject: [PATCH 13/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh. Signed-off-by: huangju1993 --- ...ID3057_FwFM_performance_1p_RT2_overflow.sh | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh new file mode 100644 index 000000000..eb446c60f --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh @@ -0,0 +1,192 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +RankSize=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FwFM_ID3057_for_TensorFlow" +#训练epoch +train_epochs=5 +#训练batch_size +batch_size=128 +#训练step +train_steps= +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + 
profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source $cur_path/set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../examples + +sed -i "s|epochs=10|epochs=5|g" run_fwfm.py + +#溢出检测 +key_word="args.precision_mode" +line=`grep -rn "args.precision_mode" run_fwfm.py| tail -1 | awk -F ":" '{print $1}'` +jit_word="\ \ \ \ custom_op = npu_tf_config.update_custom_op(custom_op, action='overflow')" +sed -i -b "$[line+1] i $jit_word" run_fwfm.py + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 run_fwfm.py \ + --data_dir=${data_path} \ + --precision_mode=${precision_mode} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|epochs=5|epochs=10|g" run_fwfm.py + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +# #输出性能FPS,需要模型审视修改 + +Time=`cat $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tr -d '\b\r'|grep -Eo "[0-9]*us/sample"|awk -F "us/sample" 'END {print $1}'` +FPS=`awk 'BEGIN{printf "%.2f\n", 1 /'${Time}'*1000000}'` +#打印,不需要修改 +echo "Final Performance item/sec : $FPS" + +#输出CompileTime +CompileTime=`grep '/sample' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| head -n 2| awk '{print $4}' | awk -F 's' '{sum+=$1} END {print sum}'` + +# #输出训练精度,需要模型审视修改 +train_accuracy=`grep "test AUC" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 + +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +cat $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tr -d '\b\r'|grep -Eo " loss: [0-9]*\.[0-9]*"|awk -F " " '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + 
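+#(说明:上面的 cat 管道先用 tr 去掉日志中的退格/回车控制符,再用 grep -Eo 提取形如 " loss: 1.234" 的字段,最后由 awk 取出数值部分,逐行写入 loss 曲线文件)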
+#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CompileTime = ${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + -- Gitee From 6f54c967aaecb1dfe81899367e78f110437042f5 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:05:17 +0000 Subject: [PATCH 14/38] update TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py. Signed-off-by: huangju1993 --- .../DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py index f97c37546..38b347db6 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_fwfm.py @@ -39,6 +39,7 @@ from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names import argparse import os +import precision_tool.tf_config as npu_tf_config def main(): -- Gitee From 25d1daaa2b782422921f4ebdc950da0fbc224ed3 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:10:34 +0000 Subject: [PATCH 15/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh. 
Signed-off-by: huangju1993 --- .../test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh index eb446c60f..9a9f03168 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh @@ -162,7 +162,7 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'privileged'_'docker'_'overflow'_'perf' ##获取性能数据,不需要修改 #吞吐量 -- Gitee From bfb0caa393b7992f53e7dab32d53be2af7b6a8e1 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:19:20 +0000 Subject: [PATCH 16/38] recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh. Signed-off-by: huangju1993 --- .../test/train_performance_1p_overflow.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh index 81d82ea09..7fee490a2 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_overflow.sh @@ -145,15 +145,15 @@ BatchSize=${batch_size} DeviceType=`uname -m` if [[ $precision_mode == "must_keep_origin_dtype" ]];then if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'overflow'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'overflow'_'perf' fi else if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'overflow'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'overflow'_'perf' fi fi echo "CaseName : $CaseName" -- Gitee From 1a69841353d84d8f274404236a37be36c828b786 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:20:37 +0000 Subject: [PATCH 17/38] built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh. 
Signed-off-by: huangju1993 --- .../test/train_performance_1p_dump.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh index da1fdc31b..29d3a1aca 100644 --- a/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh +++ b/TensorFlow/built-in/recommendation/WideDeep_ID2712_for_TensorFlow/test/train_performance_1p_dump.sh @@ -145,15 +145,15 @@ BatchSize=${batch_size} DeviceType=`uname -m` if [[ $precision_mode == "must_keep_origin_dtype" ]];then if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'privileged_docker'_'dump'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'dump'_'perf' fi else if [[ $docker_enable == "privileged" ]];then - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'privileged_docker'_'dump'_'perf' else - CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'dump'_'perf' fi fi echo "CaseName : $CaseName" -- Gitee From 306a2d72f055d99a91182971147fa0eb2f431d19 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:39:13 +0000 Subject: [PATCH 18/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh. Signed-off-by: huangju1993 --- .../test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh index 9a9f03168..87e0d54e8 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_overflow.sh @@ -190,3 +190,4 @@ echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseNa echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "CompileTime = ${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf $cur_path/../examples/precision_data -- Gitee From 6cd72527732b77f77b639c59abc3bb1ae3d21303 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 06:44:22 +0000 Subject: [PATCH 19/38] recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh. 
Signed-off-by: huangju1993 --- ...ain_ID3057_FwFM_performance_1p_RT2_dump.sh | 193 ++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh new file mode 100644 index 000000000..919e4d2f0 --- /dev/null +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID3057_FwFM_performance_1p_RT2_dump.sh @@ -0,0 +1,193 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +RankSize=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FwFM_ID3057_for_TensorFlow" +#训练epoch +train_epochs=5 +#训练batch_size +batch_size=128 +#训练step +train_steps= +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source $cur_path/set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../examples + +sed -i "s|epochs=10|epochs=5|g" run_fwfm.py + +#溢出检测 +key_word="args.precision_mode" +line=`grep -rn "args.precision_mode" run_fwfm.py| tail -1 | awk -F ":" '{print $1}'` +jit_word="\ \ \ \ 
custom_op = npu_tf_config.update_custom_op(custom_op, action='dump')" +sed -i -b "$[line+1] i $jit_word" run_fwfm.py + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 run_fwfm.py \ + --data_dir=${data_path} \ + --precision_mode=${precision_mode} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|epochs=5|epochs=10|g" run_fwfm.py + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +# #输出性能FPS,需要模型审视修改 + +Time=`cat $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tr -d '\b\r'|grep -Eo "[0-9]*us/sample"|awk -F "us/sample" 'END {print $1}'` +FPS=`awk 'BEGIN{printf "%.2f\n", 1 /'${Time}'*1000000}'` +#打印,不需要修改 +echo "Final Performance item/sec : $FPS" + +#输出CompileTime +CompileTime=`grep '/sample' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| head -n 2| awk '{print $4}' | awk -F 's' '{sum+=$1} END {print sum}'` + +# #输出训练精度,需要模型审视修改 +train_accuracy=`grep "test AUC" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'privileged'_'docker'_'dump'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 + +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +cat $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tr -d '\b\r'|grep -Eo " loss: [0-9]*\.[0-9]*"|awk -F " " '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CompileTime = 
${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +rm -rf $cur_path/../examples/precision_data \ No newline at end of file -- Gitee From f7f3128d8f99296f5b9cbab230fa17b6d7cfdcc4 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:16:11 +0000 Subject: [PATCH 20/38] cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh. Signed-off-by: huangju1993 --- .../test/train_performance_1p_overflow.sh | 214 ++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh new file mode 100644 index 000000000..b824a8286 --- /dev/null +++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_overflow.sh @@ -0,0 +1,214 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet50_ID0360_for_TensorFlow2.X" +#训练epoch +train_epochs=2 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,需要模型审视修改 +export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=True +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + 
export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../tensorflow +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + cpustep=`expr $cpucount / 8` + echo "taskset c steps:" $cpustep + let a=RANK_ID*$cpustep + let b=RANK_ID+1 + let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup taskset -c $a-$c python3 resnet_ctl_imagenet_main.py \ + --data_dir=${data_path} \ + --num_accumulation_steps=1 \ + --train_steps=${train_steps} \ + --train_epochs=${train_epochs} \ + --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --distribution_strategy=off \ + --use_tf_while_loop=true \ + --use_tf_function=true \ + --enable_checkpoint_and_export \ + --steps_per_loop=${train_steps} \ + --base_learning_rate=${learning_rate} \ + --momentum=0.901 \ + --epochs_between_evals=1 \ + --eval_offset_epochs=2 \ + --optimizer=SGD \ + --label_smoothing=0.1 \ + --single_l2_loss_op \ + --warmup_epochs=5 \ + --weight_decay=0.000025 \ + --lr_schedule=polynomial \ + --drop_eval_remainder=True \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch_size=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep TimeHistory $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'overflow'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep train_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v BatchTimestamp|awk '{print $10}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = 
${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +sed -i "/AttributeError/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +sed -i "/MLL/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log + +rm -rf $cur_path/output/overflow_dump \ No newline at end of file -- Gitee From e43d757ea62c84e592e07bcccee7230f35aade7f Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:23:35 +0000 Subject: [PATCH 21/38] cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh. Signed-off-by: huangju1993 --- .../test/train_performance_1p_dump.sh | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh new file mode 100644 index 000000000..85ac16819 --- /dev/null +++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_dump.sh @@ -0,0 +1,213 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet50_ID0360_for_TensorFlow2.X" +#训练epoch +train_epochs=2 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,需要模型审视修改 +export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=True +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == 
--data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh + source activate $conda_name + elif [[ $para == --docker_enable* ]];then + docker_enable=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#docker适配 +if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then + echo "docker_enable basic" + export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin + export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../tensorflow +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + cpustep=`expr $cpucount / 8` + echo "taskset c steps:" $cpustep + let a=RANK_ID*$cpustep + let b=RANK_ID+1 + let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup taskset -c $a-$c python3 resnet_ctl_imagenet_main.py \ + --data_dir=${data_path} \ + --num_accumulation_steps=1 \ + --train_steps=${train_steps} \ + --train_epochs=${train_epochs} \ + --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --distribution_strategy=off \ + --use_tf_while_loop=true \ + --use_tf_function=true \ + --enable_checkpoint_and_export \ + --steps_per_loop=${train_steps} \ + --base_learning_rate=${learning_rate} \ + --momentum=0.901 \ + --epochs_between_evals=1 \ + --eval_offset_epochs=2 \ + --optimizer=SGD \ + --label_smoothing=0.1 \ + --single_l2_loss_op \ + --warmup_epochs=5 \ + --weight_decay=0.000025 \ + --lr_schedule=polynomial \ + --drop_eval_remainder=True \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch_size=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep TimeHistory $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep eval_accuracy 
$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'dump'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep train_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v BatchTimestamp|awk '{print $10}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +sed -i "/AttributeError/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +sed -i "/MLL/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log + +rm -rf $cur_path/output/data_dump;rm -rf $cur_path/../tensorflow/ge_proto* \ No newline at end of file -- Gitee From cac4c877adc985db6d78b6e53071b7583e4f2f8a Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:40:39 +0000 Subject: [PATCH 22/38] add TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh. 
Signed-off-by: huangju1993 --- .../ResNet50_ID0058_for_TensorFlow/test/set_conda.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh new file mode 100644 index 000000000..55087d862 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/set_conda.sh @@ -0,0 +1,2 @@ +export PATH=/home/anaconda3/bin:$PATH +export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH \ No newline at end of file -- Gitee From 2280523ffbe4c3fe7288669714022b6739b70dbe Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:41:27 +0000 Subject: [PATCH 23/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20precision=5Ftool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ResNet50_ID0058_for_TensorFlow/precision_tool/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/.keep diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/.keep b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/.keep new file mode 100644 index 000000000..e69de29bb -- Gitee From c944f62ed74ca1b2f1c451c8ea29be39de73fc8b Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:42:52 +0000 Subject: [PATCH 24/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/README.md | 518 ++++++++++++++++++ .../precision_tool/RELEASE.md | 66 +++ .../precision_tool/__init__.py | 0 .../precision_tool/caffe_dump.py | 132 +++++ .../precision_tool/cli.py | 129 +++++ .../precision_tool/tf_config.py | 118 ++++ .../precision_tool/tf_session.py | 60 ++ 7 files changed, 1023 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/__init__.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md new file mode 100644 index 000000000..3f291afff --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/README.md @@ -0,0 +1,518 @@ +# 精度问题分析工具 + +## 功能介绍 +### 简介 +该工具包提供了精度比对常用的功能,当前该工具主要适配Tensorflow训练场景 + +对于常用功能基本可以做到一键操作,同时提供Dump数据/图信息的交互式查询和操作入口 + +推理场景可直接使用[推理一键式全流程精度比对](https://gitee.com/ascend/tools/tree/master/msquickcmp) 工具 +### 主要功能 +#### 已完成功能 +1. 简化脚本修改【手动/半自动】 +2. 
TF标杆数据生成【自动/半自动】 +3. 算子溢出检测分析【自动】 +4. 开启GE图Dump和图解析【自动】 +5. 开启数据Dump并进行全网比对【自动】 +6. 查询算子列表/节点信息【手动】 +7. 查询/解析Dump数据信息【手动】 +8. 数据比对【手动】 +### 工具获取 +1. 下载压缩包的方式获取 + 将https://gitee.com/ascend/tools 以压缩包形式下载 +2. 使用git命令方式获取 +3. 移动 tools/precision_tool 子目录至训练工作目录 +### 安装python3三方依赖 +```shell +pip3 install rich gnureadline pexpect graphviz +# ubuntu/Debian +sudo apt-get install graphviz +# fedora/Centos +sudo yum install graphviz +``` +### 工具执行依赖 +* 一般直接在NPU训练环境上部署该脚本,环境上能够正常执行CPU和NPU训练脚本 +* 如果需要进行数据Dump比对,则需要先检查并去除训练脚本内部使用到的随机处理,避免由于输入数据不一致导致数据比对结果不可用 + ```python + # 对于使用tf.random / np.random / (python) random的可以通过固定随机种子的方式固定输入 + # import tf_config.py 默认会设置上述三种random的seed,但由于import位置关系,可能不一定能作用到所有的关联代码,建议在代码确认合适位置手动嵌入 + seed =987654 + random.seed(seed) + tf.random.set_random_seed(seed) + np.random.seed(seed) + + # RunConfig/NPURunConfig中设置tf_random_seed固定网络随机因子 + # Estimator中tf.random设置的随机种子并不能全局生效 + # 需要使用下面的方式进行设置 + run_config = tf.estimator.RunConfig(tf_random_seed=1, ...) + run_config = NPURunConfig(tf_random_seed=1, ...) + ``` + * **理论上网络中的大多数随机均能通过上面的方式固定, 一般不需要再做下面的这些操作** + ```python + # 1. 参数初始化中的随机操作 + # 加载checkpoint的方式能够固定大多数初始参数 + saver.restore(sess, saver_dir) + + # 2. 输入数据的随机操作(例如对输入数据做shuffle操作) + dataset = tf.data.TFRecordDataset(tf_data) + dataset = dataset.shuffle(batch_size*10) # 直接注释掉该行 + + # 3. 模型中的随机操作(例如使用dropout) + net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') # 建议注释该行 + + # 4. 图像预处理使用的随机操作(根据实际情况固定随机种子,或者替换成其他固定的预处理操作) + # 4.1 Random rotate + random_angle = tf.random_uniform([], - self.degree * 3.141592 / 180, self.degree * 3.141592 / 180) + image = tf.contrib.image.rotate(image, random_angle, interpolation='BILINEAR') + depth_gt = tf.contrib.image.rotate(depth_gt, random_angle, interpolation='NEAREST') + + # 4.2 Random flipping + do_flip = tf.random_uniform([], 0, 1) + image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(image), lambda: image) + depth_gt = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(depth_gt), lambda: depth_gt) + + # 4.3 Random crop + mage_depth = tf.concat([image, depth_gt], 2) + image_depth_cropped = tf.random_crop(image_depth, [self.params.height, self.params.width, 4]) + + # 其他...... + ``` +* 该工具基于**NPU的计算图**,**NPU的DUMP数据**,**NPU的溢出检测数据**,**TF的计算图meta文件**,**TF的DUMP数据**进行数据解析和分析。 +这几类依赖数据可以通过以下方式获取(只使用部分工具功能并不需要提前获取所有依赖数据): +#### 1. NPU的计算图获取 + ``` + 注意:NPU的Dump数据和计算图存在一定的对应关系,需要同时获取 + 避免在自定义的训练脚本中unset DUMP GRAPH相关的环境变量 + ``` +* 【推荐】方法一:配置2、3依赖中的NPU数据Dump或者overflow检测功能,将自动配置上Dump GE图的环境变量 + +* 【不推荐】方法二:参考迁移指导中的修改配置,执行NPU脚本,并将获取到的图转存至precision_data图目录 + ```shell + export DUMP_GE_GRAPH=2 + export DUMP_GRAPH_LEVEL=3 + export DUMP_GRAPH_PATH=./precision_data/npu/debug_0/graph + # 未配置DUMP_GRAPH_PATH时,图文件将保存在脚本执行目录,可以直接转存至precision_data目录 + mkdir -p ./precision_data/npu/debug_0/graph && mv ge_proto_*.txt ./precision_data/npu/debug_0/graph + ``` +#### 2. 
NPU的DUMP数据获取
+* 【推荐】方法一:在训练脚本中**import precision_tool.tf_config**,并使用precision_tool中提供的辅助命令行执行训练脚本
+  ```python
+  # NPU的DUMP获取和溢出检测数据的获取,均可按如下方式修改代码
+  # 注意:参数action可以设置为'dump'或'overflow'
+  # 引用 precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # 如果使用的是Estimator的NPURunConfig配置使能NPU,则可以参考以下修改
+  dump_config = npu_tf_config.estimator_dump_config(action='dump')  # 新增行
+  npu_config = NPURunConfig(dump_config=dump_config)
+
+  # 如果使用的是session.run或者使用tf.ConfigProto创建session_config传入tf.estimator.RunConfig的方式使能npu
+  # 可以参考如下修改
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump')  # 新增行
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+
+  # 如果使用的是custom_op方式,则可以参考以下修改
+  config = tf.ConfigProto()
+  custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+  custom_op.name = "NpuOptimizer"
+  custom_op.parameter_map["use_off_line"].b = True
+  custom_op = npu_tf_config.update_custom_op(custom_op, action='dump')  # 新增行
+  ```
+
+* 【不推荐】方法二:参考[精度比对工具使用指南](https://www.hiascend.com/document?tag=community-developer) 修改训练脚本。
+  执行训练脚本,并将dump的数据拷贝到【precision_data/dump/npu/】目录
+#### 3. NPU的溢出检测数据的获取(缺少该数据将无法展示溢出检测结果)
+* 【推荐】方法一:在训练脚本中**import precision_tool.tf_config**,并按【2. NPU的DUMP数据获取】中修改训练代码,使用precision_tool中提供的辅助命令行执行训练脚本
+  ```python
+  # 需要将action设置成'overflow'
+  # 引用 precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+  dump_config = npu_tf_config.estimator_dump_config(action='overflow')  # 新增行
+  ```
+* 【不推荐】方法二:参考[使用溢出检测工具分析算子溢出](https://www.hiascend.com/document?tag=community-developer) 修改训练脚本,
+  并将溢出数据拷贝至【precision_tool/dump/overflow/】目录
+
+#### 4. TF的DUMP数据获取(缺少该数据无法使用数据比对功能)(适用于TF 1.15,TF2.x参考tfdbg_ascend)
+* 【推荐】方法一:在CPU/GPU训练脚本中添加tf_debug代码,并使用precision_tool中提供的辅助命令行工具生成标杆DUMP数据
+  ```python
+  import precision_tool.tf_config as npu_tf_config
+
+  # 如果使用的是Estimator,EstimatorSpec加入training_hooks
+  estim_specs = tf.estimator.EstimatorSpec(training_hooks=[npu_tf_config.estimator_dump()])
+
+  # 如果使用的是session.run,以下代码为sess加上tf_debug的wrapper
+  sess = npu_tf_config.sess_dump(sess=sess)
+  ```
+  ```shell
+  # 1. 执行脚本
+  # 2. 解析tf debug dump文件,生成算子输出tensor文件
+  # 注意:TF dump数据的原理是使用tf_debug的print_tensor(pt)命令实现的,由于训练代码提供了非常灵活的run()接口,
+  #      脚本无法感知用户需要dump的tensor在哪个run阶段,因此需要用户修改训练代码,在执行完正确的run后立即退出。
+  #      例如,修改代码只执行一个step的训练;根据代码中run的次数,会获取到1~N个离线tf_debug的dump目录,
+  #      precision_tool脚本会自动提取最后一个run阶段中出现的所有tensor作为标杆数据。
+  python3.7.5 precision_tool/cli.py tf_dump
+
+  # 在precision_data/tf/dump/ 目录会存放提取的tensor
+  # 如果获取的tensor不符合预期,可以检查precision_data/dump/cpu_debug/目录,只保留预期run阶段的tf_debug离线数据,
+  # 再执行以下命令重新生成
+  rm -rf precision_data/tf/dump/* && python3.7.5 precision_tool/cli.py tf_dump
+  ```
+* 【不推荐】方法二:参考[准备基于GPU/CPU运行生成的npy数据](https://www.hiascend.com/document?tag=community-developer)
+  获取CPU/GPU的TF数据,并拷贝至【precision_data/dump/cpu/】目录
+#### 5. TF计算图Meta文件的获取(可选)
+* 通过saver保存ckpt获取
+  ```python
+  # 修改CPU/NPU脚本
+  with tf.Session() as sess:
+      # do session.run()
+      saver = tf.train.Saver()
+      # 保存ckpt
+      saver.save(sess, saver_dir)
+  ```
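+* 【示例】以下为一个示意性片段(假设`saver_dir`与上文保存ckpt时使用的路径一致,仅作参考,并非工具自带功能),
+  用于从meta文件恢复计算图并打印节点名,便于与GE图及dump数据中的算子名对应:
+  ```python
+  # 示意代码:从saver_dir对应的meta文件恢复计算图(saver_dir为假设的ckpt路径)
+  import tensorflow as tf
+
+  with tf.Session() as sess:
+      # import_meta_graph会把meta文件中的计算图加载到当前默认图
+      saver = tf.train.import_meta_graph(saver_dir + '.meta')
+      saver.restore(sess, saver_dir)
+      # 打印前10个节点名,确认图恢复成功
+      for op in tf.get_default_graph().get_operations()[:10]:
+          print(op.name)
+  ```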
+#### 6. 关闭NPU的融合功能(根据情况启用)
+* NPU会对计算图中的算子进行融合,以提高网络性能。由于大多数融合是自动识别的,可能存在未考虑到的场景,导致精度问题,
+  因此,可以尝试关闭融合,以定界精度问题是否由融合导致。
+  ```python
+  # 关闭融合可以和溢出检测/数据Dump同时进行,启用方法也类似
+  # 引用 precision_tool/tf_config.py
+  import precision_tool.tf_config as npu_tf_config
+
+  # 如果使用的是Estimator的NPURunConfig配置使能NPU,则可以参考以下修改
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_OFF_FILE)  # 修改行
+  # 如果需要关闭指定的融合规则,则可以修改precision_tool/fusion_switch.cfg,并参考如下修改
+  npu_config = NPURunConfig(fusion_switch_file=npu_tf_config.FUSION_SWITCH_FILE)  # 关闭特定融合修改行
+
+  # 如果使用的是session.run或者使用tf.ConfigProto创建session_config传入tf.estimator.RunConfig的方式使能npu
+  # 可以参考如下修改(数据Dump和关闭融合同时使能)
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_off')  # 新增行
+  session_config = npu_tf_config.session_dump_config(session_config, action='dump|fusion_switch')  # 关闭特定融合新增行
+  # tf.estimator
+  run_config = tf.estimator.RunConfig(session_config=session_config, ...)
+  # tf.keras
+  npu_keras_sess = set_keras_session_npu_config(config=session_config)
+  # session run
+  with tf.Session(config=npu_config_proto(session_config)):
+      ......
+  # 如果有custom_op,也可以直接使用下面的方式配置
+  custom_op = npu_tf_config.update_custom_op(custom_op=custom_op, action='dump|fusion_off')
+  ```
+## 使用说明
+1. 配置文件precision_tool/config.py(正常默认即可)
+   ```python
+   # 如果需要dump特定层的数据,则可以修改以下配置项
+   # 一般对比分析dump首层即可
+   # Dump config '0|5|10'
+   TF_DUMP_STEP = '0'
+
+   # 融合开关配置,可以在该配置文件中配置融合开关的开启和关闭,使用方法参考以下文档:
+   # https://support.huaweicloud.com/tensorflowdevg-cann330alphaXtraining/atlastfadapi_07_0005.html
+   FUSION_SWITCH_FILE = './precision_tool/fusion_switch.cfg'
+
+   # 依赖run包中的atc和msaccucmp.pyc工具,一般在run包安装目录,配置到父目录即可
+   # 默认run包安装在/usr/local/Ascend,可以不用修改;指定目录安装则需要修改
+   # parent dir path of msaccucmp.pyc and atc, usually run package dir
+   CMD_ROOT_PATH = '/usr/local/'
+
+   # ASCEND Log Path
+   ASCEND_LOG_PATH = '/root/ascend/log/plog/'
+
+   # 日志级别及数据分析目录设置
+   # TOOL CONFIG
+   LOG_LEVEL = "NOTSET"
+   # ModelArts场景下,可以根据情况将数据根目录修改成自定义目录,并在完成后完整下载该目录
+   ROOT_DIR = './'
+   ```
+2. 启动脚本(交互命令行)
+   ```shell
+   python3 ./precision_tool/cli.py
+   ```
+
+### 交互模式命令
+1. ac -l [limit_num] -c
+   ```shell
+   # auto check. 自动化检测命令
+   # 列出Fusion信息;解析算子溢出信息;
+   # -c 可选,进行全网比对
+   # -l 可选,限制输出结果的条数(overflow解析的条数等)
+   PrecisionTool > ac -c
+   ╭──────────────────────────────────────────────────────────────────────────────────────────────────╮
+   │ [TransData][327] trans_TransData_1170                                                             │
+   │  - [AI Core][Status:32][TaskId:327] ['浮点计算有溢出']                                            │
+   │  - First overflow file timestamp [1619347786532995] -                                             │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.input.0.npy                              │
+   │    |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11950836181640626] │
+   │   |- TransData.trans_TransData_1170.327.1619347786532995.output.0.npy                             │
+   │    |- [Shape: (32, 20, 8, 8, 16)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.07781982421875] │
+   ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
+   ```
+2. run [command]
+   ```shell
+   # 不退出交互命令环境执行shell命令,与内置命令不冲突的可以直接执行,否则需要加run前缀
+   PrecisionTool > run vim cli.py
+   PrecisionTool > vim cli.py
+   ```
+
+3. 
ls -n [op_name] -t [op_type] -f [fusion_pass] -k [kernel_name] + ```shell + # 通过[算子名]/[算子类型]查询网络里的算子,模糊匹配 + # -n 算子节点名称 + # -t 算子类型 + # -f 融合类型 + # -k kernel_name + PrecisionTool > ls -t Mul -n mul_3 -f TbeMulti + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5b/Branch_1/mul_3 + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5c/Branch_1/mul_3 + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_5d/Branch_1/mul_3 + [Mul][TbeMultiOutputFusionPass] InceptionV3/InceptionV3/Mixed_6b/Branch_1/mul_3 + ``` + +4. ni (-n) [op_name] -s [save sub graph deep] + ```shell + # 通过[算子名]查询算子节点信息 + # -n 指定节点名称 + # -g graph名 + # -a 显示attr信息 + # -s 保存一个以当前算子节点为根,深度为参数值的子图 + PrecisionTool > ni gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual -s 3 + ╭─────────────────── [GreaterEqual]gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual ────────────────────╮ + │ [GreaterEqual] gradients/InceptionV3/InceptionV3/Mixed_7a/Branch_0/Maximum_1_grad/GreaterEqual │ + │ Input: │ + │ -[0][DT_FLOAT][NHWC][32, 8, 8, 320] InceptionV3/InceptionV3/Mixed_7a/Branch_0/add_3:0 │ + │ -[1][DT_FLOAT][NHWC][1, 8, 1, 1] InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3tau:0 │ + │ -[2][][[]][] atomic_addr_clean0_21:-1 │ + │ Output: │ + │ -[0][DT_BOOL][NHWC][32, 8, 8, 320] ['trans_TransData_1170'] │ + │ NpuDumpInput: │ + │ -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.0.npy │ + │ |- [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556] │ + │ -[1] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.input.1.npy │ + │ |- [Shape: (1, 8, 1, 1)] [Dtype: float32] [Max: 0.0] [Min: 0.0] [Mean: 0.0] │ + │ NpuDumpOutput: │ + │ -[0] GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.325.1619494134722860.output.0.npy │ + │ |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.1176300048828125] │ + │ CpuDumpOutput: │ + │ -[0] gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.0.1619492699305998.npy │ + │ |- [Shape: (32, 8, 8, 320)] [Dtype: bool] [Max: True] [Min: False] [Mean: 0.11764373779296874] │ + ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + 2021-04-27 14:39:55 (15178) -[DEBUG]write 14953 bytes to './precision_data/dump/temp/op_graph/GreaterEqual.gradients_InceptionV3_InceptionV3_Mixed_7a_Branch_0_Maximum_1_grad_GreaterEqual.3.gv' + 2021-04-27 14:39:55 (15178) -[INFO]Sub graph saved to /root/sym/inception/precision_data/dump/temp/op_graph + ``` + +5. 
pt (-n) [*.npy] + ```shell + # 查看某个dump数据块的数据信息 + # -n 可选,含义是待查看的数据文件名 + # 默认会将数据保存成 txt + PrecisionTool > pt TransData.trans_TransData_1170.327.1619347786532995.input.0.npy + ╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ + │ Shape: (32, 8, 8, 320) │ + │ Dtype: bool │ + │ Max: True │ + │ Min: False │ + │ Mean: 0.11950836181640626 │ + │ Path: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy │ + │ TxtFile: ./precision_data/dump/temp/overflow_decode/TransData.trans_TransData_1170.327.1619347786532995.input.0.npy.txt │ + ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + ``` + +6. cp (-n) [left *.npy] [right *.npy] -p [print num] -al [atol] -rl [rtol] + ```shell + # 对比两个tensor的数据 + # -n 指定需要对比的两个numpy名 + # -p 指定输出的错误数据的个数及前多少个数据 + # -al/rl 指定相对误差的参数,在两个场景中用到 + # -s 保存成txt文件,默认打开 + # 1. np.allclose(left, right, atol=al, rtol=rl) + # 2. err_cnt += 1 if abs(data_left[i] - data_right[i]) > (al + rl * abs(data_right[i])) + PrecisionTool > cp Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy -p 10 -s -al 0.002 -rl 0.005 + Error Item Table Top Item Table + ┏━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ + ┃ Index ┃ Left ┃ Right ┃ Diff ┃ ┃ Index ┃ Left ┃ Right ┃ Diff ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ ┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ + │ 155 │ 0.024600908 │ 0.022271132 │ 0.002329776 │ │ 0 │ -0.9206961 │ -0.9222216 │ 0.0015255213 │ + │ 247 │ 0.015752593 │ 0.017937578 │ 0.0021849852 │ │ 1 │ -0.6416973 │ -0.64051837 │ 0.0011789203 │ + │ 282 │ -0.0101207765 │ -0.007852031 │ 0.0022687456 │ │ 2 │ -0.35383835 │ -0.35433492 │ 0.0004965663 │ + │ 292 │ 0.019581757 │ 0.02240482 │ 0.0028230622 │ │ 3 │ -0.18851271 │ -0.18883198 │ 0.00031927228 │ + │ 640 │ -0.06593232 │ -0.06874806 │ 0.0028157383 │ │ 4 │ -0.43508735 │ -0.43534422 │ 0.00025686622 │ + │ 1420 │ 0.09293677 │ 0.09586689 │ 0.0029301196 │ │ 5 │ 1.4447614 │ 1.4466647 │ 0.0019032955 │ + │ 1462 │ -0.085207745 │ -0.088047795 │ 0.0028400496 │ │ 6 │ -0.3455438 │ -0.3444429 │ 0.0011008978 │ + │ 1891 │ -0.03433288 │ -0.036525503 │ 0.002192624 │ │ 7 │ -0.6560242 │ -0.6564579 │ 0.0004336834 │ + │ 2033 │ 0.06828873 │ 0.07139922 │ 0.0031104907 │ │ 8 │ -2.6964858 │ -2.6975214 │ 0.0010356903 │ + │ 2246 │ -0.06376442 │ -0.06121233 │ 0.002552092 │ │ 9 │ -0.73746175 │ -0.73650354 │ 0.00095820427 │ + └───────┴───────────────┴──────────────┴──────────────┘ └───────┴─────────────┴─────────────┴───────────────┘ + ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ + │ Left: │ + │ |- NpyFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy │ + │ |- TxtFile: ./precision_data/dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy.txt │ + │ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556] │ + │ DstFile: │ + │ |- NpyFile: ./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy │ + │ |- TxtFile: 
./precision_data/dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy.txt │ + │ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.8425903] [Min: -8.374472] [Mean: -0.7256237] │ + │ NumCnt: 655360 │ + │ AllClose: False │ + │ CosSim: 0.99999493 │ + │ ErrorPer: 0.023504638671875 (rl= 0.005, al= 0.002) │ + ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + ``` + +7. vc -lt [left_path] -rt [right_path] -g [graph] + ```python + # 用于手动指定两个目录进行整网对比 + # -lt 必选,其中一个文件目录 + # -rt 必选,另一个目录,一般是标杆目录 + # -g 可选,指定-g将尝试解析graph内的映射关系比对(一般用于NPU和TF之间的数据比对, NPU与NPU之间比对不需要,直接按照算子name对比) + # 需要指定到dump数据所在的目录层级,precision_data/npu/debug_0/dump/20220217095546/3/ge_default_20220217095547_1/1/0/ + ``` +8. vcs -f [file_name] -c [cos_sim_threshold] -l [limit] + ```python + # 查看精度比对结果的概要信息,可以更加预先相似的阈值过滤出低于阈值的算子/信息 + # -f (--file) 可选,指定csv文件,不设置则默认遍历precision_data/temp/vector_compare/目录下最近产生的对比目录内的所有csv + # -c (--cos_sim) 可选,指定筛选所使用的预先相似度阈值,默认0.98 + # -l (--limit) 可选,指定输出前多少个结果,默认值3 + PrecisionTool > vcs -c 0.98 -l 2 + 2021-05-31 14:48:56 (2344298) -[INFO]Sub path num:[1]. Dirs[['20210529145750']], choose[20210529145750] + 2021-05-31 14:48:56 (2344298) -[DEBUG]Find ['result_20210529145751.csv', 'result_20210529145836.csv', 'result_20210529145837.csv', 'result_20210529145849.csv', 'result_20210529150404.csv', 'result_20210529151102.csv'] result files in dir precision_data/temp/vector_compare/20210529145750 + 2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145751.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145836.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 1 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145837.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529145849.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 2 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529150404.csv + 2021-05-31 14:48:56 (2344298) -[INFO]Find 0 ops less then 0.98 in precision_data/temp/vector_compare/20210529145750/result_20210529151102.csv + ╭── [578] pixel_cls_loss/cond_1/TopKV2 ───╮ + │ Left: ['pixel_cls_loss/cond_1/TopKV2'] │ + │ Right: ['pixel_cls_loss/cond_1/TopKV2'] │ + │ Input: │ + │ - [0]1.0 - [1]nan │ + │ Output: │ + │ - [0]0.999999 - [1]0.978459 │ + ╰─────────────────────────────────────────╯ + ╭── [490] gradients/AddN_5 ───╮ + │ Left: ['gradients/AddN_5'] │ + │ Right: ['gradients/AddN_5'] │ + │ Input: │ + │ - [0]nan - [1]1.0 │ + │ Output: │ + │ - [0]0.05469 │ + ╰─────────────────────────────╯ + ``` +### Precision_data目录结构 +``` +precision_data/ +├── npu +│ ├── debug_0 +| | ├── dump +| | └── 20210510101133 +| │ └── graph +| | └── ge_proto_00000179_PreRunAfterBuild.txt +│ └── debug_1 +├── tf +| ├── tf_debug +| └── dump +├── overflow +├── fusion +└── temp + ├── op_graph + ├── decode + | ├── dump_decode + | ├── overflow_decode + | └── dump_convert + └── vector_compare + ├── 20210510101133 + | ├── result_123456.csv + | └── result_123455.csv + └── 20210510101134 + └── result_123458.csv +``` +### 配合msquickcmp一键式推理精度比对工具使用 +- msquickcmp会将中间dump数据和图自动保存在一个时间戳命名的目录内, 可以使用precision_tool工具直接对该目录进行分析 +```python +output-path/timestamp +├── dump_data +├── input +├── model +├── 
result_2021211214657.csv +└── tmp +``` +- 修改配置 +```python +# file precision_tool/config.py +# [train/infer] if adapt from msquickcmp result, set net type to infer +NET_TYPE = 'infer' +``` +- 执行以下命令 +```shell +# 前提条件: +# 当前目录没有precision_data目录(导入过程会新创建一个precision_data,用于保存导入数据) +# 只有第一次需要使用infer子命令导入,后续直接python3 precision_tool/cli.py +python3 precision_tool/cli.py infer output-path/timestamp +``` + +### 基于checkpoint进行训练精度分析 +#### 获取checkpoint和网络数据数据 +```python +from precision_tool.tf_session import PrecisionTfSession +with PrecisionTfSession() as sess: + sess.run() +# 执行完成后,将在precision_data/tf/checkpoint 目录生成一个checkpoint +# 在precision_data/tf/checkpoint/inputs目录保存[input_tensor_name].npy的输入数据 +``` + +#### 使用【train】命令进行cpu和npu dump数据的获取 +```shell +# train -d [all/npu/cpu] -a [dump|fusion_off|overflow] +python3 precision_tool/cli.py train -d all -a dump +``` + +### TF脚本修改参考 + +```python +# 打印动态Scale的Loss值 +loss_scale_manager = ExponentialUpdateLossScaleManager() +scale_v = sess.run([loss_scale_manager.get_loss_scale()]) +print(">>> Current Loss Scale >>> ", scale_v) + + +with tf.Session() as sess: + # do session.run() + saver = tf.train.Saver() + # 保存ckpt + saver.save(sess, saver_dir) + # ... + # 从ckpt恢复 + saver.restore(sess, saver_dir) + # ... + # 保存Tensorboard + summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph) + +``` + +### F&Q +1. 安装gnureadline报错找不到lncurses + ```shell + /usr/bin/ld: cannot find -lncurses + collect2: error: ld returned 1 exit status + error: command 'gcc' failed with exit status 1 + ``` + ```shell + # 先尝试在本地查找libncurses.so* + find / -name libncurses.so* + # 如果能找到以下文件,直接创建一个libncurses.so指向libncurses.so.5即可,否则需要用包管理工具安装ncurses + /usr/lib64/libncurses.so.5 + /usr/lib64/libncurses.so.5.9 + /usr/lib64/libncursesw.so.5 + # 创建软连接 + ln -s /usr/lib64/libncurses.so.5.9 /usr/lib64/libncurses.so + ``` +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 
新建 Pull Request \ No newline at end of file diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md new file mode 100644 index 000000000..6ebb752c4 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/RELEASE.md @@ -0,0 +1,66 @@ +#Release 0.1.11 +## Update +* 兼容csv mapping解析summary dump数据 +* 新增打印输入输出地址偏移 +* TF2.x dump时默认保存输入和输出 + +#Release 0.1.10 +## Update +* 兼容新的opdebug dump格式 + + +#Release 0.1.9 +## Update +* 兼容新的summary dump的csv格式 + +#Release 0.1.8 +## Update +* 兼容新的fusion_result格式 + +#Release 0.1.7 +## Update +* 兼容新平台溢出检测数据解析 + +#Release 0.1.6 +## Update +* 支持获取profiling数据 +* 兼容一些溢出监测dump数据的修改 +* 修复溢出检测数据解析问题 + +# Release 0.1.5 +## Update +* 修复PT命令重复打屏的问题 +* 模糊匹配溢出检测算子名 +* 修复PT Dump的H5文件解析的一些问题 + +# Release 0.1.4 +## Update +* 完善训练随机固定场景 + + +# Release 0.1.3 +## Update +* 支持解析Torch Dump的H5数据 + +# Release 0.1.2 +## Update +* 适配部分dump数据格式 + +# Release 0.1.1 +## Features +* 新增NpuPrintLossScaleCallBack,用于TF2.x下打印scale值 +* 新增自动查找子图Data节点真实输入节点功能 + +## Update +* 优化部分推理场景自动对比目录名和graph名不匹配的场景识别逻辑 + +## Bugfix +* 溢出错误码解析崩溃bugfix + + +# Release 0.1.0 +## Feature +* 新增基于Checkpoint加载执行网络精度对比的能力 + +## Update +* 优化目录组织结构 \ No newline at end of file diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/__init__.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py new file mode 100644 index 000000000..3c1b5982d --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/caffe_dump.py @@ -0,0 +1,132 @@ +# coding=utf-8 +""" +Source code: + https://bbs.huaweicloud.com/blogs/181056 +Example: + python3.7 caffe_dump.py -m resnet50.prototxt -w resnet50.caffemodel -i test.bin -n 'data:0' -o ./output_dir + +Guide for setting up Caffe/Tensorflow precision golden data generation environments: + https://bbs.huaweicloud.com/blogs/181059 +""" +import caffe +import sys +import argparse +import os +import caffe.proto.caffe_pb2 as caffe_pb2 +import google.protobuf.text_format +import json +import numpy as np +import time + +TIME_LENGTH = 1000 +FILE_PERMISSION_FLAG = 0o600 + + +class CaffeProcess: + def __init__(self): + parse = argparse.ArgumentParser() + parse.add_argument("-w", dest="weight_file_path", + help=" the caffe weight file path", + required=True) + parse.add_argument("-m", dest="model_file_path", + help=" the caffe model file path", + required=True) + parse.add_argument("-o", dest="output_path", help=" the output path", + required=True) + parse.add_argument("-i", "--input_bins", dest="input_bins", help="input_bins bins. e.g. './a.bin;./c.bin'", + required=True) + parse.add_argument("-n", "--input_names", dest="input_names", + help="input nodes name. e.g. 
'graph_input_0:0;graph_input_0:1'",
+                           required=True)
+        args, _ = parse.parse_known_args(sys.argv[1:])
+        self.weight_file_path = os.path.realpath(args.weight_file_path)
+        self.model_file_path = os.path.realpath(args.model_file_path)
+        self.input_bins = args.input_bins.split(";")
+        self.input_names = args.input_names.split(";")
+        self.output_path = os.path.realpath(args.output_path)
+        self.net_param = None
+        self.cur_layer_idx = -1
+
+    @staticmethod
+    def _check_file_valid(path, is_file):
+        if not os.path.exists(path):
+            print('Error: The path "' + path + '" does not exist.')
+            exit(-1)
+        if is_file:
+            if not os.path.isfile(path):
+                print('Error: The path "' + path + '" is not a file.')
+                exit(-1)
+        else:
+            if not os.path.isdir(path):
+                print('Error: The path "' + path + '" is not a directory.')
+                exit(-1)
+
+    def _check_arguments_valid(self):
+        self._check_file_valid(self.model_file_path, True)
+        self._check_file_valid(self.weight_file_path, True)
+        self._check_file_valid(self.output_path, False)
+        for input_file in self.input_bins:
+            self._check_file_valid(input_file, True)
+
+    @staticmethod
+    def calDataSize(shape):
+        # total element count of a blob shape
+        dataSize = 1
+        for dim in shape:
+            dataSize *= dim
+        return dataSize
+
+    def _load_inputs(self, net):
+        inputs_map = {}
+        for layer_name, blob in net.blobs.items():
+            if layer_name in self.input_names:
+                input_bin = np.fromfile(
+                    self.input_bins[self.input_names.index(layer_name)], np.float32)
+                input_bin_shape = blob.data.shape
+                if self.calDataSize(input_bin_shape) == self.calDataSize(input_bin.shape):
+                    input_bin = input_bin.reshape(input_bin_shape)
+                else:
+                    print("Error: input node data size %d does not match input bin data size %d."
+                          % (self.calDataSize(input_bin_shape), self.calDataSize(input_bin.shape)))
+                    exit(-1)
+                inputs_map[layer_name] = input_bin
+        return inputs_map
+
+    def process(self):
+        """
+        Function Description:
+            process the caffe net, save result as dump data
+        """
+        # check path valid
+        self._check_arguments_valid()
+
+        # load model and weight file
+        net = caffe.Net(self.model_file_path, self.weight_file_path,
+                        caffe.TEST)
+        inputs_map = self._load_inputs(net)
+        for key, value in inputs_map.items():
+            net.blobs[key].data[...] = value
+        # process
+        net.forward()
+
+        # read prototxt file
+        net_param = caffe_pb2.NetParameter()
+        with open(self.model_file_path, 'rb') as model_file:
+            google.protobuf.text_format.Parse(model_file.read(), net_param)
+        for layer in net_param.layer:
+            name = layer.name.replace("/", "_").replace(".", "_")
+            index = 0
+            for top in layer.top:
+                data = net.blobs[top].data[...]
+                file_name = name + "." + str(index) + "." 
+ str( + round(time.time() * 1000000)) + ".npy" + output_dump_path = os.path.join(self.output_path, file_name) + np.save(output_dump_path, data) + os.chmod(output_dump_path, FILE_PERMISSION_FLAG) + print('The dump data of "' + layer.name + + '" has been saved to "' + output_dump_path + '".') + index += 1 + + +if __name__ == "__main__": + caffe_process = CaffeProcess() + caffe_process.process() \ No newline at end of file diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py new file mode 100644 index 000000000..f46368dee --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/cli.py @@ -0,0 +1,129 @@ +# coding=utf-8 +""" +cli +""" +import os +import sys + +from lib.precision_tool import PrecisionTool +from lib.train.train_analysis import TrainAnalysis +from lib.interactive_cli import InteractiveCli +from lib.util.precision_tool_exception import PrecisionToolException +from lib.util.util import util +from lib.dump.tf_dump import TfDump +from lib.adapter.msquickcmp_adapter import MsQuickCmpAdapter +from lib.adapter.offline_om_adapter import OfflineOmAdapter +from lib.config import config as cfg + +INTRODUCE_DOC = \ + "===============================\n" \ + "Usage:\n" \ + " Single mode:\n" \ + " Exp:\n" \ + " Dump TF data:\n" \ + " > python3.7.5 precision_tool/cli.py tf_dump \n" \ + " Adapt msquickcmp data:\n" \ + " > python3.7.5 precision_tool/cli.py infer [data path of msquickcmp output] \n" \ + " Interactive mode:\n" \ + " Exp:\n" \ + " Start command line:\n" \ + " > python3.7.5 precision_tool/cli.py\n" + + +def _run_tf_dbg_dump(cmdline): + """ Generate tf dump files with tf debug files.""" + tf_dump = TfDump() + tf_dump.run_tf_dbg_dump(cmdline) + + +def _unset_flags(): + if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ: + del os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ: + del os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] + + +def _run_npu_dump(cmd): + """Deprecate function.""" + _unset_flags() + log = util.get_log() + os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] = 'True' + log.info("Start run NPU script with dump data.") + ret = util.execute_command(cmd) + log.info("Finish run NPU script with dump data. ret [%s]", ret) + _unset_flags() + + +def _run_npu_overflow(cmd): + """Deprecate function.""" + _unset_flags() + log = util.get_log() + os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] = 'True' + log.info("Start run NPU script with overflow check process....") + ret = util.execute_command(cmd) + log.info("Finish run NPU script with overflow check process. 
ret [%s]", ret) + precision_tool = PrecisionTool() + precision_tool.prepare() + precision_tool.do_check_overflow() + _unset_flags() + + +def _run_infer_adapter(output_path): + """ Run precision_tool with msquickcmp output data + :param output_path: msquickcmp output path + :return: None + """ + if OfflineOmAdapter.validate(output_path): + adapter = OfflineOmAdapter(output_path) + else: + adapter = MsQuickCmpAdapter(output_path) + adapter.run() + _run_interactive_cli() + + +def _run_interactive_cli(cli=None): + """ Run precision_tool in interactive mode + :param cli: + :return: + """ + util.get_log().info("Interactive command mode.") + if cli is None: + cli = InteractiveCli() + try: + cli.cmdloop(intro="Enjoy!") + except KeyboardInterrupt: + util.get_log().info("Bye.......") + + +def _run_cli_with_data(data_path): + """ Run precision with specific data path, default is precision_data.""" + cfg.DATA_ROOT_DIR = data_path + _run_interactive_cli() + + +function_list = { + 'tf_dump': _run_tf_dbg_dump, + 'npu_dump': _run_npu_dump, + 'npu_overflow': _run_npu_overflow, + 'infer': _run_infer_adapter, + 'data': _run_cli_with_data +} + + +def main(): + while len(sys.argv) > 1: + util.get_log().info("Single command mode.") + function_key = sys.argv[1] + cmd_line = sys.argv[2] if len(sys.argv) > 2 else None + if function_key in function_list: + return function_list[function_key](cmd_line) + precision_tool = PrecisionTool() + return precision_tool.single_cmd(sys.argv) + _run_interactive_cli() + + +if __name__ == '__main__': + try: + main() + except PrecisionToolException as pte: + util.get_log().error(pte.error_info) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py new file mode 100644 index 000000000..b159b0e99 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_config.py @@ -0,0 +1,118 @@ +# coding=utf-8 +import os +import random +import tensorflow as tf +from .lib.adapter.tf_adapter import TfAdapter +from .lib.config import config as cfg + + +adapter = TfAdapter() + + +def seed_everything(seed=cfg.DUMP_SEED): + """ set random seed + :param seed: random seed + :return: None + """ + os.environ['PYTHONHASHSEED'] = str(seed) + random.seed(seed) + if hasattr(tf.random, 'set_seed'): + tf.random.set_seed(seed) + elif hasattr(tf.random, 'set_random_seed'): + tf.random.set_random_seed(seed) + print("[PrecisionTool] Set Tensorflow random seed to %d success." % seed) + try: + import numpy as np + np.random.seed(seed) + print("[PrecisionTool] Set numpy random seed to %d success." % seed) + except ImportError as err: + np = None + print("[PrecisionTool] No numpy module.", err) + try: + from tfdeterminism import patch + patch() + print("[PrecisionTool] patch tf determinism success.") + except Exception as err: + print("[PrecisionTool] No tfdeterminism module. Install it by pip3 install tfdeterminism.", err) + + +# set global random seed +seed_everything() + + +def sess_dump(sess): + """wrapper session with dumping debug wrapper. + In session run mode. Use sess=sess_dump(sess) + :param sess: origin session + :return: Session + """ + return adapter.sess_dump(sess) + + +def estimator_dump(): + """In estimator mode. 
estim_spec = tf.estimator.EstimatorSpec(traing_hooks=[estimator_dump()]) + :return: + """ + return adapter.estimator_dump() + + +def npu_device_dump_config(npu_device, action): + """For tf2.x + :param npu_device: npu_device + :param action: dump | overflow| fusion_off | fusion_switch + :return: npu_device + """ + return adapter.npu_device_dump_config(npu_device, action) + + +def estimator_dump_config(action=None): + """return DumpConfig. + In estimator mode. set dump_config in NPURunConfig(). + exp. config = NPURunConfig(dump_config=estimator_dum_config(), session_config=session_config) + :return: DumpConfig + """ + return adapter.estimator_dump_config(action) + + +def session_dump_config(session_config=None, action=None, dump_layer=None): + """ + In TF session mode. set dump_config in session_config. + exp. config = session_dump_config() + config.[set your own configs] + with tf.Session(config=config) as sess: + sess.run(_) + tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline") + :param session_config: original session config + :param action: if set action, no need to start app with cli wrapper + :return: config_pb2.ConfigProto + """ + return adapter.session_dump_config(session_config, action, dump_layer) + + +def update_custom_op(custom_op, action=None, dump_layer=None): + """Update custom_op + :param custom_op: origin custom op + :param action: dump | overflow | fusion_off | fusion_switch + :param dump_layer: layers to dump, split by space + :return: + """ + return adapter.update_custom_op(custom_op, action, dump_layer) + + +class NpuPrintLossScaleCallBack(tf.keras.callbacks.Callback): + """ + For TF2.x callbacks. Usage: + callbacks = [] + # append other callbacks. + callbacks.append(NpuPrintLossScaleCallBack(opt)) + model.fit(xx, xx, callbacks=callbacks) + """ + def __init__(self, optimizer, loss=None): + super(NpuPrintLossScaleCallBack, self).__init__() + self.optimizer = optimizer + self.loss = loss + + def on_train_batch_begin(self, batch, logs=None): + print("PrecisionTool: Train steps {}, loss_scale={:.3f} / not_overflow_status={}".format( + batch, self.optimizer.loss_scale.numpy(), self.optimizer.last_step_finite.numpy() + ), flush=True) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py new file mode 100644 index 000000000..73cd8f7ab --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/tf_session.py @@ -0,0 +1,60 @@ +# coding=utf-8 +import tensorflow as tf +import numpy as np +from .lib.util.util import util +from .lib.train.train_analysis import TrainAnalysis +from .lib.config import config as cfg + + +class PrecisionTfSession(tf.Session): + def __init__(self, target='', graph=None, config=None): + super().__init__(target, graph, config) + self.log = util.get_log() + self._create_dir() + self.running = False + + def run(self, fetches, feed_dict=None, options=None, run_metadata=None): + """ wrapper super.run() """ + run_before_after = False + if not self.running: + self.running = True + run_before_after = True + if run_before_after: + self._before_run(feed_dict) + res = super(tf.Session, self).run(fetches, feed_dict, options, run_metadata) + if run_before_after: + # saver will call run func. 
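+            # _after_run() calls saver.save(), which itself issues session.run();
+            # self.running is still True at this point, so the nested run skips
+            # the _before_run/_after_run hooks instead of recursing.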
+            self._after_run()
+            self.running = False
+        return res
+
+    @staticmethod
+    def _create_dir():
+        util.create_dir(cfg.TF_CKPT_ROOT)
+        util.create_dir(cfg.TF_CKPT_INPUT_DIR)
+
+    def _save_data(self, feed, feed_val):
+        self.log.info('Save: %s', feed)
+        file_name = TrainAnalysis.gen_feed_file_name(feed.name)
+        np.save(file_name, feed_val)
+
+    def _before_run(self, feed_dict):
+        """
+        save feed dict tensors
+        :return: None
+        """
+        if feed_dict is not None:
+            self.log.info('Session run with feed_dict, will save feed dict.')
+            for feed, feed_val in feed_dict.items():
+                if not isinstance(feed, tf.Tensor):
+                    # skip non-tensor feed keys but keep saving the remaining feeds
+                    continue
+                self._save_data(feed, feed_val)
+        # Iterator case
+
+    def _after_run(self):
+        """
+        save checkpoint for later dump and compare
+        :return: None
+        """
+        saver = tf.train.Saver()
+        saver.save(self, cfg.TF_CKPT_FILE)
--
Gitee
From 0ac19642e861041406b52b21be22ee6f2f5c9ff9 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 07:43:10 +0000
Subject: [PATCH 25/38] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20lib?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 .../ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/.keep
new file mode 100644
index 000000000..e69de29bb
--
Gitee
From 9ea904c1290577c3ef151e2afab9417b1fba8be1 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 07:43:34 +0000
Subject: [PATCH 26/38] 1

Signed-off-by: huangju1993
---
 .../precision_tool/lib/adapter/fusion.py      | 112 ++++++++++
 .../lib/adapter/msquickcmp_adapter.py         |  87 ++++++++
 .../lib/adapter/offline_om_adapter.py         |  23 ++
 .../precision_tool/lib/adapter/overflow.py    | 194 +++++++++++++++++
 .../precision_tool/lib/adapter/tf_adapter.py  | 200 ++++++++++++++++++
 5 files changed, 616 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py
new file mode 100644
index 000000000..b440b1055
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/fusion.py
@@ -0,0 +1,112 @@
+# coding=utf-8
+import json
+import os
+import shutil
+from ..util.util import util
+from ..config import config as cfg
+
+
+FUSION_RESULT_FILE_NAME = 'fusion_result.json'
+EFFECT_TIMES_KEY = 'effect_times'
+GRAPH_FUSION_KEY = 'graph_fusion'
+UB_FUSION_KEY = 'ub_fusion'
+GRAPH_ID_KEYS = ['graphId', 
'session_and_graph_id'] + + +class FusionResult(object): + def __init__(self, fusion_json): + self.fusion_json = fusion_json + + def get_effect_graph_fusion(self): + """Get effect graph fusion rule""" + if GRAPH_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[GRAPH_FUSION_KEY]) + return {} + + def get_effect_ub_fusion(self): + """Get effect UB fusion rule""" + if UB_FUSION_KEY in self.fusion_json: + return self._get_effect_fusion(self.fusion_json[UB_FUSION_KEY]) + return {} + + def graph_id(self): + """Get graph id""" + for key in GRAPH_ID_KEYS: + if key in self.fusion_json: + return self.fusion_json[key] + return "NONE" + + @staticmethod + def _get_effect_fusion(fusion): + res = {} + for fusion_name in fusion: + effect_times = int(fusion[fusion_name][EFFECT_TIMES_KEY]) + if effect_times > 0: + res[fusion_name] = effect_times + return res + + +class Fusion(object): + def __init__(self): + self.fusion_result = [] + self.log = util.get_log() + + def prepare(self, json_path='./'): + """Prepare fusion rule manager + :param json_path: path to fusion_result.json + :return: None + """ + util.create_dir(cfg.FUSION_DIR) + file_path = os.path.join(json_path, FUSION_RESULT_FILE_NAME) + file_path_local = os.path.join(cfg.FUSION_DIR, FUSION_RESULT_FILE_NAME) + if not os.path.isfile(file_path): + if not os.path.isfile(file_path_local): + self.log.debug("Can not find fusion result json.") + return + else: + shutil.copy(file_path, cfg.FUSION_DIR) + fe_jsons = self._get_result_jsons(file_path_local) + for fe_json in fe_jsons: + self.fusion_result.append(FusionResult(fe_json)) + + def check(self): + """Check fusion rules + :return: None + """ + self.log.info("Check effect fusion rule list.") + for fusion in self.fusion_result: + graph_fusion_table = self._build_table('Graph Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_graph_fusion()) + ub_fusion_table = self._build_table('UB Fusion [GraphID: %s]' % fusion.graph_id(), + fusion.get_effect_ub_fusion()) + util.print_panel(util.create_columns([graph_fusion_table, ub_fusion_table]), + title='GraphID:' + fusion.graph_id(), fit=True) + + @staticmethod + def _get_result_jsons(file_name): + result_jsons = [] + with open(file_name, 'r') as f: + txt = f.read() + try: + result_jsons = json.loads(txt) + if isinstance(result_jsons, dict): + result_jsons = [result_jsons] + except ValueError: + sk = [] + start = -1 + for i in range(len(txt)): + if txt[i] == '{': + sk.append('{') + if txt[i] == '}': + sk.pop() + if len(sk) == 0: + result_jsons.append(json.loads(txt[start+1: i+1])) + start = i + return result_jsons + + @staticmethod + def _build_table(title, fusion): + table = util.create_table(title, ['Fusion Name', 'Effect times']) + for f in fusion: + table.add_row(f, str(fusion[f])) + return table diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py new file mode 100644 index 000000000..525dc8ee5 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/msquickcmp_adapter.py @@ -0,0 +1,87 @@ +# coding=utf-8 +import os +import time +import pathlib +import shutil +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException + + +class 
MsQuickCmpAdapter(object): + def __init__(self, output_path): + self.output_path = output_path + self.log = util.get_log() + + def run(self): + if self.output_path is None or not os.path.isdir(self.output_path): + raise PrecisionToolException("Invalid output path.") + if os.path.exists(cfg.DATA_ROOT_DIR): + raise PrecisionToolException("Precision data dir exist, can not adapt msquickcmp result.") + + for dir_path, dir_names, file_names in os.walk(self.output_path, followlinks=True): + if 'model' in dir_names: + self._adapt_model(os.path.join(dir_path, 'model')) + if 'dump_data' in dir_names: + self._adapt_dump(os.path.join(dir_path, 'dump_data')) + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.CSV): + self._adapt_vector_compare_result(os.path.join(dir_path, file_name)) + + def _adapt_model(self, path): + file_names = os.listdir(path) + graph_id = 0 + for file_name in file_names: + if str(file_name).endswith(Constant.Suffix.JSON): + self.log.info("Find msquickcmp model json: %s", file_name) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + graph_file_name = 'ge_proto_%d_%s.txt' % (graph_id, cfg.BUILD_JSON_GRAPH_NAME) + graph_json_file_name = graph_file_name + Constant.Suffix.JSON + pathlib.Path(os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_file_name)).touch() + src_path = os.path.join(path, file_name) + dst_path = os.path.join(cfg.DEFAULT_NPU_GRAPH_DIR, graph_json_file_name) + self.log.info("Copy graph file: %s->%s", src_path, dst_path) + shutil.copy(src_path, dst_path) + time.sleep(3) + pathlib.Path(dst_path).touch() + if not util.empty_dir(cfg.DEFAULT_NPU_GRAPH_DIR): + self.log.info("Adapt model success.") + + def _adapt_dump(self, path): + dir_names = os.listdir(path) + if 'tf' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'tf')) + if 'onnx' in dir_names: + self._adapt_tf_dump(os.path.join(path, 'onnx')) + if 'npu' in dir_names: + self._adapt_npu_dump(os.path.join(path, 'npu')) + + def _adapt_tf_dump(self, path): + if util.empty_dir(path): + return + src_path = os.path.abspath(path) + util.create_dir(cfg.TF_DIR) + dst_path = cfg.TF_DUMP_DIR + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + os.symlink(src_path, dst_path) + self.log.info("Adapt tf dump success.") + + def _adapt_npu_dump(self, path): + sub_dirs = os.listdir(path) + self.log.info("Find npu dump dir:%s", sub_dirs) + sub_dirs = filter(lambda x: str(x).isdigit(), sub_dirs) + for sub_dir in sub_dirs: + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + src_path = os.path.abspath(os.path.join(path, sub_dir)) + dst_path = os.path.join(cfg.DEFAULT_NPU_DUMP_DIR, sub_dir) + self.log.info("Create symbol link file: %s->%s", src_path, dst_path) + os.symlink(src_path, dst_path) + self.log.info("Adapt npu dump success.") + + def _adapt_vector_compare_result(self, path): + target_path = os.path.join(cfg.VECTOR_COMPARE_PATH, '0') + util.create_dir(target_path) + dst_path = os.path.join(target_path, os.path.basename(path)) + shutil.copy(path, dst_path) + self.log.info("Adapt vector compare result.") diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py new file mode 100644 index 000000000..a6cc8a5fb --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/offline_om_adapter.py @@ -0,0 +1,23 @@ +# coding=utf-8 +import os 
+import time
+import pathlib
+import shutil
+from ..util.util import util
+from ..util.constant import Constant
+from ..config import config as cfg
+from ..util.precision_tool_exception import PrecisionToolException
+
+
+class OfflineOmAdapter(object):
+    """自动解析om文件至GE图"""
+    def __init__(self, file_name):
+        self.file_name = file_name
+        self.log = util.get_log()
+
+    @staticmethod
+    def validate(file_name):
+        return os.path.isfile(file_name) and str(file_name).endswith(Constant.Suffix.OM)
+
+    def run(self):
+        self.log.info("To impl")
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py
new file mode 100644
index 000000000..7908c18be
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/overflow.py
@@ -0,0 +1,194 @@
+# coding=utf-8
+import json
+import os
+
+from ..util.util import util
+from ..util.precision_tool_exception import PrecisionToolException
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.constant import Constant
+from ..config import config as cfg
+
+
+AI_CORE_OVERFLOW_STATUS = {
+    '0x8': '有符号整数最小负数NEG符号位取反溢出',
+    '0x10': '整数加法、减法、乘法或乘加操作计算有溢出',
+    '0x20': '浮点计算有溢出',
+    '0x80': '浮点数转无符号数的输入是负数',
+    '0x100': 'FP32转FP16或32符号整数转FP16中出现溢出',
+    '0x400': 'CUBE累加出现溢出'
+}
+DHA_ATOMIC_ADD_STATUS = {
+    '0x9': '[atomic overflow] 向上溢出',
+    '0xA': '[atomic underflow] 向下溢出',
+    '0xB': '[atomic src nan] 源操作数非法',
+    '0xC': '[atomic dst nan] 目的操作数非法',
+    '0xD': '[atomic both nan] 源操作数和目的操作数均非法'
+}
+L2_ATOMIC_ADD_STATUS = {
+    '000': '[atomic no error] 无异常',
+    '001': '[atomic overflow] 向上溢出',
+    '010': '[atomic underflow] 向下溢出',
+    '011': '[atomic src nan] 源操作数非法',
+    '100': '[atomic dst nan] 目的操作数非法',
+    '101': '[atomic both nan] 源操作数和目的操作数均非法'
+}
+
+
+class Overflow(object):
+    def __init__(self):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_files = None
+
+    @catch_tool_exception
+    def prepare(self):
+        """Prepare"""
+        # find right path in DUMP_FILES_NPU_ALL
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        sub_dir = util.get_newest_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        overflow_dump_files = util.list_npu_dump_files(os.path.join(cfg.NPU_OVERFLOW_DUMP_DIR, sub_dir))
+        self.debug_files = [item for item in overflow_dump_files.values() if item.op_type == 'Opdebug']
+        # sort by timestamp
+        self.debug_files = sorted(self.debug_files, key=lambda x: x.timestamp)
+        self.log.info("Find [%d] debug files in overflow dir.", len(self.debug_files))
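+
+    # Typical usage (illustrative sketch; the interactive CLI drives these calls):
+    #     overflow = Overflow()
+    #     overflow.prepare()          # index Opdebug files under the overflow dump dir
+    #     overflow.check(max_num=3)   # decode and summarize the first overflow records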
+
+    def check(self, max_num=3):
+        """Check overflow info"""
+        if len(self.debug_files) == 0:
+            self.log.info("[Overflow] Check success. Find [0] overflow node!")
+            return
+        self.log.info("[Overflow] Find [%s] overflow debug files. Will show top %s ops.",
+                      len(self.debug_files), max_num)
+        for i, debug_file in enumerate(self.debug_files):
+            if i >= max_num:
+                break
+            debug_decode_files = self._decode_file(debug_file, True)
+            with open(debug_decode_files[0].path, 'r') as f:
+                overflow_json = json.load(f)
+            util.print_panel(self._json_summary(overflow_json, debug_file))
+
+    def _json_summary(self, json_txt, debug_file):
+        res = []
+        # default ids fall back to the debug file when no category below matches
+        detail = {'task_id': -1, 'stream_id': -1}
+        if 'magic' in json_txt:
+            # version 2
+            detail = json_txt['acc_list']['data']
+            res.append(' - [AccType:%s][Status:%s][TaskId:%s]' % (
+                json_txt['acc_list'], detail['status'], detail['task_id']))
+        if 'AI Core' in json_txt and json_txt['AI Core']['status'] > 0:
+            detail = json_txt['AI Core']
+            res.append(' - [AI Core][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_ai_core_status(detail['status'])))
+        if 'DHA Atomic Add' in json_txt and json_txt['DHA Atomic Add']['status'] > 0:
+            detail = json_txt['DHA Atomic Add']
+            res.append(' - [DHA Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_dha_atomic_add_status(detail['status'])))
+        if 'L2 Atomic Add' in json_txt and json_txt['L2 Atomic Add']['status'] > 0:
+            detail = json_txt['L2 Atomic Add']
+            res.append(' - [L2 Atomic Add][Status:%s][TaskId:%s] %s' % (
+                detail['status'], detail['task_id'], self._decode_l2_atomic_add_status(detail['status'])))
+        if str(detail['task_id']) == '-1':
+            detail['task_id'] = debug_file.task_id
+        if str(detail['stream_id']) == '-1':
+            detail['stream_id'] = debug_file.stream_id
+        dump_file_info = self._find_dump_files_by_task_id(detail['task_id'], detail['stream_id'],
+                                                          debug_file.dir_path)
+        res.append(' - First overflow file timestamp [%s] -' % debug_file.timestamp)
+        if dump_file_info is None:
+            self.log.warning("Can not find any dump file for debug file: %s, op task id: %s", debug_file.file_name,
+                             detail['task_id'])
+        else:
+            # sort by input/output & index, keeping the sorted result
+            dump_decode_files = sorted(self._decode_file(dump_file_info), key=lambda x: x.idx)
+            for anchor_type in ['input', 'output']:
+                for dump_decode_file in dump_decode_files:
+                    if dump_decode_file.type != anchor_type:
+                        continue
+                    res.append('   ├─ %s' % dump_decode_file.file_name)
+                    res.append('   └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(dump_decode_file.path))
+            res.insert(0, '[green][%s][%s][/green] %s' % (dump_file_info.op_type, dump_file_info.task_id,
+                                                          dump_file_info.op_name))
+        return Constant.NEW_LINE.join(res)
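+
+    # Worked example for the status decoding helpers below (illustrative): an
+    # AI Core status of 0x28 has bits 0x8 and 0x20 set, so
+    # _decode_ai_core_status(0x28) returns the messages for both the '0x8' and
+    # '0x20' entries of AI_CORE_OVERFLOW_STATUS.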
+
+    @staticmethod
+    def _decode_file(file_info, debug=False):
+        file_name = file_info.file_name
+        if debug:
+            decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        else:
+            decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        if len(decode_files) == 0:
+            # decode info file
+            util.convert_dump_to_npy(file_info.path, cfg.OVERFLOW_DECODE_DIR)
+            if debug:
+                decode_files = util.list_debug_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+            else:
+                decode_files = util.list_npu_dump_decode_files(cfg.OVERFLOW_DECODE_DIR, file_name)
+        if len(decode_files) == 0:
+            raise PrecisionToolException("Decode overflow debug file: %s failed." % file_name)
+        decode_files = sorted(decode_files.values(), key=lambda x: x.timestamp)
+        return decode_files
+
+    @staticmethod
+    def _find_dump_files_by_task_id(task_id, stream_id, search_dir):
+        dump_files = util.list_npu_dump_files(search_dir)
+        dump_file_list = [item for item in dump_files.values() if item.op_type != 'Opdebug']
+        dump_file_list = sorted(dump_file_list, key=lambda x: x.timestamp)
+        for dump_file in dump_file_list:
+            if dump_file.task_id == int(task_id) and dump_file.stream_id == int(stream_id):
+                return dump_file
+        return None
+
+    def _decode_ai_core_status(self, status):
+        error_code = []
+        if type(status) is not int:
+            return error_code
+        # reversed bin string walks the bits LSB-first; the trailing '0b' chars never match '1'
+        bin_status = ''.join(reversed(bin(status)))
+        prefix = ''
+        self.log.debug('Decode AI Core Overflow status:[%s]', hex(status))
+        for i in range(len(bin_status)):
+            if bin_status[i] == '1':
+                if hex(int('1' + prefix, 2)) not in AI_CORE_OVERFLOW_STATUS:
+                    self.log.warning("Unknown AI Core overflow status: [%s]", hex(int('1' + prefix, 2)))
+                    continue
+                error_code.append(AI_CORE_OVERFLOW_STATUS[hex(int('1' + prefix, 2))])
+            prefix += '0'
+        return error_code
+
+    def _decode_l2_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        code, _ = self._sub_bin_code(status, 16, 18)
+        if code in L2_ATOMIC_ADD_STATUS:
+            return L2_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    def _decode_dha_atomic_add_status(self, status):
+        if type(status) is not int:
+            return 'status is not int.'
+        _, code = self._sub_bin_code(status, 8, 15)
+        if code in DHA_ATOMIC_ADD_STATUS:
+            # look up by the decoded hex code, not the raw status value
+            return DHA_ATOMIC_ADD_STATUS[code]
+        return 'Status invalid'
+
+    @staticmethod
+    def _sub_bin_code(status, start, end):
+        """ Get specific bit code from status in bin format
+        :param status: status num
+        :param start: start bit
+        :param end: end bit
+        :return: result in bin format and hex format
+        """
+        bin_code = bin(status).replace('0b', '')
+        append_num = end + 1 - len(bin_code)
+        if append_num > 0:
+            bin_list = ['0'] * append_num
+            bin_list.append(bin_code)
+            bin_code = ''.join(bin_list)
+        bin_start = len(bin_code) - end - 1
+        bin_end = len(bin_code) - start
+        bin_start = max(0, bin_start)
+        bin_code = bin_code[bin_start: bin_end]
+        return bin_code, hex(int(bin_code, 2))
+
+
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py
new file mode 100644
index 000000000..980b9fe33
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/adapter/tf_adapter.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+from ..util.util import util
+from ..config import config as cfg
+
+FLAG_DUMP_GE_GRAPH = 'DUMP_GE_GRAPH'
+FLAG_DUMP_GRAPH_LEVEL = 'DUMP_GRAPH_LEVEL'
+FLAG_DUMP_GRAPH_PATH = 'DUMP_GRAPH_PATH'
+FLAG_NPU_DUMP_GRAPH = 'NPU_DUMP_GRAPH'
+FUSION_SWITCH_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_switch.cfg')
+FUSION_OFF_FILE = os.path.join(os.path.dirname(__file__), '../config/fusion_off.cfg')
+
+
+class TfAdapter(object):
+    def __init__(self):
+        self.log = util.get_log()
+
+    def sess_dump(self, sess):
+        """wrapper session with dumping debug wrapper. In session run mode. 
Use sess=sess_dump(sess) + :param sess: origin session + :return: Session + """ + from tensorflow.python import debug as tf_debug + self._init() + return tf_debug.DumpingDebugWrapperSession(sess, cfg.TF_DEBUG_DUMP_DIR) + + def estimator_dump(self): + """In estimator mode. estim_spec = tf.estimator.EstimatorSpec(traing_hooks=[estimator_dump()]) + :return: + """ + from tensorflow.python import debug as tf_debug + self._init() + return tf_debug.DumpingDebugHook(cfg.TF_DEBUG_DUMP_DIR) + + def session_dump_config(self, session_config=None, action=None, dump_layer=None): + """ + In TF session mode. set dump_config in session_config. + exp. config = session_dump_config() + config.[set your own configs] + with tf.Session(config=config) as sess: + sess.run(_) + tf_debug.LocalCLIDebugWrapperSession(sess=sess, ui_type="readline") + :param session_config: original session config + :param action: if set action, no need to start app with cli wrapper + :return: config_pb2.ConfigProto + """ + from tensorflow.core.protobuf import config_pb2 + from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig + if ((not isinstance(session_config, config_pb2.ConfigProto)) and + (not issubclass(type(session_config), config_pb2.ConfigProto))): + session_config = config_pb2.ConfigProto() + custom_op = None + for existed_custom_op in session_config.graph_options.rewrite_options.custom_optimizers: + if existed_custom_op.name == 'NpuOptimizer': + custom_op = existed_custom_op + if custom_op is None: + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + custom_op.parameter_map['use_off_line'].b = True + self.update_custom_op(custom_op, action, dump_layer) + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + return session_config + + def estimator_dump_config(self, action=None): + """return DumpConfig. + In estimator mode. set dump_config in NPURunConfig(). + exp. 
config = NPURunConfig(dump_config=estimator_dump_config(), session_config=session_config)
+        :return: DumpConfig
+        """
+        from npu_bridge.npu_init import DumpConfig
+        self._init()
+        if self._is_overflow(action):
+            config = DumpConfig(enable_dump_debug=True, dump_path=cfg.NPU_OVERFLOW_DUMP_DIR, dump_mode="all")
+        elif self._is_dump(action):
+            config = DumpConfig(enable_dump=True, dump_path=cfg.DEFAULT_NPU_DUMP_DIR, dump_step=cfg.TF_DUMP_STEP,
+                                dump_mode="all")
+        else:
+            config = DumpConfig()
+        return config
+
+    def npu_device_dump_config(self, npu_device, action):
+        """For tf2.x
+        :param npu_device: npu_device
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :return: npu_device
+        """
+        self._init()
+        if self._is_overflow(action):
+            npu_device.global_options().dump_config.enable_dump_debug = True
+            npu_device.global_options().dump_config.dump_path = cfg.NPU_OVERFLOW_DUMP_DIR
+            npu_device.global_options().dump_config.dump_debug_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+        if self._is_dump(action):
+            npu_device.global_options().dump_config.enable_dump = True
+            npu_device.global_options().dump_config.dump_path = cfg.DEFAULT_NPU_DUMP_DIR
+            npu_device.global_options().dump_config.dump_mode = "all"
+            npu_device.global_options().op_debug_level = cfg.OP_DEBUG_LEVEL
+            npu_device.global_options().dump_config.dump_step = cfg.TF_DUMP_STEP
+        if self._is_dump_stats(action):
+            npu_device.global_options().dump_config.dump_data = "stats"
+        if self._is_fusion_off(action):
+            npu_device.global_options().fusion_switch_file = FUSION_OFF_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        if self._is_fusion_switch(action):
+            npu_device.global_options().fusion_switch_file = FUSION_SWITCH_FILE
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        return npu_device
+
+    def update_custom_op(self, custom_op, action=None, dump_layer=None):
+        """Update custom_op
+        :param custom_op: origin custom op
+        :param action: dump | overflow | fusion_off | fusion_switch
+        :param dump_layer: op names passed through to the 'dump_layer' option
+        :return: custom_op
+        """
+        import tensorflow as tf
+        self._init()
+        custom_op.parameter_map['debug_dir'].s = tf.compat.as_bytes(cfg.DEFAULT_OP_DEBUG_DIR)
+        if self._is_overflow(action):
+            custom_op.parameter_map['enable_dump_debug'].b = True
+            custom_op.parameter_map['dump_debug_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.NPU_OVERFLOW_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+        elif self._is_dump(action):
+            custom_op.parameter_map['enable_dump'].b = True
+            custom_op.parameter_map['dump_mode'].s = tf.compat.as_bytes("all")
+            custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(cfg.DEFAULT_NPU_DUMP_DIR)
+            custom_op.parameter_map['op_debug_level'].i = cfg.OP_DEBUG_LEVEL
+            custom_op.parameter_map['dump_step'].s = tf.compat.as_bytes(cfg.TF_DUMP_STEP)
+            if self._is_dump_stats(action):
+                custom_op.parameter_map['dump_data'].s = tf.compat.as_bytes("stats")
+        if dump_layer is not None:
+            custom_op.parameter_map['dump_layer'].s = tf.compat.as_bytes(dump_layer)
+        if self._is_fusion_off(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_OFF_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_OFF_FILE)
+        elif self._is_fusion_switch(action):
+            custom_op.parameter_map['fusion_switch_file'].s = tf.compat.as_bytes(FUSION_SWITCH_FILE)
+            print("[PrecisionTool] Set fusion switch file: ", FUSION_SWITCH_FILE)
+        if self._is_prof(action):
+            
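+            # The profiling branch below fills 'profiling_options' with a JSON string;
+            # the keys are exactly the ones this tool passes through to the NPU runtime.
+            # Assumption based on common CANN usage, not confirmed by this patch:
+            # leaving "fp_point"/"bp_point" empty lets the framework auto-detect the
+            # forward/backward boundary ops.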
custom_op.parameter_map["profiling_mode"].b = True + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes('{"output":"%s",\ + "storage_limit": "1000MB","training_trace":"on","l2":"on","hccl":"on","task_trace":"on",\ + "aicpu":"on","fp_point":"", "bp_point":"","aic_metrics":"PipeUtilization","msproftx":"on"}' % + cfg.PROFILING_DIR) + return custom_op + + def _init(self): + util.create_dir(cfg.DEFAULT_OP_DEBUG_DIR) + util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_DUMP_DIR) + util.create_dir(cfg.DEFAULT_NPU_GRAPH_DIR) + util.create_dir(cfg.PROFILING_DIR) + self._set_dump_graph_flags() + + @staticmethod + def _set_dump_graph_flags(): + os.environ[FLAG_DUMP_GE_GRAPH] = str(cfg.DUMP_GE_GRAPH_VALUE) + os.environ[FLAG_DUMP_GRAPH_LEVEL] = str(cfg.DUMP_GRAPH_LEVEL_VALUE) + os.environ[FLAG_DUMP_GRAPH_PATH] = cfg.DEFAULT_NPU_GRAPH_DIR + os.environ[FLAG_NPU_DUMP_GRAPH] = 'true' + + @staticmethod + def _is_dump(action): + if action is not None: + return 'dump' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_dump_stats(action): + if action is not None: + return 'dump_stats' in action + if cfg.PRECISION_TOOL_DUMP_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_DUMP_FLAG] == 'True': + print("[PrecisionTool] enable npu dump >======") + return True + return False + + @staticmethod + def _is_overflow(action): + if action is not None: + return 'overflow' in action + if cfg.PRECISION_TOOL_OVERFLOW_FLAG in os.environ and os.environ[cfg.PRECISION_TOOL_OVERFLOW_FLAG] == 'True': + print("[PrecisionTool] enable npu overflow >======") + return True + return False + + @staticmethod + def _is_fusion_off(action): + return 'fusion_off' in action if action is not None else False + + @staticmethod + def _is_fusion_switch(action): + return ('fusion_switch' in action) if action is not None else False + + @staticmethod + def _is_prof(action): + return ('prof' in action) if action is not None else False + -- Gitee From 78b5b652173db7e0c90f8904c566fadac171ac2a Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:43:49 +0000 Subject: [PATCH 27/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/compare/compare.py | 215 ++++++++++++++++++ .../lib/compare/compare_result.py | 143 ++++++++++++ 2 files changed, 358 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py new file mode 100644 index 000000000..e166f5042 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare.py @@ -0,0 +1,215 @@ +# coding=utf-8 +""" +Compare +""" +import json +import os +import numpy as np +from .compare_result import CompareResult +from ..util.constant import Constant +from ..util.util import util +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException +from ..util.precision_tool_exception import catch_tool_exception + + +class 
Compare(object):
+    def __init__(self):
+        """Init"""
+        super(Compare, self).__init__()
+        self.log = util.get_log()
+        self.vector_compare_results = {}
+
+    @catch_tool_exception
+    def prepare(self):
+        util.create_dir(cfg.VECTOR_COMPARE_PATH)
+
+    def npu_tf_vector_compare(self, graphs, npu_root_dir, tf_root_dir, result_dir):
+        """Compare npu dump data with tf dump data
+        :param graphs: npu ge graph json file list
+        :param npu_root_dir: root dir of npu dump data
+        :param tf_root_dir: root dir of tf dump data
+        :param result_dir: result dir
+        :return:
+        """
+        for graph_file in graphs:
+            self.log.info("Compare npu tf with graph %s", graph_file)
+            sub_graphs = self._get_sub_graphs(graph_file)
+            if sub_graphs is None:
+                continue
+            for sub_graph in sub_graphs:
+                npu_dir = self._get_sub_dir_by_sub_graph_name(sub_graph, npu_root_dir)
+
+                if npu_dir is None:
+                    self.log.warning("Can not find any sub graph dir named %s", sub_graph)
+                    # for some infer case, sub_graph name may not match sub dir name.
+                    npu_dir_0 = self._get_sub_dir_by_sub_graph_name(sub_graph + '_0', npu_root_dir)
+                    if npu_dir_0 is None:
+                        self.log.warning("Can not find any sub graph dir named %s", sub_graph + '_0')
+                        continue
+                    npu_dir = npu_dir_0
+                self.vector_compare(npu_dir, tf_root_dir, result_dir, graph_file)
+
+    @catch_tool_exception
+    def _get_sub_dir_by_sub_graph_name(self, sub_graph, npu_root_dir):
+        sub_graph_dirs = []
+        for dir_path, dir_names, file_names in os.walk(npu_root_dir, followlinks=True):
+            if sub_graph in dir_names:
+                # walk sub graph dir
+                for sub_dir_path, sub_dir_names, sub_file_names in os.walk(os.path.join(dir_path, sub_graph),
+                                                                           followlinks=True):
+                    if len(sub_dir_names) == 0:
+                        sub_graph_dirs.append(sub_dir_path)
+        if len(sub_graph_dirs) == 0:
+            return None
+        if len(sub_graph_dirs) > 1:
+            self.log.warning("Find [%d] dirs in sub graph dir [%s], %s, will compare the first one.",
+                             len(sub_graph_dirs), sub_graph, sub_graph_dirs)
+        return sub_graph_dirs[0]
+
+    @catch_tool_exception
+    def _get_sub_graphs(self, graph_file):
+        with open(graph_file, 'r') as f:
+            graph_json = json.load(f)
+        if 'graph' not in graph_json:
+            raise PrecisionToolException("No graph in file: %s" % graph_file)
+        sub_graphs = []
+        for graph in graph_json['graph']:
+            sub_graphs.append(graph['name'])
+        return sub_graphs
+
+    '''
+    @staticmethod
+    def _get_ge_default_dirs(self, root_dir):
+        for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True):
+            for dir_name in dir_names:
+    '''
+
+    def npu_vector_compare(self, debug_0_root_dir, debug_1_root_dir):
+        """Compare two npu dump data
+        :param debug_0_root_dir:
+        :param debug_1_root_dir:
+        :return:
+        """
+        # debug_0_sub_dirs = self._get_ge_default_dirs(debug_0_root_dir)
+        # debug_1_sub_dirs = self._get_ge_default_dirs(debug_1_root_dir)
+
+    def vector_compare(self, lh_path, rh_path, result_dir, graph_json=None):
+        """Compare all ops"""
+        if lh_path is None or util.empty_dir(lh_path):
+            raise PrecisionToolException("No valid dump file in %s" % lh_path)
+        if rh_path is None or util.empty_dir(rh_path):
+            raise PrecisionToolException("No valid dump file in %s" % rh_path)
+        self.log.info("Start vector compare process...")
+        util.compare_vector(lh_path, rh_path, graph_json, result_dir)
+        self.log.info("Vector compare process finish.")
+
+    def _get_compare_result_by_file_name(self, file_name):
+        results = []
+        if file_name is None:
+            sub_dir = util.get_newest_dir(cfg.VECTOR_COMPARE_PATH)
+            if sub_dir == '':
+                raise PrecisionToolException("Empty vector compare path:%s" % cfg.VECTOR_COMPARE_PATH)
+            file_name = os.path.join(cfg.VECTOR_COMPARE_PATH, sub_dir)
+        if os.path.isfile(file_name):
+            
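+            # file_name may point at a single result csv or at a directory produced
+            # by vector compare; a plain file is wrapped as one CompareResult, while
+            # a directory is expanded below into one CompareResult per result csv.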
results.append(CompareResult(file_name))
+        if os.path.isdir(file_name):
+            vector_compare_result_files = util.list_vector_compare_result_files(file_name)
+            if vector_compare_result_files is None or len(vector_compare_result_files) == 0:
+                raise PrecisionToolException("Can not find any vector compare result in dir:%s" % file_name)
+            file_list = sorted(vector_compare_result_files.values(), key=lambda x: x.timestamp)
+            file_names = [x.file_name for x in file_list]
+            self.log.debug("Find %s result files in dir %s", file_names, file_name)
+            for file in file_list:
+                results.append(CompareResult(file.path))
+        return results
+
+    @catch_tool_exception
+    def vector_summary(self, file_name=None, cos_sim_threshold=0.98, limit=1):
+        """Print not NaN result in vector compare result"""
+        compare_results = self._get_compare_result_by_file_name(file_name)
+        error_ops = []
+        for compare_result in compare_results:
+            err_ops = compare_result.get_op_by_cosine_sim_threshold(cos_sim_threshold, limit)
+            self.log.info("Find %d ops with cosine similarity less than %s in %s", len(err_ops), cos_sim_threshold,
+                          compare_result.file_path)
+            error_ops.extend(err_ops)
+        if len(error_ops) == 0:
+            self.log.info("Can not find any op with cosine similarity below threshold: %s", cos_sim_threshold)
+        else:
+            for i, error_op in enumerate(error_ops):
+                if i < limit:
+                    error_op.summary(cos_sim_threshold)
+        return error_ops
+
+    def compare_data(self, left, right, save_txt=False, rl=0.001, al=0.001, diff_count=20):
+        """Compare data"""
+        left = self._detect_file(left)
+        right = self._detect_file(right)
+        if left is None or right is None:
+            raise PrecisionToolException("invalid input or output")
+        # save to txt
+        if save_txt:
+            util.save_npy_to_txt(left)
+            util.save_npy_to_txt(right)
+        # compare data
+        total_cnt, all_close, cos_sim, err_percent = self._do_compare_data(left, right, rl, al, diff_count)
+        content = ['Left:', ' ├─ NpyFile: %s' % left]
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % left)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(left))
+        content.append('Right:')
+        content.append(' ├─ NpyFile: %s' % right)
+        if save_txt:
+            content.append(' ├─ TxtFile: [green]%s.txt[/green]' % right)
+        content.append(' └─ NpySpec: [yellow]%s[/yellow]' % util.gen_npy_info_txt(right))
+        content.append('NumCnt: %s' % total_cnt)
+        content.append('AllClose: %s' % all_close)
+        content.append('CosSim: %s' % cos_sim)
+        content.append('ErrorPer: %s (rl= %s, al= %s)' % (err_percent, rl, al))
+        util.print_panel(Constant.NEW_LINE.join(content))
+
+    def _do_compare_data(self, left, right, rl=0.001, al=0.001, diff_count=20):
+        data_left = np.load(left).astype(np.float32)
+        data_right = np.load(right).astype(np.float32)
+        shape_left = data_left.shape
+        shape_right = data_right.shape
+        if shape_left != shape_right:
+            self.log.warning("Data shape not equal: %s vs %s", data_left.shape, data_right.shape)
+        data_left = data_left.reshape(-1)
+        data_right = data_right.reshape(-1)
+        if data_left.shape[0] != data_right.shape[0]:
+            self.log.warning("Data size not equal: %s vs %s", data_left.shape, data_right.shape)
+            if data_left.shape[0] < data_right.shape[0]:
+                data_left = np.pad(data_left, (0, data_right.shape[0] - data_left.shape[0]), 'constant')
+            else:
+                data_right = np.pad(data_right, (0, data_left.shape[0] - data_right.shape[0]), 'constant')
+        all_close = np.allclose(data_left, data_right, atol=al, rtol=rl)
+        # cos_sim = 1 - spatial.distance.cosine(data_left, data_right)
+        cos_sim = np.dot(data_left, data_right) / (
+            
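+            # cosine similarity computed with plain numpy (the scipy variant is kept
+            # commented out above to avoid the extra dependency); note that an
+            # all-zero tensor makes the denominator 0.0 and the result nan.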
np.sqrt(np.dot(data_left, data_left)) * np.sqrt(np.dot(data_right, data_right)))
+        err_cnt = 0
+        total_cnt = data_left.shape[0]
+        diff_table_columns = ['Index', 'Left', 'Right', 'Diff']
+        err_table = util.create_table("Error Item Table", diff_table_columns)
+        top_table = util.create_table("Top Item Table", diff_table_columns)
+        for i in range(total_cnt):
+            abs_diff = abs(data_left[i] - data_right[i])
+            if i < diff_count:
+                top_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff))
+            if abs_diff > (al + rl * abs(data_right[i])):
+                if err_cnt < diff_count:
+                    err_table.add_row(str(i), str(data_left[i]), str(data_right[i]), str(abs_diff))
+                err_cnt += 1
+        err_percent = float(err_cnt / total_cnt)
+        util.print(util.create_columns([err_table, top_table]))
+        return total_cnt, all_close, cos_sim, err_percent
+
+    def _detect_file(self, file_name):
+        """Find files in npu/overflow/cpu dump dir"""
+        if os.path.isfile(file_name):
+            return file_name
+        for parent_dir in [cfg.TMP_DIR, cfg.TF_DUMP_DIR]:
+            file_infos = util.list_numpy_files(parent_dir, file_name)
+            if len(file_infos) > 0:
+                self.log.info("Find %s, choose the first one.", list(file_infos.keys()))
+                return list(file_infos.values())[0].path
+        return None
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py
new file mode 100644
index 000000000..5e67dd24d
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/compare/compare_result.py
@@ -0,0 +1,143 @@
+import collections
+import os
+import numpy as np
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import PrecisionToolException
+from ..util.precision_tool_exception import catch_tool_exception
+
+
+class RowMap(object):
+    """
+    'Index': 0,
+    'LeftOp': 1,
+    'RightOp': 2,
+    'TensorIdx': 3, # TensorIndex
+    'CosSim': 4, # CosineSimilarity
+    'MaxAbs': 5, # MaxAbsoluteError
+    'ARE': 6, # AccumulatedRelativeError
+    'RED': 7, # RelativeEuclideanDistance
+    'KLD': 8, # KullbackLeiblerDivergence
+    'StandardDeviation': 9 # StandardDeviation
+    """
+    def __init__(self, item=None):
+        self.index = 0
+        self.left = 1
+        self.right = 2
+        self.tensor_index = 3
+        self.cosine_similarity = 4
+        self.max_abs = 5
+        if item is not None:
+            self.update(item)
+
+    def update(self, item):
+        for i, value in enumerate(item):
+            self.left = i if value == 'LeftOp' else self.left
+            self.right = i if value == 'RightOp' else self.right
+            self.tensor_index = i if value == 'TensorIndex' else self.tensor_index
+            self.cosine_similarity = i if value == 'CosineSimilarity' else self.cosine_similarity
+            self.max_abs = i if value == 'MaxAbsoluteError' else self.max_abs
+
+
+class CompareItem(object):
+    def __init__(self, op_name, item, row_map):
+        self.row_map = row_map
+        self.index = int(item[self.row_map.index])
+        self.op_name = op_name
+        self.left = item[self.row_map.left].split(" ")
+        self.right = item[self.row_map.right].split(" ")
+        self.input = []
+        self.output = []
+
+    def update(self, item):
+        tensor_index = item[self.row_map.tensor_index]
+        if tensor_index not in ['NaN', '*']:
+            item_detail = tensor_index.split(':')
+            if len(item_detail) != 3:
+                raise PrecisionToolException("item:%s tensor index invalid. 
[%s]" % ( + item[self.row_map.index], tensor_index)) + if item_detail[1] == 'input': + self.input.insert(int(item_detail[2]), item) + else: + self.output.insert(int(item_detail[2]), item) + + def is_cosine_sim_over_threshold(self, threshold): + for item in self.output: + if item[self.row_map.cosine_similarity] == 'NaN': + continue + if float(item[self.row_map.cosine_similarity]) <= threshold: + return True + return False + + @staticmethod + def _color_data(data, threshold): + try: + data = float(data) + if np.isnan(data): + raise ValueError + elif data <= threshold: + return "[red]%s[/red]" % data + else: + return "[green]%s[/green]" % data + except ValueError: + return "[yellow]%s[/yellow]" % data + + def summary(self, threshold): + content = ["Left: %s" % self.left, "Right: %s" % self.right, "Input: "] + input_txt = [] + for i, item in enumerate(self.input): + input_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.extend([Constant.TAB_LINE.join(input_txt), "Output:"]) + output_txt = [] + for i, item in enumerate(self.output): + output_txt.append(" - [%d]%s" % (i, self._color_data(item[self.row_map.cosine_similarity], threshold))) + content.append(Constant.TAB_LINE.join(output_txt)) + title = "[%d] %s" % (self.index, self.op_name) + util.print_panel(Constant.NEW_LINE.join(content), title=title) + + +class CompareResult(object): + def __init__(self, file_path): + self.file_path = file_path + self.ops = None + self.prepare() + + @catch_tool_exception + def prepare(self): + if not str(self.file_path).endswith(Constant.Suffix.CSV): + raise PrecisionToolException("Compare result file %s not a csv file." % self.file_path) + if not os.path.isfile(self.file_path): + raise PrecisionToolException("Compare result file %s not exist." 
% self.file_path) + items = util.read_csv(self.file_path) + self.ops = collections.OrderedDict() + row_map = RowMap() + for item in items: + if item[row_map.index] == 'Index': + row_map.update(item) + continue + if item[row_map.tensor_index] in ['NaN', '*']: + continue + tensor_index = item[row_map.tensor_index] + op_name = tensor_index.split(":")[0] + if op_name not in self.ops: + self.ops[op_name] = CompareItem(op_name, item, row_map) + op = self.ops[op_name] + op.update(item) + + def get_compare_item_by_op(self, op_name): + if self.ops is None: + self.prepare() + if self.ops is None: + raise PrecisionToolException("Invalid compare result file: %s" % self.file_path) + if op_name in self.ops: + return self.ops[op_name] + return None + + def get_op_by_cosine_sim_threshold(self, threshold, limit=-1): + result = [] + for compare_item in self.ops.values(): + if compare_item.is_cosine_sim_over_threshold(threshold): + result.append(compare_item) + if len(result) == limit: + break + return result -- Gitee From e9ff37c666b10bce48f9b29e2bfe5d9291f4f340 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:44:07 +0000 Subject: [PATCH 28/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/config/config.py | 114 ++++++++++++++++++ .../precision_tool/lib/config/fusion_off.cfg | 10 ++ .../lib/config/fusion_switch.cfg | 6 + 3 files changed, 130 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py new file mode 100644 index 000000000..f9568666c --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/config.py @@ -0,0 +1,114 @@ +# coding=utf-8 +import os + +# Dump config '0|5|10' +TF_DUMP_STEP = '0' + +# path to run package operator cmp compare +# default may be /usr/local/Ascend/ +CMD_ROOT_PATH = '/usr/local/Ascend/' +ASCEND_SET_ENV = os.path.join(CMD_ROOT_PATH, 'bin/setenv.bash') + + +# ASCEND Log Path +ASCEND_LOG_PATH = '/root/ascend/log/plog/' + +# TOOL CONFIG +LOG_LEVEL = "NOTSET" +ROOT_DIR = '' + +# [train/infer] if adapt from msquickcmp result, set net type to infer +NET_TYPE = 'infer' + +''' +precision_data/ +├── npu +│ ├── debug_0 +| | ├── dump +| | | └── 20210510101133 +| │ └── graph +| | └── ge_proto_00000179_PreRunAfterBuild.txt +│ └── debug_1 +├── tf +| ├── tf_debug +| └── dump +├── overflow +├── fusion +└── temp + ├── op_graph + ├── decode + | ├── dump_decode + | ├── overflow_decode + | └── dump_convert + └── vector_compare + ├── 20210510101133 + | ├── result_123456.csv + | └── result_123455.csv + └── 20210510101134 + └── result_123458.csv +''' + +# Static dirs, do not change +DATA_ROOT_DIR = os.path.join(ROOT_DIR, 'precision_data') + +# fusion +FUSION_DIR = os.path.join(DATA_ROOT_DIR, 'fusion') + +# npu dump/graph parent dir +NPU_DIR = os.path.join(DATA_ROOT_DIR, 'npu') +DEFAULT_NPU_DIR = os.path.join(NPU_DIR, 'debug_0') +DEFAULT_NPU_DUMP_DIR = os.path.join(DEFAULT_NPU_DIR, 'dump') +DEFAULT_NPU_GRAPH_DIR = 
os.path.join(DEFAULT_NPU_DIR, 'graph')
+PROFILING_DIR = os.path.join(DEFAULT_NPU_DIR, 'prof')
+DEFAULT_OP_DEBUG_DIR = DEFAULT_NPU_DIR
+
+# npu overflow dir
+OVERFLOW_DIR = os.path.join(DATA_ROOT_DIR, 'overflow')
+NPU_OVERFLOW_DUMP_DIR = os.path.join(OVERFLOW_DIR, 'dump')
+
+# tf dirs
+TF_DIR = os.path.join(DATA_ROOT_DIR, 'tf')
+TF_DEBUG_DUMP_DIR = os.path.join(TF_DIR, 'tf_debug')
+TF_DUMP_DIR = os.path.join(TF_DIR, 'dump')
+TF_GRAPH_DIR = os.path.join(TF_DIR, 'graph')
+# tf checkpoints
+TF_CKPT_ROOT = os.path.join(TF_DIR, 'checkpoints')
+TF_CKPT_FILE = os.path.join(TF_CKPT_ROOT, 'ckpt')
+TF_CKPT_INPUT_DIR = os.path.join(TF_CKPT_ROOT, 'input')
+
+# pytorch dirs
+PT_DIR = os.path.join(DATA_ROOT_DIR, 'pt')
+PT_NPU_DIR = os.path.join(PT_DIR, 'npu')
+PT_GPU_DIR = os.path.join(PT_DIR, 'gpu')
+
+# tmp dirs
+TMP_DIR = os.path.join(DATA_ROOT_DIR, 'temp')
+OP_GRAPH_DIR = os.path.join(TMP_DIR, 'op_graph')
+
+DECODE_DIR = os.path.join(TMP_DIR, 'decode')
+OVERFLOW_DECODE_DIR = os.path.join(DECODE_DIR, 'overflow_decode')
+DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'dump_decode')
+PT_DUMP_DECODE_DIR = os.path.join(DECODE_DIR, 'pt')
+DUMP_CONVERT_DIR = os.path.join(DECODE_DIR, 'dump_convert')
+
+VECTOR_COMPARE_PATH = os.path.join(TMP_DIR, 'vector_compare')
+TF_TENSOR_NAMES = os.path.join(TMP_DIR, 'tf_tensor_names.txt')
+TF_TENSOR_DUMP_CMD = os.path.join(TMP_DIR, 'tf_tensor_cmd.txt')
+
+# FLAG
+PRECISION_TOOL_OVERFLOW_FLAG = 'PRECISION_TOOL_OVERFLOW'
+PRECISION_TOOL_DUMP_FLAG = 'PRECISION_TOOL_DUMP'
+
+# for previous versions, set this to 0
+OP_DEBUG_LEVEL = 4
+# DUMP CONFIG
+DUMP_GE_GRAPH_VALUE = 2
+DUMP_GRAPH_LEVEL_VALUE = 3
+DUMP_SEED = 2022
+
+# TF_DEBUG
+TF_DEBUG_TIMEOUT = 360
+
+# MSACCUCMP
+MS_ACCU_CMP = r'msaccucmp.py[c]?'
+BUILD_JSON_GRAPH_NAME = 'Build'
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg
new file mode 100644
index 000000000..823672b74
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_off.cfg
@@ -0,0 +1,10 @@
+{
+    "Switch": {
+        "GraphFusion": {
+            "ALL": "off"
+        },
+        "UBFusion": {
+            "ALL": "off"
+        }
+    }
+}
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg
new file mode 100644
index 000000000..572ad4271
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/config/fusion_switch.cfg
@@ -0,0 +1,6 @@
+{
+"Switch": {
+    "GraphFusion": {},
+    "UBFusion": {}
+}
+}
\ No newline at end of file
-- 
Gitee

From 653bdfcdafc56c75c0f6eb379ca5c4975c610799 Mon Sep 17 00:00:00 2001
From: huangju1993 
Date: Wed, 17 Jul 2024 07:44:26 +0000
Subject: [PATCH 29/38] 1

Signed-off-by: huangju1993 
---
 .../precision_tool/lib/dump/dump_manager.py   |  89 ++++++++
 .../precision_tool/lib/dump/npu_dump.py       | 200 ++++++++++++++++++
 .../precision_tool/lib/dump/pt_dump.py        |  65 ++++++
 .../precision_tool/lib/dump/tf_dump.py        | 141 ++++++++++++
 4 files changed, 495 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py
 create mode 100644 
TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py
new file mode 100644
index 000000000..939f4fa47
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/dump_manager.py
@@ -0,0 +1,89 @@
+# coding=utf-8
+import os
+import collections
+from ..util.util import util
+from ..util.constant import Constant
+from .npu_dump import NpuDump
+from .tf_dump import TfDump
+from .pt_dump import PtDump
+from ..config import config as cfg
+
+
+class DumpManager(object):
+    def __init__(self):
+        self.npu_dumps = collections.OrderedDict()
+        self.pt_dump = PtDump(cfg.PT_DIR)
+        self.tf_dump = TfDump(cfg.TF_DUMP_DIR)
+        self._init_dirs()
+
+    def prepare(self):
+        # 1. prepare npu dump
+        sub_dirs = os.listdir(cfg.NPU_DIR)
+        if len(sub_dirs) == 0:
+            # create default
+            sub_dirs = [Constant.DEFAULT_DEBUG_ID]
+        # sort in place; a bare sorted(sub_dirs) would discard its result
+        sub_dirs.sort()
+        for sub_dir in sub_dirs:
+            npu_dump = NpuDump(sub_dir)
+            npu_dump.prepare()
+            self.npu_dumps[sub_dir] = npu_dump
+        # 2. prepare tf dump
+        self.tf_dump.prepare()
+        # 3. prepare pt dump
+        self.pt_dump.prepare()
+
+    def get_dump_root_dir(self, debug_id):
+        if debug_id in self.npu_dumps:
+            return self.npu_dumps[debug_id].dump_root
+        return None
+
+    def op_dump_summary(self, ops):
+        npu_result = collections.OrderedDict()
+        for debug_id, op in ops.items():
+            if debug_id in self.npu_dumps:
+                npu_result[debug_id] = collections.OrderedDict()
+                for op_detail in op:
+                    npu_result[debug_id][op_detail.graph_name] = self.npu_dumps[debug_id].op_dump_summary(op_detail)
+        tf_result = None
+        if self.tf_dump is not None and len(ops[Constant.DEFAULT_DEBUG_ID]) != 0:
+            tf_result = self.tf_dump.op_dump_summary(ops[Constant.DEFAULT_DEBUG_ID][0])
+        return npu_result, tf_result
+
+    def pt_dump_summary(self, ir_name):
+        """Pytorch dump summary"""
+        return self.pt_dump.op_dump_summary(ir_name)
+
+    def convert_npu_dump(self, name, data_format=None, dst_path=None):
+        for _, npu_dump in enumerate(self.npu_dumps.values()):
+            npu_dump.convert_npu_dump(name, data_format, dst_path)
+
+    def print_tensor(self, file_name, is_convert):
+        """Print numpy data file"""
+        if os.path.isfile(file_name):
+            return util.print_npy_summary(os.path.dirname(file_name), os.path.basename(file_name), is_convert)
+        # file_name = file_name.replace('/', '_')
+        # npu decode file
+        npu_convert_files = self.npu_dumps[Constant.DEFAULT_DEBUG_ID].get_npu_dump_decode_files_by_name(file_name)
+        self._print_tensors(npu_convert_files, is_convert)
+        # util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name)
+        # tf decode file
+        tf_decode_files = self.tf_dump.get_dump_files_by_name(file_name, True)
+        self._print_tensors(tf_decode_files, is_convert)
+        # pt decode file
+        pt_decode_files = self.pt_dump.get_dump_files_by_name(file_name)
+        self._print_tensors(pt_decode_files, is_convert)
+
+    @staticmethod
+    def _print_tensors(file_infos, is_convert):
+        if file_infos is not None:
+            for file_info in file_infos.values():
+                
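+                # each file_info carries dir_path/file_name attributes; a summary is
+                # printed for every match, not just the first one.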
util.print_npy_summary(file_info.dir_path, file_info.file_name, is_convert)
+
+    @staticmethod
+    def _init_dirs():
+        """Create dump file dirs"""
+        util.create_dir(cfg.DUMP_DECODE_DIR)
+        util.create_dir(cfg.NPU_OVERFLOW_DUMP_DIR)
+        util.create_dir(cfg.OVERFLOW_DECODE_DIR)
+        util.create_dir(cfg.TF_DUMP_DIR)
+        util.create_dir(cfg.PT_DIR)
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py
new file mode 100644
index 000000000..7e3e4a9d8
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/npu_dump.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+import os
+import re
+from ..util.util import util
+from ..util.constant import Constant
+from ..util.precision_tool_exception import catch_tool_exception
+from ..util.precision_tool_exception import PrecisionToolException
+from ..config import config as cfg
+
+
+class NpuDumpDecodeFile(object):
+    def __init__(self):
+        self.log = util.get_log()
+        self.input_files = {}
+        self.output_files = {}
+        self.timestamp = -1
+        self.op_name = ''
+        self.op_type = ''
+        self.task_id = -1
+        # self.stream_id = -1
+
+    def update(self, file_info):
+        """Prepare op npu decode file map."""
+        if not self._check(file_info):
+            self.log.warning('Invalid NpuDumpDecodeFile: %s', file_info)
+            return
+        if file_info.type == 'input':
+            self.input_files[file_info.idx] = file_info
+        else:
+            self.output_files[file_info.idx] = file_info
+
+    def summary(self):
+        txt = ['[yellow][%s][TaskID: %d][/yellow][green][%s][/green] %s' % (
+            self.timestamp, self.task_id, self.op_type, self.op_name)]
+        if len(self.input_files) > 0:
+            info = self.input_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Input: [green][0][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.input_files)):
+                info = self.input_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('   [green][%d][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        if len(self.output_files) > 0:
+            info = self.output_files[0]
+            shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+            txt.append(' - Output: [green][0][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                shape, dtype, max_data, min_data, mean, info.file_name))
+            for idx in range(1, len(self.output_files)):
+                info = self.output_files[idx]
+                shape, dtype, max_data, min_data, mean = util.npy_info(info.path)
+                txt.append('   [green][%d][/green][yellow][%s][%s][Max:%s][Min:%s][Mean:%s][/yellow] %s' % (
+                    idx, shape, dtype, max_data, min_data, mean, info.file_name))
+        return Constant.NEW_LINE.join(txt)
+
+    def _check(self, file_info):
+        if self.timestamp == -1:
+            self.timestamp = file_info.timestamp
+            self.op_name = file_info.op_name
+            self.op_type = file_info.op_type
+            self.task_id = file_info.task_id
+            # self.stream_id = file_info.stream_id
+            return True
+        return self.timestamp == file_info.timestamp
+
+
+class NpuDump(object):
+    def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID):
+        """Init"""
+        self.log = util.get_log()
+        self.debug_id = debug_id
+        npu_root = os.path.join(cfg.NPU_DIR, debug_id)
+        self.dump_root = os.path.join(npu_root, Constant.DUMP)
+        
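+        # decoded .npy files for this debug_id go to a per-debug-id subdir of
+        # cfg.DUMP_DECODE_DIR, so decode results of different debug runs stay separate.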
self.decode_dir = os.path.join(cfg.DUMP_DECODE_DIR, debug_id) + self.dump_files = None + self._init_dirs() + + def prepare(self): + """Prepare npu/cpu dump files""" + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get npu dump files by Op""" + npu_files = {} + op_name = op.name().replace('/', '_').replace('.', '_') + match_name = op.type() + '.' + op_name + '\\.' + for f in self.dump_files: + # match op name and graph name, infer dump directory may not has graph + if re.match(match_name, f) and (op.graph_name in self.dump_files[f].path or cfg.NET_TYPE == 'infer'): + npu_files[f] = self.dump_files[f] + return npu_files + + @catch_tool_exception + def op_dump_summary(self, op): + """ print op dump info""" + if op is None: + raise PrecisionToolException("Get None operator") + # search npu dump file by op name + npu_dump_files = self.get_npu_dump_decode_files_by_op(op) + npu_dump_files = sorted(npu_dump_files.values(), key=lambda x: (x.idx, x.timestamp)) + input_txt = ['NpuDumpInput:'] + output_txt = ['NpuDumpOutput:'] + for npu_dump_file in npu_dump_files: + if str(npu_dump_file.file_name).endswith(Constant.Suffix.CSV): + detail = util.read_csv(npu_dump_file.path) + input_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' -[%d]%s' % (npu_dump_file.idx, npu_dump_file.file_name)) + for item in detail: + item_txt = '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (item[5], item[3], item[6], item[7], item[8]) + if item[0] == 'Input': + input_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + elif item[0] == 'Output': + output_txt.append(' └─ [green][%s][/green][yellow]%s[/yellow]' % (item[1], item_txt)) + continue + if npu_dump_file.type == 'input': + input_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + input_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + else: + output_txt.append(' -[green][%s][/green] %s' % (npu_dump_file.idx, npu_dump_file.file_name)) + output_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(npu_dump_file.path)) + input_txt.extend(output_txt) + return Constant.NEW_LINE.join(input_txt) + + def _init_dirs(self): + util.create_dir(self.dump_root) + util.create_dir(self.decode_dir) + + @catch_tool_exception + def _parse_dump_files(self): + """prepare npu dump, support soft link""" + sub_dir = util.get_newest_dir(self.dump_root) + sub_dir = os.path.join(self.dump_root, sub_dir) if sub_dir != '' else self.dump_root + self.dump_files = util.list_npu_dump_files(sub_dir) + + def list_dump(self, dir_path, file_name): + """list dump""" + + @staticmethod + def get_npu_dump_decode_files_by_name(file_name): + file_name = file_name.replace('/', '_') + return util.list_npu_dump_convert_files(cfg.DECODE_DIR, file_name) + + def get_npu_dump_decode_files_by_op(self, op): + """Get npu dump decode files by op""" + dump_files = self.get_dump_files_by_op(op) + result = {} + for dump_file_key in dump_files.keys(): + dump_file = dump_files[dump_file_key] + if str(dump_file.file_name).endswith(Constant.Suffix.CSV): + result.update({dump_file_key: dump_file}) + continue + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + if len(dump_decode_files) == 0: + util.convert_dump_to_npy(dump_file.path, self.decode_dir) + dump_decode_files = util.list_npu_dump_decode_files(self.decode_dir, dump_file.file_name) + result.update(dump_decode_files) + return result + + def 
convert_npu_dump(self, name, data_format=None, dst_path=None): + """Convert npu dump to npy of data_format""" + if os.path.isfile(name): + # absolute path to file + self.log.info("Decode file: %s", name) + file_name = os.path.basename(name) + file_path = name + elif os.path.isdir(name): + # decode all files in path + self.log.info("Decode all files in path: %s", name) + file_name = '' + file_path = name + elif self.dump_files is not None and name in self.dump_files: + self.log.info("Decode npu dump file: %s in default dump path", name) + file_info = self.dump_files[name] + file_name = file_info.file_name + file_path = file_info.path + else: + # maybe op name + file_info = self._get_file_by_op_name(name) + if file_info is None: + raise PrecisionToolException("Can not find any op/dump file named %s" % name) + file_name = file_info.file_name + file_path = file_info.path + dst_path = cfg.DUMP_CONVERT_DIR if dst_path is None else dst_path + util.convert_dump_to_npy(file_path, dst_path, data_format) + dump_convert_files = util.list_npu_dump_convert_files(dst_path, file_name) + # print result info + + summary_txt = ['SrcFile: %s' % name] + for convert_file in dump_convert_files.values(): + summary_txt.append(' - %s' % convert_file.file_name) + util.print_panel(Constant.NEW_LINE.join(summary_txt)) + + def _get_file_by_op_name(self, op_name): + """Get dump file info by op name""" + op_name = op_name.replace('/', '_') + for file_info in self.dump_files.values(): + if file_info.op_name == op_name: + return file_info + return None diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py new file mode 100644 index 000000000..27fd006dc --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/pt_dump.py @@ -0,0 +1,65 @@ +# coding=utf-8 +from ..util.util import util +from ..util.h5_util import H5Util +from ..util.h5_util import gen_h5_data_name +from ..config import config as cfg +from ..util.constant import Constant + + +class PtDump(object): + def __init__(self, data_dir): + self.log = util.get_log() + self.npu = None + self.gpu = None + self.data_dir = data_dir + + def prepare(self): + util.create_dir(cfg.PT_NPU_DIR) + util.create_dir(cfg.PT_GPU_DIR) + util.create_dir(cfg.PT_DUMP_DECODE_DIR) + if not util.empty_dir(cfg.PT_NPU_DIR): + npu_h5_files = util.list_h5_files(cfg.PT_NPU_DIR) + if len(npu_h5_files) != 0: + file_list = sorted(npu_h5_files.values(), key=lambda x: x.timestamp) + self.npu = H5Util(file_list[0].path, prefix='npu') + if not util.empty_dir(cfg.PT_GPU_DIR): + gpu_h5_files = util.list_h5_files(cfg.PT_GPU_DIR) + if len(gpu_h5_files) != 0: + file_list = sorted(gpu_h5_files.values(), key=lambda x: x.timestamp) + self.gpu = H5Util(file_list[0].path, prefix='gpu') + + @staticmethod + def get_dump_files_by_name(file_name): + """Get dump files by name""" + npu_pattern = gen_h5_data_name(file_name, 'npu') if '/' in file_name else file_name + gpu_pattern = gen_h5_data_name(file_name, 'gpu') if '/' in file_name else file_name + files = util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=npu_pattern) + files.update(util.list_numpy_files(cfg.PT_DUMP_DECODE_DIR, extern_pattern=gpu_pattern)) + return files + + def op_dump_summary(self, ir_name): + summary_list = [] + op_id = self._parse_op_id(ir_name) + if self.npu is not None: + h5_op = 
self.npu.get_op(op_id) + if h5_op is not None: + summary_list.append('NPU:') + summary_list.append(h5_op.summary()) + if self.gpu is not None: + h5_op = self.gpu.get_op(op_id) + if h5_op is not None: + summary_list.append('GPU:') + summary_list.append(h5_op.summary()) + return Constant.NEW_LINE.join(summary_list) + + @staticmethod + def _parse_op_id(ir_name): + op_id = str(ir_name) + if op_id.isnumeric(): + op_id = ir_name + else: + for name in op_id.split('/'): + if name.isnumeric(): + op_id = name + break + return op_id diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py new file mode 100644 index 000000000..bb4230f54 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/dump/tf_dump.py @@ -0,0 +1,141 @@ +# coding=utf-8 +import os +import re +import time +import sys +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class TfDump(object): + def __init__(self, dump_root=cfg.TF_DUMP_DIR): + self.log = util.get_log() + self.dump_root = dump_root + self.dump_files = None + + def prepare(self): + if not os.path.exists(self.dump_root): + util.create_dir(self.dump_root) + self._parse_dump_files() + + def get_dump_files_by_op(self, op): + """Get cpu dump files by op""" + tf_files = {} + for output in op.outputs(): + if output.data_dump_origin_name() != '': + tf_files.update(self.get_dump_files_by_name(output.data_dump_origin_name())) + if len(tf_files) == 0: + tf_files.update(self.get_dump_files_by_name(op.name())) + return tf_files + + def get_dump_files_by_name(self, name, likely=False): + match_name = name.replace('/', '_') + if not likely: + match_name = match_name.replace('.', '_') + '\\.' + tf_files = {} + for f in self.dump_files: + if re.match(match_name, f): + tf_files[f] = self.dump_files[f] + return tf_files + + @catch_tool_exception + def op_dump_summary(self, op): + # cpu dump info + if op is None: + return '' + cpu_dump_txt = ['TfDumpOutput:'] + cpu_dump_files = self.get_dump_files_by_op(op) + cpu_dump_file_list = sorted(cpu_dump_files.values(), key=lambda x: x.timestamp) + for cpu_dump_file in cpu_dump_file_list: + cpu_dump_txt.append(' -[green][%s][/green] %s' % (cpu_dump_file.idx, cpu_dump_file.file_name)) + cpu_dump_txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(cpu_dump_file.path)) + return Constant.NEW_LINE.join(cpu_dump_txt) + + def _parse_dump_files(self): + self.dump_files = util.list_cpu_dump_decode_files(self.dump_root) + + def run_tf_dbg_dump(self, cmd_line=None): + """Run tf train script to get dump data.""" + if os.path.exists(cfg.TF_DEBUG_DUMP_DIR) and len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) != 0: + self.log.info("TF offline debug path [%s] is not empty, will analyze it directly." % cfg.TF_DEBUG_DUMP_DIR) + elif cmd_line is not None: + self.log.info("Run command: %s" % cmd_line) + util.execute_command(cmd_line) + self.log.info("Run finish, start analyze TF dump.") + if not os.path.exists(cfg.TF_DEBUG_DUMP_DIR) or len(os.listdir(cfg.TF_DEBUG_DUMP_DIR)) == 0: + raise PrecisionToolException("Empty tf debug dir. 
%s" % cfg.TF_DEBUG_DUMP_DIR) + run_dirs = os.listdir(cfg.TF_DEBUG_DUMP_DIR) + run_dirs.sort() + # create dirs + util.create_dir(cfg.TF_DUMP_DIR) + util.create_dir(cfg.TMP_DIR) + # extra the last run dir + for run_dir in run_dirs: + time.sleep(1) + command = "%s -m tensorflow.python.debug.cli.offline_analyzer --ui_type readline --dump_dir %s" % ( + util.python, os.path.join(cfg.TF_DEBUG_DUMP_DIR, run_dir)) + self._do_run_tf_dbg_dump(command, 0) + + @staticmethod + def _make_pt_commands(tensor_name_path): + pt_command_list = [] + tensor_count = {} + with open(tensor_name_path) as tensor_name_file: + # skip 3 line + next(tensor_name_file) + next(tensor_name_file) + next(tensor_name_file) + # start to convert tensor to pt command + for line in tensor_name_file: + new_line = line.strip() + tensor_name = new_line[new_line.rfind(' ') + 1:] + if tensor_name not in tensor_count: + tensor_count[tensor_name] = 0 + else: + tensor_count[tensor_name] += 1 + + npy_file_name = "%s.%s.npy" % (tensor_name.replace("/", "_").replace(":", "."), + str(round(time.time() * 1000000))) + pt_command_list.append("pt %s -n %d -w %s" % + (tensor_name, tensor_count[tensor_name], + os.path.join(cfg.TF_DUMP_DIR, npy_file_name))) + return pt_command_list + + def _do_run_tf_dbg_dump(self, cmd_line, run_times=2): + """Run tf debug with pexpect, should set tf debug ui_type='readline'""" + try: + import pexpect + import readline + except ImportError as import_err: + self.log.error("Import failed with err:%s. You can run " + "'pip3 install pexpect gnureadline pyreadline' to fix it.", + import_err) + raise PrecisionToolException("Import module error.") + self.log.info("======< Auto run tf train process to dump data >======") + self.log.info("Send run times: %d", run_times) + tf_dbg = pexpect.spawn(cmd_line) + # tf_dbg.logfile = open(cfg.DUMP_FILES_CPU_LOG, 'wb') + tf_dbg.logfile = sys.stdout.buffer + for i in range(run_times): + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + self.log.info("Process %d tf_debug.run", i + 1) + tf_dbg.sendline('run') + self.log.info("Generate tensor name file.") + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + tf_dbg.sendline('lt > %s' % cfg.TF_TENSOR_NAMES) + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + if not os.path.exists(cfg.TF_TENSOR_NAMES): + self.log.error("Failed to get tensor name in tf_debug.") + raise PrecisionToolException("Get tensor name in tf_debug failed.") + self.log.info("Save tensor name success. Generate tf dump commands from file: %s", cfg.TF_TENSOR_NAMES) + pt_commands = self._make_pt_commands(cfg.TF_TENSOR_NAMES) + self.log.info("Pt %d tensors." 
% len(pt_commands)) + for cmd in pt_commands: + self.log.debug(cmd.strip()) + tf_dbg.sendline(cmd.strip()) + tf_dbg.expect('tfdbg>', timeout=cfg.TF_DEBUG_TIMEOUT) + tf_dbg.sendline('exit') + self.log.info('Finish dump tf data') -- Gitee From 7402ffda3526e9ba678f5d32c2679a8cf9c55570 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:44:44 +0000 Subject: [PATCH 30/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/graph/desc.py | 154 +++++++++ .../precision_tool/lib/graph/graph_manager.py | 111 ++++++ .../precision_tool/lib/graph/npu_graph.py | 316 ++++++++++++++++++ .../precision_tool/lib/graph/op.py | 255 ++++++++++++++ .../precision_tool/lib/graph/tf_graph.py | 45 +++ 5 files changed, 881 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py new file mode 100644 index 000000000..5ac35dbfd --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/desc.py @@ -0,0 +1,154 @@ +# coding=utf-8 +from ..util.util import util + +ATTR = 'attr' +ATTR_KEY = 'key' +ATTR_VALUE = 'value' +DATA_DUMP_ORIGIN_OUTPUT_INDEX = '_datadump_origin_output_index' +FUSION_ORIGIN_OUTPUT_INDEX = '_fusion_origin_output_index' +DATA_DUMP_ORIGIN_NAME = '_datadump_origin_name' +ORIGIN_FORMAT = 'origin_format' +ORIGIN_SHAPE = 'origin_shape' +VALUE_RANGE = 'value_range' +SHAPE_RANGE = 'shape_range' +DT_STRING = 's' +DT_INT = 'i' +DT_LIST_LIST_INT = 'list_list_int' +DT_LIST_LIST_I = 'list_list_i' +DT_LIST = 'list' +DT_LIST_INT = 'list_i' +DATA_TYPE_DEFAULT_VALUE = { + 'i': 0, + 's': '' +} + + +class Desc(object): + """ Op desc + shape: data shape + dtype: data type + format: data format + npu_file: npu file name/path + cpu_file: cpu file name/path + idx: input idx + """ + def __init__(self, desc_json, index): + self.desc_json = desc_json + self.index = index + self.log = util.get_log() + + def idx(self): + return self.index + + def shape(self): + return self.desc_json['shape']['dim'] if 'shape' in self.desc_json else [] + + def dtype(self): + return self.desc_json['dtype'] if 'dtype' in self.desc_json else '' + + def format(self): + return self.desc_json['layout'] if 'layout' in self.desc_json else [] + + def origin_shape(self): + return self._get_attr_list(ORIGIN_SHAPE, DT_INT) + + def origin_format(self): + return self._get_attr(ORIGIN_FORMAT, DT_STRING) + + def value_range(self): + return self._get_attr_list_list(VALUE_RANGE, DT_LIST_INT) + + def shape_range(self): + return self._get_attr_list_list(SHAPE_RANGE, DT_LIST_INT) + + def _get_attr_list_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST_LIST_INT) + if val is None or DT_LIST_LIST_I not in val: + return [] + res = [] + for item in 
val[DT_LIST_LIST_I]: + if data_type in item: + res.append(item[data_type]) + return res + + def _get_attr_list(self, key, data_type): + val = self._get_attr_base(key, DT_LIST) + return val[data_type] if val is not None and data_type in val else [] + + def _get_attr(self, key, data_type): + val = self._get_attr_base(key, data_type) + return val if val is not None else DATA_TYPE_DEFAULT_VALUE[data_type] + + def _get_attr_base(self, key, data_type): + if ATTR in self.desc_json: + for attr in self.desc_json[ATTR]: + if attr[ATTR_KEY] == key: + if attr[ATTR_VALUE] is not None and data_type in attr[ATTR_VALUE]: + return attr[ATTR_VALUE][data_type] + return None + + def compare(self, right_desc): + if self.dtype() == right_desc.dtype() and self.format() == right_desc.format(): + return "[green][%d] [%s][%s] %s[/green]" % (self.idx(), self.dtype(), self.format(), self.shape()), True + else: + return "[yellow][%d] [%s][%s] %s | [%s][%s] %s[/yellow]" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + right_desc.dtype(), right_desc.format(), right_desc.shape()), False + + def data_dump_origin_name(self): + return '' + + +class InputDesc(Desc): + def __init__(self, name, desc_json, index): + super(InputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid input name.') + name_info = name.split(':') + self.op_name = name + self.peer_index = -2 + if len(name_info) == 2: + self.op_name = name_info[0] + self.peer_index = int(name_info[1]) + + def name(self): + return self.op_name + + def peer_idx(self): + return self.peer_index + + def is_control(self): + return self.peer_index == -1 + + def summary(self, origin_txt=False): + """idx | dtype | format | shape | [blue]value_range | shape_range| origin_shape[/blue] | op_name | peer_idx""" + if origin_txt: + return "[%d][%s][%s]%s %s:%d" % (self.idx(), self.dtype(), self.format(), + self.shape(), self.name(), self.peer_idx()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s:%d" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.name(), self.peer_idx()) + + +class OutputDesc(Desc): + def __init__(self, name, desc_json, index): + super(OutputDesc, self).__init__(desc_json, index) + if name == '': + self.log.warning('invalid output name.') + self.op_names = name.split(':') + + def names(self): + return self.op_names + + def summary(self, origin_txt=False): + if origin_txt: + return "[%d][%s][%s]%s %s" % (self.idx(), self.dtype(), self.format(), self.shape(), self.names()) + return "[green][%d][/green][yellow][%s][%s]%s[/yellow][blue] %s %s %s[/blue] %s" % ( + self.idx(), self.dtype(), self.format(), self.shape(), + self.value_range(), self.shape_range(), self.origin_shape(), self.names()) + + def data_dump_origin_name(self): + return self._get_attr(DATA_DUMP_ORIGIN_NAME, DT_STRING) + + def data_dump_origin_output_index(self): + return self._get_attr(DATA_DUMP_ORIGIN_OUTPUT_INDEX, DT_INT) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py new file mode 100644 index 000000000..cef861741 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/graph_manager.py @@ -0,0 +1,111 @@ +# coding=utf-8 +""" +Graph Manager +""" +import os +import collections +from 
..util.constant import Constant +from .npu_graph import NpuGraph +from .tf_graph import TfGraph +from ..util.util import util +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + + +class GraphManager(object): + def __init__(self): + self.log = util.get_log() + self.npu_graphs = collections.OrderedDict() + self.tf_graph = None + + def prepare(self): + # prepare npu graphs + if not os.path.exists(cfg.NPU_DIR): + util.create_dir(cfg.NPU_DIR) + sub_dirs = os.listdir(cfg.NPU_DIR) + if len(sub_dirs) == 0: + # create default dir + sub_dirs = [Constant.DEFAULT_DEBUG_ID] + for sub_dir in sub_dirs: + npu_graph = NpuGraph(sub_dir) + npu_graph.prepare() + self.npu_graphs[sub_dir] = npu_graph + # prepare cpu graph + self.tf_graph = TfGraph(cfg.TF_GRAPH_DIR) + + def check_cast(self): + for graph in self.npu_graphs.values(): + graph.check_cast() + + def check_dtype(self): + for graph in self.npu_graphs.values(): + graph.check_dtype() + + def check_similarity(self): + self._check_npu_graph_similarity() + + def _check_npu_graph_similarity(self): + """Check npu graph similarity""" + if len(self.npu_graphs) < 2: + self.log.debug("Only one npu debug, no need to check npu graph similarity.") + return + left_graphs = self.npu_graphs[Constant.DEFAULT_DEBUG_ID].sub_graphs + right_graphs = self.npu_graphs[Constant.NPU_DEBUG_ID_1].sub_graphs + for left_graph in left_graphs.values(): + for right_graph in right_graphs.values(): + if left_graph.graph_id != right_graph.graph_id: + continue + left_graph.compare(right_graph) + + def get_graphs(self, debug_id): + if debug_id not in self.npu_graphs: + raise PrecisionToolException("Get graphs failed with no debug_id:%s" % debug_id) + return self.npu_graphs[debug_id].build_json_files + + def get_ops(self, op_name, graph_name=None): + """ Get npu/tf ops by op_name + :param op_name: op name + :param graph_name: graph name + :return: npu op dict: debug_id->Op, tf op + """ + npu_ops = collections.OrderedDict() + for debug_id, npu_graph in self.npu_graphs.items(): + npu_ops[debug_id] = npu_graph.get_op(op_name, graph_name) + # tf graph op + return npu_ops, None + + def print_op_list(self, op_type='', op_name='', pass_name='', kernel_name=''): + if op_type == '' and op_name == '' and pass_name == '' and kernel_name == '': + table_list = [] + for debug_id, graph in self.npu_graphs.items(): + table = util.create_table(debug_id, ["OpType", "Count"]) + op_type_counter = collections.Counter() + for op in graph.ops_list: + op_type_counter[op.type()] += 1 + for op_types, count in op_type_counter.items(): + table.add_row(op_types, str(count)) + table_list.append(table) + util.render(util.create_columns(table_list)) + + else: + for debug_id, graph in self.npu_graphs.items(): + ops = graph.list_ops(op_type, op_name, pass_name, kernel_name) + ops_txt = ['[green][%s][/green][yellow][%s][/yellow] %s' % ( + op.type(), op.pass_name(), op.name()) for op in ops] + util.print_panel(Constant.NEW_LINE.join(ops_txt), debug_id) + + @staticmethod + def op_graph_summary(ops, attr_detail=False): + npu_summary = collections.OrderedDict() + for debug_id, op in ops.items(): + npu_summary[debug_id] = collections.OrderedDict() + for op_detail in op: + npu_summary[debug_id][op_detail.graph_name] = op_detail.summary(attr_detail=attr_detail) + return npu_summary, None + + def save_sub_graph(self, ops, deep): + for debug_id, op in ops.items(): + if debug_id in self.npu_graphs: + for op_detail in 
op: + self.npu_graphs[debug_id].save_sub_graph(op_detail, deep) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py new file mode 100644 index 000000000..5a93b9718 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/npu_graph.py @@ -0,0 +1,316 @@ +# coding=utf-8 +""" +Graph Manager +""" +import json +import os +import collections +import time +from .op import Op +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + +DANGEROUS_CAST = { + 'DT_FLOAT': ['DT_INT32'] +} + +NO_DIG_OPS = ['AtomicAddrClean', 'NetOutput'] +CKPT_META_SHUFFIX='.meta' + +OP_CAST = 'Cast' + + +class NpuSubGraph(object): + def __init__(self, graph_json, build_file, npu_graph): + self.log = util.get_log() + self.graph_name = graph_json['name'] + self.npu_graph = npu_graph + self.graph = graph_json + self.build_file = build_file + self.ops_list = collections.OrderedDict() + self.ops_type_list = {} + self._prepare() + self.graph_id = self._get_graph_id() + + def _prepare(self): + self.log.debug("Graph %s operator count: %d" % (self.graph_name, len(self.graph['op']))) + for op_json in self.graph['op']: + op_name = op_json['name'] + op_type = op_json['type'] + if op_name not in self.ops_list: + self.ops_list[op_name] = [] + op = Op(op_json, self.ops_list, self.graph['name'], self.npu_graph, self) + if op_type not in self.ops_type_list: + self.ops_type_list[op_type] = {} + self.ops_list[op_name] = op + self.ops_type_list[op_type][op_name] = op + + def _get_graph_id(self): + if 'attr' in self.graph: + for item in self.graph['attr']: + if item['key'] == '_session_graph_id': + return item['value']['s'] + self.log.warning("Unknown sub graph id.") + return "UNKNOWN" + + def compare(self, sub_graph): + """compare with another sub graph""" + if not isinstance(sub_graph, NpuSubGraph): + raise PrecisionToolException("Should compare with another subgraph.") + right_ops_list = sub_graph.ops_list + ignore_ops = ["TransData", "Cast", "Recv", "Send", "Variable", "NetOutput", "NoOp", "Assign", "Constant", + "StreamActive"] + similar_count = 0 + for op_name in self.ops_list: + if self.ops_list[op_name].type() in ignore_ops: + continue + if op_name not in right_ops_list: + self.log.warning("Can not Find [%s] %s in right subgraph.", self.ops_list[op_name].type(), op_name) + continue + result, similar = self.ops_list[op_name].compare(right_ops_list[op_name]) + if not similar: + util.print_panel(result, title=op_name) + else: + similar_count += 1 + for op_name in right_ops_list: + if right_ops_list[op_name].type() in ignore_ops: + continue + if op_name not in self.ops_list: + self.log.warning("Can not Find [%s] %s in left subgraph.", right_ops_list[op_name].type(), op_name) + self.log.info("Compare [%s] [%s], similarity is [%s / %s]", + self.graph_name, sub_graph.graph_name, similar_count, len(self.ops_list)) + + def get_op(self, name): + if name in self.ops_list: + return [self.ops_list[name]] + guess_op_list = [] + for op_detail in self.ops_list.values(): + if name in op_detail.name() or name == str(op_detail.name()).replace('/', '_'): + guess_op_list.append(op_detail) + return guess_op_list + + def 
get_parent_node_by_subgraph_name(self, graph_name): + ops = [] + for op_detail in self.ops_list.values(): + if graph_name in op_detail.subgraph_names(): + ops.append(op_detail) + return ops + + def get_op_by_type(self, op_type): + ops = [] + for op_detail in self.ops_list.values(): + if op_type == op_detail.type(): + ops.append(op_detail) + return ops + + def check_cast(self): + cast_list = {} + danger_cast_list = {} + if OP_CAST in self.ops_type_list: + cast_ops = self.ops_type_list[OP_CAST] + for op in cast_ops.values(): + input_type = '' + output_type = '' + for input_desc in op.inputs(): + input_type = input_desc.dtype() if input_desc.dtype() != '' else input_type + for output_desc in op.outputs(): + output_type = output_desc.dtype() if output_desc.dtype() != '' else output_type + cast_type = "%s -> %s" % (input_type, output_type) + if cast_type not in cast_list: + cast_list[cast_type] = [] + cast_list[cast_type].append(op.name()) + for cast_type in cast_list: + if self._is_dangerous_cast(cast_type): + summary_txt = "[green][Cast][/green][red][%s][/red] %s" % (cast_type, cast_list[cast_type]) + util.print(summary_txt) + + @staticmethod + def _is_dangerous_cast(cast_type): + """Check if cast """ + cast_info = cast_type.split(" -> ") + input_dtype = cast_info[0] + output_dtype = cast_info[1] + if input_dtype in DANGEROUS_CAST: + if output_dtype in DANGEROUS_CAST[input_dtype]: + return True + return False + + +class NpuGraph(object): + def __init__(self, debug_id=Constant.DEFAULT_DEBUG_ID): + self.log = util.get_log() + self.build_files = None + self.build_json_files = [] + self.debug_id = debug_id + self.npu_root = os.path.join(cfg.NPU_DIR, debug_id) + self.graph_root = os.path.join(self.npu_root, Constant.GRAPH) + self.sub_graphs = collections.OrderedDict() + self.ops_list = [] + util.create_dir(self.graph_root) + + @catch_tool_exception + def prepare(self): + """prepare""" + self._prepare_npu_graphs() + if self.build_files is not None: + for build_file in self.build_files: + self._parse_ops(build_file) + + def check_cast(self): + """Check cast op type""" + for sub_graph in self.sub_graphs.values(): + sub_graph.check_cast() + + def check_dtype(self): + """Check op input/output dtype""" + for op in self.ops_list: + input_dtype = '' + for input_desc in op.inputs(): + input_dtype += ' ' + input_desc.dtype() + output_dtype = '' + for output_desc in op.outputs(): + output_dtype += ' ' + output_desc.dtype() + util.print('[green][%s][/green] %s\n - Input: %s\n - Output: %s' % ( + op.type(), op.name(), input_dtype, output_dtype)) + + def check_similarity(self): + """Check graph similarity.""" + + @catch_tool_exception + def save_sub_graph(self, op, deep=0, dump_manager=None, compare_manager=None): + """Save sub graph""" + if op is None: + raise PrecisionToolException("Save sub graph failed as root operator is None.") + try: + from graphviz import Digraph + file_name_list = [self.debug_id, op.graph_name, op.type(), op.name().replace('/', '_').replace('.', '_'), + str(deep), 'gv'] + file_name = '.'.join(file_name_list) + path = os.path.join(cfg.OP_GRAPH_DIR, file_name) + dot = Digraph(file_name, filename=path, node_attr={'shape': 'Mrecord'}, format='svg') + dot_list = [] + edge_list = [] + self._gen_sub_graph(dot, op, deep, dot_list, edge_list, 'red', direction='all', + dump_manager=dump_manager, compare_manager=compare_manager) + dot.format = 'svg' + dot.save(path) + self.log.info("Sub graph saved to %s" % os.path.abspath(cfg.OP_GRAPH_DIR)) + try: + dot.view(path) + time.sleep(1) + except 
Exception as err: + raise PrecisionToolException( + "graphviz not install, use [yum/apt-get] install graphviz xdg-utils. %s" % err) + except ImportError as err: + raise PrecisionToolException("Save sub graph failed as import graphviz module failed. %s" % err) + + def _gen_sub_graph(self, dot, op, deep, dot_list, edge_list, color='black', direction='all', + dump_manager=None, compare_manager=None): + """Gen sub graph""" + if deep == 0 or op.type() in NO_DIG_OPS: + return + if op.name() not in dot_list: + dot.node(op.name(), self._gen_sub_graph_label(op), color=color, tooltip=op.summary(True)) + dot_list.append(op.name()) + # add input and output + for desc in op.inputs(): + sub_op = self.get_op(desc.name(), op.graph_name) + if len(sub_op) != 0: + sub_op = sub_op[0] + if direction in ['all', 'input']: + self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='input') + if sub_op.name() in dot_list: + src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx()) + else: + dot.node(sub_op.name(), self._gen_sub_graph_label(sub_op), color=color, tooltip=op.summary(True)) + src_edge = '%s:o%d' % (sub_op.name(), desc.peer_idx()) + dst_edge = '%s:i%d' % (op.name(), desc.idx()) + if src_edge + dst_edge not in edge_list: + dot.edge(src_edge, dst_edge) + edge_list.append(src_edge + dst_edge) + # add output + for desc in op.outputs(): + for out_node_name in desc.names(): + sub_op = self.get_op(out_node_name, op.graph_name) + if len(sub_op) != 0 and direction in ['all', 'output']: + sub_op = sub_op[0] + self._gen_sub_graph(dot, sub_op, deep - 1, dot_list, edge_list, direction='output') + + def _gen_sub_graph_label(self, op): + input_labels = [] + for desc in op.inputs(): + input_labels.append(self._gen_sub_graph_desc(desc, 'i')) + output_labels = [] + for desc in op.outputs(): + output_labels.append(self._gen_sub_graph_desc(desc, 'o')) + str_cell = '|' + return '{{ %s } | [%s] %s | { %s }}' % (str_cell.join(input_labels), op.type(), op.name(), + str_cell.join(output_labels)) + + @staticmethod + def _gen_sub_graph_desc(desc, id_prefix): + desc_str = r'<%s%d> [%d]' % (id_prefix, desc.idx(), desc.idx()) + desc_str = r'%s [%s]' % (desc_str, desc.dtype()) if desc.dtype() != '' else desc_str + desc_str = r'%s\n%s' % (desc_str, desc.shape()) if len(desc.shape()) != 0 else desc_str + return desc_str + + def list_ops(self, op_type='', op_name='', pass_name='', kernel_name=''): + """list ops in graph""" + return filter(lambda op: op_type in op.type() and op_name in op.name() and ( + pass_name == '' or pass_name in op.pass_name()) and kernel_name in op.kernel_name(), self.ops_list) + + def get_op(self, name, graph_name=None): + """get op by name""" + # get op in specific sub graph + if graph_name is not None and graph_name in self.sub_graphs: + return self.sub_graphs[graph_name].get_op(name) + ops = [] + for sub_graph in self.sub_graphs.values(): + ops.extend(sub_graph.get_op(name)) + # check if there is an exact match operation + match_ops = list(filter(lambda x: x.name() == name, ops)) + if len(match_ops) != 0: + return match_ops + # return guess operations by name + self.log.info("Can not find Operator named %s. 
You may mean the operator below.", name) + guess_op_name_list = ['[green][%s][/green] %s' % (x.type(), x.name()) for x in ops] + util.print_panel(Constant.NEW_LINE.join(guess_op_name_list), title='Possible Operators') + return ops + + def get_parent_node_by_subgraph_name(self, graph_name): + ops = [] + for sub_graph in self.sub_graphs.values(): + ops.extend(sub_graph.get_parent_node_by_subgraph_name(graph_name)) + return ops + + def _prepare_npu_graphs(self): + """prepare ge graphs """ + # move graphs to precision data dir + graph_files = util.list_ge_graph_files(self.graph_root) + self.build_files = sorted(filter(lambda x: x.graph_name == cfg.BUILD_JSON_GRAPH_NAME, graph_files.values()), + key=lambda x: x.graph_id) + if len(self.build_files) == 0: + self.log.warning("Can not find any build files in dir: %s", self.graph_root) + self.log.info("Find [%d] GE build files.", len(self.build_files)) + + @catch_tool_exception + def _parse_ops(self, build_file): + """Parse *_Build.txt.json to op objects.""" + build_file_json = build_file.path + '.json' + build_file_json = util.convert_proto_to_json(build_file.path, build_file_json) + if build_file_json is not None: + self.build_json_files.append(build_file_json) + with open(build_file_json, 'r') as f: + graph_json = json.load(f) + if 'graph' not in graph_json: + raise PrecisionToolException("No graph in file: %s" % build_file.file_name) + if len(graph_json['graph']) != 1: + self.log.warning("There is more than one graph in the GE build file, found %d" % len(graph_json['graph'])) + # sub_graphs = [] + for graph in graph_json['graph']: + npu_sub_graph = NpuSubGraph(graph, build_file, self) + self.sub_graphs[graph['name']] = npu_sub_graph + self.ops_list.extend(npu_sub_graph.ops_list.values()) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py new file mode 100644 index 000000000..2dbd5a31b --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/op.py @@ -0,0 +1,255 @@ +# coding=utf-8 +import json +import re +from typing import List +from .desc import InputDesc +from .desc import OutputDesc +from ..util.util import util +from ..util.constant import Constant +from ..util.precision_tool_exception import PrecisionToolException + +NO_INPUT_NODES = ['Data', 'AtomicAddrClean', 'Recv', 'Constant'] +NO_OUTPUT_NODES = ['Send', 'Recv', 'NetOutput', 'PartitionedCall'] + +JSON_KEY_NAME = 'name' +JSON_KEY_ID = 'id' +JSON_KEY_TYPE = 'type' +JSON_KEY_ATTR = 'attr' +JSON_KEY = 'key' +JSON_VALUE = 'value' +JSON_KEY_LIST = 'list' +JSON_KEY_STR = 's' +JSON_KEY_INT = 'i' +JSON_KEY_INPUT_I = 'input_i' +JSON_KEY_OUTPUT_I = 'output_i' +JSON_KEY_PASS_NAME = 'pass_name' +JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES = '_datadump_original_op_names' +JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME = "_ge_attr_op_kernel_lib_name" +JSON_KEY_PARENT_NODE_INDEX = "_parent_node_index" +JSON_KEY_SUBGRAPH_NAME = "subgraph_name" + +KERNEL_NAME_SHUFFIX = '_kernelname' + + +class Op(object): + """ Op class. 
+ name: op name + type: op type + inputs: list of input descs + outputs: list of output descs + """ + def __init__(self, op_json, op_list, graph_name, npu_graph, sub_graph): + """Init""" + self.op_json = op_json + self.op_list = op_list + self.graph_name = graph_name + self.npu_graph = npu_graph + self.sub_graph = sub_graph + self.input_list = None + self.output_list = None + self.log = util.get_log() + + def name(self): + """Get op name""" + return self.op_json[JSON_KEY_NAME] + + def id(self): + """Get op id""" + return self.op_json[JSON_KEY_ID] if JSON_KEY_ID in self.op_json else '' + + def json(self): + return json.dumps(self.op_json, indent=2) + + def type(self): + """Get op type""" + return self.op_json[JSON_KEY_TYPE] + + def subgraph_names(self): + return self.op_json[JSON_KEY_SUBGRAPH_NAME] if JSON_KEY_SUBGRAPH_NAME in self.op_json else [] + + def inputs(self): + """Get the input list""" + if self.input_list is None: + self._parse_inputs() + if len(self.input_list) == 0 and self.type() == 'Data': + # Looking for Real Data + self._looking_for_real_inputs() + return self.input_list + + def input_addr(self): + return self.op_json[JSON_KEY_INPUT_I] + + def outputs(self): + """Get output list""" + if self.output_list is None: + self._parse_outputs() + if len(self.output_list) == 0 and self.type() == 'PartitionedCall': + self._looking_for_real_outputs() + return self.output_list + + def output_addr(self): + return self.op_json[JSON_KEY_OUTPUT_I] + + def pass_name(self): + return self._attr(JSON_KEY_PASS_NAME) + + def kernel_name(self): + return self._attr(self.name() + KERNEL_NAME_SHUFFIX) + + def ge_attr_op_kernel_lib_name(self): + return self._attr(JSON_KEY_GE_ATTR_OP_KERNEL_LIB_NAME) + + def data_dump_original_op_names(self): + return self._attr(JSON_KEY_DATA_DUMP_ORIGINAL_OP_NAMES) + + def parent_node_index(self): + return self._attr(JSON_KEY_PARENT_NODE_INDEX) + + def _attr(self, key): + if JSON_KEY_ATTR in self.op_json: + for attr in self.op_json[JSON_KEY_ATTR]: + if key == attr[JSON_KEY]: + if JSON_KEY_STR in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_STR] + elif JSON_KEY_LIST in attr[JSON_VALUE]: + if JSON_KEY_STR in attr[JSON_VALUE][JSON_KEY_LIST]: + return attr[JSON_VALUE][JSON_KEY_LIST][JSON_KEY_STR] + elif JSON_KEY_INT in attr[JSON_VALUE]: + return attr[JSON_VALUE][JSON_KEY_INT] + else: + self.log.warning("Unknown attr format: %s", attr[JSON_VALUE]) + return '' + + def compare(self, right_op): + """Compare with another op""" + if not isinstance(right_op, Op): + raise PrecisionToolException("Should compare with another op.") + res_str = ['LeftOp(Type/Name) : [green][%s][/green] %s' % (self.type(), self.name()), + 'RightOp(Type/Name): [green][%s][/green] %s' % (right_op.type(), right_op.name())] + similar = True + if len(self.inputs()) != len(right_op.inputs()): + res_str.append("Input: [yellow]Input num mismatch.[/yellow]") + else: + res_str.append("Input:") + for left_input in self.inputs(): + for right_input in right_op.inputs(): + if left_input.idx() != right_input.idx(): + continue + txt, input_similar = left_input.compare(right_input) + res_str.append(' - ' + txt) + similar = similar and input_similar + if len(self.outputs()) != len(right_op.outputs()): + res_str.append("Output: [yellow]Output num mismatch.[/yellow]") + else: + res_str.append("Output:") + for left_output in self.outputs(): + for right_output in right_op.outputs(): + if left_output.idx() != right_output.idx(): + continue + txt, output_similar = left_output.compare(right_output) + res_str.append(' 
- ' + txt) + similar = similar and output_similar + return Constant.NEW_LINE.join(res_str), similar + + def _attr_detail(self): + """Gen attr details""" + res_str = [] + if JSON_KEY_ATTR in self.op_json: + res_str = [' ' + str(i) for i in self.op_json[JSON_KEY_ATTR]] + return Constant.NEW_LINE.join(res_str) + + def summary(self, origin_txt=False, attr_detail=False): + """Summary of current op""" + res_str = ['Op(Type/Name): [green][%s][/green] %s' % (self.type(), self.name()), + 'ID: [yellow]%s[/yellow]' % self.id(), + 'KernelName: [yellow]%s[/yellow]' % self.kernel_name(), + 'KernelLibName: [yellow]%s[/yellow]' % self.ge_attr_op_kernel_lib_name(), + 'GraphName: [yellow]%s[/yellow]' % self.graph_name] + pass_name = self.pass_name() + if pass_name != '': + res_str.append('PassName: [yellow]%s[/yellow]' % pass_name) + origin_op = self.data_dump_original_op_names() + if origin_op != '': + res_str.append('OriginalOp: %s' % origin_op) + if attr_detail: + res_str.append(self._attr_detail()) + res_str.append('InputAddr : [yellow]%s[/yellow]' % self.input_addr()) + res_str.append('OutputAddr: [yellow]%s[/yellow]' % self.output_addr()) + res_str.append('Input:%s' % InputDesc.summary.__doc__) + for i in self.inputs(): + res_str.append(' -' + i.summary(origin_txt)) + res_str.append('Output:') + for i in self.outputs(): + res_str.append(' -' + i.summary(origin_txt)) + return Constant.NEW_LINE.join(res_str) + + def _parse_inputs(self): + """ parse input desc in graph """ + self.input_list = [] + if 'input' not in self.op_json: + if self.type() not in NO_INPUT_NODES: + self.log.warning('Parse Op[%s][%s] inputs error.' % (self.type(), self.name())) + return self.input_list + desc_index = 0 + for i in range(len(self.op_json['input'])): + name = self.op_json['input'][i] + if name == '': + # if self.type() not in NO_INPUT_NODES: + # self.log.warning('invalid input name.') + continue + name_info = name.split(':') + if len(name_info) == 2 and int(name_info[1]) == -1: + # control edge + self.input_list.append(InputDesc(name, [], i)) + else: + self.input_list.append(InputDesc(name, self.op_json['input_desc'][desc_index], i)) + desc_index += 1 + self.input_list.sort(key=lambda x: x.index) + return self.input_list + + def _parse_outputs(self): + """ parse output desc in graph """ + self.output_list = [] + if 'dst_index' not in self.op_json: + if self.type() not in NO_OUTPUT_NODES: + self.log.warning('Parse Op[%s][%s] outputs error.' % (self.type(), self.name())) + return self.output_list + desc_index = 0 + for i in range(len(self.op_json['dst_index'])): + dst_name = self.op_json['dst_name'][i] + if self.op_json['dst_index'][i] == -1: + # control edge + self.output_list.append(OutputDesc(dst_name, [], -1)) + else: + self.output_list.append(OutputDesc(dst_name, self.op_json['output_desc'][desc_index], desc_index)) + desc_index += 1 + self.output_list.sort(key=lambda x: x.index) + return self.output_list + + def _looking_for_real_inputs(self): + """Find real inputs of subgraph data node.""" + graph_name = self.graph_name + parent_node_idx = self.parent_node_index() + parent_nodes = self.npu_graph.get_parent_node_by_subgraph_name(graph_name) + self.log.debug("Find %s parent nodes." 
% len(parent_nodes)) + for parent_node in parent_nodes: + inputs = parent_node.inputs() + if len(inputs) <= parent_node_idx: + self.log.warning("Parent node has %d inputs, but need index %d" % (len(inputs), parent_node_idx)) + continue + self.input_list.append(inputs[parent_node_idx]) + + def _looking_for_real_outputs(self): + """Find real outputs of PartitionedCall Node""" + subgraph_names = self.subgraph_names() + for subgraph_name in subgraph_names: + net_output_with_subgraph_name = subgraph_name + '_Node_Output' + net_output_nodes = self.npu_graph.get_op(net_output_with_subgraph_name) + self.log.debug("Find %s net output nodes, just need one." % len(net_output_nodes)) + self.log.info("Note: PartitionedCall output nodes are the nodes that connect to PartitionedCall from the inside.") + for output_node in net_output_nodes: + self.output_list = output_node.inputs() + + + + diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py new file mode 100644 index 000000000..acf8c8920 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/graph/tf_graph.py @@ -0,0 +1,45 @@ +# coding=utf-8 +import collections +import logging +import os +from ..util.util import util +from ..util.precision_tool_exception import catch_tool_exception +from ..util.precision_tool_exception import PrecisionToolException +from ..config import config as cfg + +CKPT_META_SHUFFIX='.meta' + + +class TfGraph(object): + def __init__(self, graph_root=cfg.TF_GRAPH_DIR): + """""" + self.graph_root = graph_root + self.log = util.get_log() + self.op_list = collections.OrderedDict() + + @catch_tool_exception + def get_op_list(self, ckpt_path=None): + if not self.op_list: + # op_list starts out empty, so build it from the newest ckpt meta file + self._convert_ckpt_to_graph(ckpt_path) + return self.op_list + + def _convert_ckpt_to_graph(self, ckpt_path): + log_level = self.log.level + try: + self.log.setLevel('ERROR') + import tensorflow as tf + self.log.setLevel(log_level) + except ImportError as err: + self.log.setLevel(log_level) + raise PrecisionToolException("Import tensorflow failed. %s" % err) + meta_files = util.list_cpu_graph_files(ckpt_path) + if len(meta_files) == 0: + raise PrecisionToolException("Can not find any ckpt meta files.") + file_list = sorted(meta_files.values(), key=lambda x: x.timestamp) + ckpt_file = file_list[-1] + self.log.info("Find %d tf ckpt meta files, choose [%s]" % (len(meta_files), ckpt_file.file_name)) + self.op_list = collections.OrderedDict() + saver = tf.train.import_meta_graph(ckpt_file.path, clear_devices=True) + graph = tf.get_default_graph() + for op in graph.get_operations(): + self.op_list[op.name] = op -- Gitee From 5e2aeaea3b64d9f24789e29fda0ed9547e15e691 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:46:17 +0000 Subject: [PATCH 31/38] 1 Signed-off-by: huangju1993 --- .../lib/train/train_analysis.py | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py new file mode 100644 index 000000000..b7547d677 --- /dev/null +++ 
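A note on the Op._attr helper in op.py above: every GE attribute in the build-graph JSON is a {key, value} pair, and the value nests its payload under 's' (string), 'i' (integer) or 'list'. The following is a minimal standalone sketch of that lookup; the sample_attrs data is invented for illustration and runs outside the tool:

    # Sketch of the attribute lookup performed by Op._attr (sample data invented).
    def lookup_attr(attrs, key):
        # Scan the GE attribute list for the first entry matching `key`
        # and unwrap the typed payload, mirroring Op._attr above.
        for attr in attrs:
            if attr['key'] != key:
                continue
            value = attr['value']
            if 's' in value:
                return value['s']
            if 'list' in value and 's' in value['list']:
                return value['list']['s']
            if 'i' in value:
                return value['i']
        return ''

    sample_attrs = [
        {'key': 'pass_name', 'value': {'s': 'ConvBnFusionPass'}},
        {'key': '_parent_node_index', 'value': {'i': 0}},
        {'key': '_datadump_original_op_names', 'value': {'list': {'s': ['conv1', 'bn1']}}},
    ]
    assert lookup_attr(sample_attrs, 'pass_name') == 'ConvBnFusionPass'
    assert lookup_attr(sample_attrs, '_parent_node_index') == 0
    assert lookup_attr(sample_attrs, '_datadump_original_op_names') == ['conv1', 'bn1']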
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/train/train_analysis.py @@ -0,0 +1,112 @@ +# coding=utf-8 +import os +import numpy as np +from ..adapter.tf_adapter import TfAdapter +from ..dump.tf_dump import TfDump +from ..util.util import util +from ..config import config as cfg +from ..util.precision_tool_exception import PrecisionToolException + + +class TrainAnalysis(object): + def __init__(self): + self.log = util.get_log() + self.tf_adapter = TfAdapter() + + @staticmethod + def gen_feed_file_name(name): + file_name = str(name).replace(':', '_').replace('/', '_') + '.npy' + return os.path.join(cfg.TF_CKPT_INPUT_DIR, file_name) + + def _init_session(self, device='npu', action='dump'): + """""" + import tensorflow as tf + if device == 'npu': + # util.execute_command('source %s', cfg.ASCEND_SET_ENV) + return tf.Session(config=self.tf_adapter.session_dump_config(None, action=action)) + sess = tf.Session(config=tf.ConfigProto()) + return self.tf_adapter.sess_dump(sess) + + def _reset_dropout_rate(self, graph): + import tensorflow as tf + for op in graph.get_operations(): + if 'dropout' in op.name and 'rate' in op.name: + self.log.debug("Find dropout rate node [%s][%s]" % (op.type, op.name)) + # tensor = graph.get_tensor_by_name(op.name) + if op.type != 'Const': + self.log.warning("Dropout op [%s] is not Const, skip resetting its rate. May cause precision differences." % op.name) + continue + op._set_attr('value', tf.AttrValue(tensor=tf.make_tensor_proto(0.0, tf.float32))) + self.log.debug("Set op: %s" % str(op)) + + def _prepare_graph(self, graph): + graph.seed = cfg.DUMP_SEED + self._reset_dropout_rate(graph) + return graph + + def _load_train_graph(self, sess): + import tensorflow as tf + if util.empty_dir(cfg.TF_CKPT_ROOT): + raise PrecisionToolException('checkpoint dir [%s] is empty, can not run train analysis process.' % + cfg.TF_CKPT_ROOT) + checkpoint = tf.train.latest_checkpoint(cfg.TF_CKPT_ROOT) + if checkpoint is None: + raise PrecisionToolException('Load ckpt failed from [%s].' 
% cfg.TF_CKPT_ROOT) + saver = tf.train.import_meta_graph(checkpoint + '.meta') + self._prepare_graph(tf.get_default_graph()) + saver.restore(sess, checkpoint) + return tf.get_default_graph() + + @staticmethod + def _get_input_from_graph(graph): + input_nodes = [] + tensor_index = {} + for op in graph.get_operations(): + if 'Placeholder' == op.type: + if op.name in tensor_index: + tensor_index[op.name] += 1 + else: + tensor_index[op.name] = 0 + node = graph.get_tensor_by_name(op.name + ':' + str(tensor_index[op.name])) + input_nodes.append(node) + return input_nodes + + def _get_input_tensors(self, input_nodes): + feed_map = {} + for node in input_nodes: + file_name = self.gen_feed_file_name(node.name) + if os.path.isfile(file_name): + feed_map[node] = np.load(file_name) + else: + # TD data type + feed_map[node] = np.random.random(node.shape) + return feed_map + + def _build_feed_map(self, graph): + input_nodes = self._get_input_from_graph(graph) + return self._get_input_tensors(input_nodes) + + def _analysis(self, device, action='dump'): + import tensorflow as tf + if device == 'npu': + import npu_bridge.npu_init + sess = self._init_session(device, action=action) + graph = self._load_train_graph(sess) + train_op = tf.get_collection(tf.GraphKeys.TRAIN_OP) + feed_map = self._build_feed_map(graph) + sess.run(train_op, feed_dict=feed_map) + if device == 'cpu': + tf_dump = TfDump() + tf_dump.run_tf_dbg_dump() + + def run(self, device='all', action='dump'): + """ + :param device: all | npu | cpu + :param action: dump | overflow | fusion_switch | fusion_off + :return: + """ + if device == 'all': + self._analysis('cpu', action) + self._analysis('npu', action) + else: + self._analysis(device, action) -- Gitee From 91a77b9e32683bec9f82027a0e0e8bd88449c94d Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:46:32 +0000 Subject: [PATCH 32/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/util/constant.py | 20 + .../precision_tool/lib/util/file_desc.py | 38 ++ .../precision_tool/lib/util/h5_util.py | 190 +++++++ .../precision_tool/lib/util/onnx_builder.py | 0 .../lib/util/precision_tool_exception.py | 24 + .../precision_tool/lib/util/tool_object.py | 10 + .../precision_tool/lib/util/util.py | 536 ++++++++++++++++++ 7 files changed, 818 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/onnx_builder.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py new file mode 100644 index 000000000..8106bb8d4 --- /dev/null +++ 
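A note on the feed construction in train_analysis.py above: each Placeholder is fed either a saved .npy file whose name is derived from the tensor name (':' and '/' mapped to '_'), or random data as a fallback. Below is a self-contained sketch of that naming and fallback; input_dir and the tensor name are assumed example values, not values taken from the tool:

    import os
    import numpy as np

    def feed_file_name(tensor_name, input_dir):
        # Mirrors TrainAnalysis.gen_feed_file_name: 'input/x:0' -> 'input_x_0.npy'
        safe = str(tensor_name).replace(':', '_').replace('/', '_')
        return os.path.join(input_dir, safe + '.npy')

    def build_feed(tensor_name, shape, input_dir='.'):
        path = feed_file_name(tensor_name, input_dir)
        if os.path.isfile(path):
            return np.load(path)  # user-provided input data
        # Fallback as in _get_input_tensors: random values in the placeholder
        # shape. This assumes a fully defined shape and yields float64 in
        # [0, 1); real inputs may need an explicit dtype and value range.
        return np.random.random(shape)

    print(feed_file_name('input/x:0', '/tmp/inputs'))  # /tmp/inputs/input_x_0.npy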
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/constant.py @@ -0,0 +1,20 @@ +# coding=utf-8 + + +class Constant(object): + VERSION = "0.1.11" + NEW_LINE = "\n" + TAB_LINE = "\t" + DEFAULT_DEBUG_ID = "debug_0" + NPU_DEBUG_ID_1 = "debug_1" + GRAPH = "graph" + DUMP = "dump" + + class Suffix(object): + JSON = '.json' + CSV = '.csv' + H5 = '.h5' + OM = '.om' + + class Pattern(object): + GE_PROTO_GRAPH_PATTERN = r'^ge_proto_([0-9]+)_(graph_[0-9]+_)*([A-Za-z0-9_-]+)\.txt$' diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py new file mode 100644 index 000000000..0773632b1 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/file_desc.py @@ -0,0 +1,38 @@ +# coding=utf-8 +import os + + +class FileDesc(object): + def __init__(self, file_name, dir_path, timestamp=-1): + self.file_name = file_name + self.dir_path = dir_path + self.path = os.path.join(dir_path, file_name) + self.timestamp = timestamp + self.idx = 0 + if self.timestamp == -1: + self.timestamp = os.path.getmtime(self.path) + + +class BuildGraphFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, graph_id, graph_name): + super(BuildGraphFileDesc, self).__init__(file_name, dir_path, timestamp) + self.graph_id = graph_id + self.graph_name = graph_name + + +class NpuDumpFileDesc(FileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, stream_id=0): + super(NpuDumpFileDesc, self).__init__(file_name, dir_path, timestamp) + self.op_name = op_name + self.op_type = op_type + self.task_id = task_id + stream_id = 0 if stream_id is None else int(stream_id) + self.stream_id = stream_id + self.idx = dir_path.split(os.sep)[-1] + + +class DumpDecodeFileDesc(NpuDumpFileDesc): + def __init__(self, file_name, dir_path, timestamp, op_name, op_type, task_id, anchor_type, anchor_idx): + super(DumpDecodeFileDesc, self).__init__(file_name, dir_path, timestamp, op_name, op_type, task_id) + self.type = anchor_type + self.idx = anchor_idx diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py new file mode 100644 index 000000000..1b294ab97 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/h5_util.py @@ -0,0 +1,190 @@ +import collections +import os +import numpy as np + +try: + import h5py +except ImportError as import_err: + h5py = None + print("Failed to import h5py. some function may disable. 
Run 'pip3 install h5py' to fix it.", + import_err) + +from ..util.util import util +from ..util.constant import Constant +from ..config import config as cfg + + +class IdxType(object): + # /batch_norm/88/input/xxx + OP_TYPE = 'OP_TYPE' + OP_NAME = 'OP_NAME' + OP_ANC = 'OP_ANC' + + +H5_NAME_IDX = [IdxType.OP_TYPE, IdxType.OP_NAME, IdxType.OP_ANC] + + +def gen_h5_data_name(name, prefix='npu'): + return "%s_h5%s.npy" % (prefix, name.replace('/', '_')) + + +class H5Data(object): + def __init__(self, data, prefix='npu'): + self.data = data + self.prefix = prefix + self.name = gen_h5_data_name(self.data.name, self.prefix) + + def np_data(self): + np_data = np.array(self.data) + self._save(np_data) + return np_data + + def _save(self, data): + path = os.path.join(cfg.PT_DUMP_DECODE_DIR, self.name) + np.save(path, data) + + +class H5Op(object): + def __init__(self, name, h5_node, prefix='npu'): + self.log = util.get_log() + self.name = name + self.prefix = prefix + self.h5_node = h5_node + self.inputs = {} + self.outputs = {} + self.group = { + 'grads': {}, + 'tensors': {}, + 'grad_inputs': {}, + 'result': {} + } + ''' + self.input_grad = {} + self.output_grad = {} + self.input_tensor = {} + self.output_tensor = {} + ''' + self._prepare() + + def summary(self): + summary_txt = [] + summary_txt.extend(self._gen_txt(self.inputs, '-Input:')) + summary_txt.extend(self._gen_txt(self.outputs, '-Output:')) + summary_txt.extend(self._gen_txt(self.group['grads'], 'Grads:')) + summary_txt.extend(self._gen_txt(self.group['tensors'], '-Tensors:')) + summary_txt.extend(self._gen_txt(self.group['grad_inputs'], '-GradInputs:')) + summary_txt.extend(self._gen_txt(self.group['result'], '-Result:')) + return Constant.NEW_LINE.join(summary_txt) + + @staticmethod + def _gen_txt(h5_data, name): + if len(h5_data) == 0: + return [] + txt = [name] + for idx, data in enumerate(h5_data.values()): + txt.append(' └─[green][%s][/green] %s' % (idx, data.name)) + txt.append(' └─ [yellow]%s[/yellow]' % util.gen_npy_info_txt(data.np_data())) + return txt + + def _parse_group(self, node): + sub_node_type = node.name.split('/')[-1] + if sub_node_type in self.group.keys(): + for item in node: + sub_node = node[item] + if isinstance(sub_node, h5py.Dataset): + self.group[sub_node_type][item] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown sub node: %s" % sub_node) + else: + self.log.warning("Unknown sub node type: %s(%s)" % (sub_node_type, node)) + + def _prepare_input_output(self, node, desc_type): + for desc_name in node: + sub_node = node[desc_name] + if isinstance(sub_node, h5py.Group): + self._parse_group(sub_node) + elif isinstance(sub_node, h5py.Dataset): + update_dict = self.inputs if desc_type == 'input' else self.outputs + update_dict[desc_name] = H5Data(sub_node, self.prefix) + else: + self.log.warning("Unknown type: %s(%s)" % (type(sub_node), sub_node)) + + def _prepare(self): + for desc_type in self.h5_node: + if desc_type in ['input', 'output']: + self._prepare_input_output(self.h5_node[desc_type], desc_type) + else: + self.log.warning("Unknown desc type: %s(%s)" % (desc_type, self.h5_node)) + + +class H5Util(object): + def __init__(self, file_name, prefix): + self.log = util.get_log() + self.file_name = file_name + self.prefix = prefix + self.h5 = None + self.ops = collections.OrderedDict() + self._prepare() + + def __del__(self): + if self.h5 is not None: + self.h5.close() + + def get_op(self, op_id): + if op_id in self.ops: + return self.ops[op_id] + self.log.warning("Can not find any h5 op id: %s" 
% op_id) + return None + + def get_tensor_by_name(self, tensor_name): + if self.h5 is None: + self.log.warning("h5 file is None.") + return None + if tensor_name in self.h5: + return np.array(self.h5[tensor_name]) + return None + + def print_tensor(self, tensor_name): + tensor = self.get_tensor_by_name(tensor_name) + if tensor is None: + self.log.warning("Tensor:%s does not exist." % tensor_name) + return + file_path = self._dump_numpy(tensor_name, tensor) + util.print_npy_summary(os.path.dirname(file_path), os.path.basename(file_path)) + + def _prepare(self): + if not os.path.isfile(self.file_name) or not str(self.file_name).endswith(Constant.Suffix.H5): + self.log.error("File [%s] does not exist or is not an h5 file" % self.file_name) + return + if h5py is None: + self.log.warning("Can not find python module h5py, skip parsing h5 file.") + return + self.h5 = h5py.File(self.file_name, 'r') + self._list_tensors(self.h5) + + def _list_tensors(self, h5, idx=0, name=''): + for item in h5: + item_name = name + '/' + item + if idx == 1: + self.ops[str(item)] = H5Op(item_name, h5[item_name], self.prefix) + continue + self._list_tensors(h5[item], idx+1, item_name) + + def _list_tensors_loop(self, h5, idx=0, name=''): + for item in h5: + if isinstance(h5[item], h5py.Group): + item_name = name + '/' + item + print(item_name) + # check + if H5_NAME_IDX[idx] == IdxType.OP_NAME and item_name not in self.ops: + self.ops[item_name] = H5Op(item) + if H5_NAME_IDX[idx] == IdxType.OP_ANC: + self.ops[item_name] = H5Op(item) + self._list_tensors(h5[item], idx + 1, item_name) + + def _dump_numpy(self, tensor_name, tensor): + if not os.path.exists(cfg.PT_DUMP_DECODE_DIR): + util.create_dir(cfg.PT_DUMP_DECODE_DIR) + file_name = tensor_name.replace('/', '_').strip('_') + '.npy' + file_path = os.path.join(cfg.PT_DUMP_DECODE_DIR, file_name) + self.log.info("Dump file: %s" % file_path) + np.save(file_path, tensor) + return file_path diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/onnx_builder.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/onnx_builder.py new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py new file mode 100644 index 000000000..02084770f --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/precision_tool_exception.py @@ -0,0 +1,24 @@ +# coding=utf-8 +import logging + + +class PrecisionToolException(Exception): + """ + Class for PrecisionTool Exception + """ + def __init__(self, error_info): + super(PrecisionToolException, self).__init__() + self.error_info = error_info + + +def catch_tool_exception(func): + def handle(*args, **kwargs): + log = logging.getLogger() + try: + return func(*args, **kwargs) + except PrecisionToolException as pte: + log.warning(pte.error_info) + except SystemExit: + # do not exit + log.debug("Exit") + return handle diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py new file mode 100644 index 000000000..7412b6cee --- /dev/null +++ 
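A usage note for the catch_tool_exception decorator above: it logs a PrecisionToolException as a warning instead of letting it propagate, so a decorated call simply returns None on failure. A hedged sketch, assuming the two definitions above are importable as shown:

    from precision_tool_exception import PrecisionToolException, catch_tool_exception

    @catch_tool_exception
    def parse_build_file(path):
        # Hypothetical parser that always fails, to show the decorator's effect.
        raise PrecisionToolException("No graph in file: %s" % path)

    result = parse_build_file("ge_proto_00000_Build.txt")  # warning is logged
    assert result is None  # the exception was swallowed, not propagated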
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/tool_object.py @@ -0,0 +1,10 @@ +# coding=utf-8 + + +class ToolObject(object): + _instance = None + + def __new__(cls, *args, **kwargs): + if not cls._instance: + cls._instance = super(ToolObject, cls).__new__(cls, *args, **kwargs) + return cls._instance diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py new file mode 100644 index 000000000..88fbe0b00 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/util/util.py @@ -0,0 +1,536 @@ +# coding=utf-8 +import csv +import re +import sys +import os +import shutil +import numpy as np +import logging +import subprocess +from .constant import Constant +from .precision_tool_exception import PrecisionToolException +from .precision_tool_exception import catch_tool_exception +from .file_desc import * +from ..config import config as cfg + +try: + from rich.traceback import install + from rich.panel import Panel + from rich.table import Table + from rich import print as rich_print + from rich.columns import Columns + install() +except ImportError as import_err: + install = None + Panel = None + Table = None + Columns = None + rich_print = print + print("Failed to import rich. some function may disable. Run 'pip3 install rich' to fix it.", + import_err) + +try: + import readline + readline.parse_and_bind('tab: complete') +except ImportError as import_error: + print("Unable to import module: readline. Run 'pip3 install gnureadline pyreadline' to fix it.") + +# patterns +OFFLINE_DUMP_PATTERN = r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})[.csv]?" +OFFLINE_DUMP_DECODE_PATTERN = \ + r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.?[0-9]?[\.0-9]+?" 
\ + r"\.([a-z]+)\.([0-9]{1,255})\.npy$" +OFFLINE_DUMP_CONVERT_PATTERN = \ + r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" \ + r"\.([a-z]+)\.([0-9]{1,255})(\.[x0-9]+)?\.npy$" +OFFLINE_FILE_NAME = 'op_type.op_name.task_id(.stream_id).timestamp' +OP_DEBUG_NAME = 'OpDebug.Node_OpDebug.taskid.timestamp' +CPU_DUMP_DECODE_PATTERN = r"^([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})\.npy$" +CPU_FILE_DECODE_NAME = 'op_name.0(.0).timestamp.npy' +OP_DEBUG_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" +OP_DEBUG_DECODE_PATTERN = r"Opdebug\.Node_OpDebug\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})[\.0-9]*\.([a-z]+)\.([0-9]{1,255})\.json" +VECTOR_COMPARE_RESULT_PATTERN = r"result_([0-9]{1,255})\.csv" +TIMESTAMP_DIR_PATTERN = '[0-9]{1,255}' +NUMPY_PATTERN = r".*\.npy$" +H5_PATTERN = r".*\.h5$" +CSV_SHUFFIX = '.csv' +NUMPY_SHUFFIX = '.npy' +CKPT_META_SHUFFIX = r".*.meta$" +MAPPING_CSV = "mapping.csv" + + +class Util(object): + def __init__(self): + self.atc = None + self.ms_accu_cmp = None + logging.basicConfig(level=cfg.LOG_LEVEL, format="%(asctime)s (%(process)d) -[%(levelname)s]%(message)s", + datefmt="%Y-%m-%d %H:%M:%S") + self.log = logging.getLogger() + self.python = sys.executable + + def get_log(self): + return self.log + + def execute_command(self, cmd: str): + """ Execute shell command + :param cmd: command + :return: status code + """ + if cmd is None: + self.log.error("Command is None.") + return -1 + self.log.debug("[Run CMD]: %s", cmd) + complete_process = subprocess.run(cmd, shell=True) + return complete_process.returncode + + @staticmethod + def empty_dir(dir_path: str) -> bool: + """ Check if target dir is empty + :param dir_path: target dir + :return: bool + """ + if not os.path.exists(dir_path): + return True + if len(os.listdir(dir_path)) == 0: + return True + return False + + def convert_proto_to_json(self, src_file, dst_file): + """Convert GE proto graphs to json format. + command: atc --mode=5 --om=ge_proto_Build.txt --json=xxx.json + :param src_file: proto file + :param dst_file: output json file + :return: result json file + """ + if not os.path.exists(src_file): + raise PrecisionToolException("Source proto file %s not exist." % src_file) + # src_file = os.path.join(cfg.GRAPH_DIR_ALL, proto_file) + # json_file = proto_file + '.json' + # dst_file = os.path.join(cfg.GRAPH_DIR_BUILD, json_file) + if os.path.exists(dst_file) and os.path.getmtime(dst_file) > os.path.getmtime(src_file): + self.log.debug("GE graph build json already exist.") + return dst_file + cmd = '%s --mode=5 --om=%s --json=%s' % (self._get_atc(), src_file, dst_file) + self.execute_command(cmd) + if not os.path.isfile(dst_file): + raise PrecisionToolException("Convert GE build graph to json failed. can not find any json file.") + self.log.info('Finish convert [%s] build graph from proto to json format.', src_file) + return dst_file + + def convert_dump_to_npy(self, src_file, dst_path, data_format=None): + """Convert npu dump files to npy format. + :param src_file: src file + :param dst_path: dst path + :param data_format: target data format + :return: status code + """ + self.create_dir(dst_path) + format_cmd = '' if data_format is None else '-f %s' % data_format + cmd = '%s %s convert -d %s -out %s %s' % (self.python, self._get_ms_accu_cmp(), src_file, dst_path, format_cmd) + return self.execute_command(cmd) + + def compare_vector(self, npu_dump_dir, cpu_dump_dir, graph_json, result_path): + """Run compare vector command. 
+ :param npu_dump_dir: npu dump data dir + :param cpu_dump_dir: cpu dump data dir + :param graph_json: graph json + :param result_path: result path + :return: status code + """ + self.create_dir(result_path) + if graph_json is None: + cmd = '%s %s compare -m %s -g %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, result_path) + else: + cmd = '%s %s compare -m %s -g %s -f %s -out %s' % ( + self.python, self._get_ms_accu_cmp(), npu_dump_dir, cpu_dump_dir, graph_json, result_path) + return self.execute_command(cmd) + + def list_dump_files(self, path, sub_path=''): + """List npu dump files in npu dump dir. + default only list the newest sub dir ordered by timestamp. set sub_path to specific other sub_path + :param path: dump path + :param sub_path: sub dir + :return: dump_files, parent_dirs + """ + parent_dirs = {} + dump_files = {} + newest_sub_path = self.get_newest_dir(path) if sub_path == '' else sub_path + dump_pattern = re.compile(OFFLINE_DUMP_PATTERN) + for dir_path, dir_names, file_names in os.walk(os.path.join(path, newest_sub_path), followlinks=True): + for name in file_names: + dump_match = dump_pattern.match(name) + if dump_match is None: + continue + dump_files[name] = self._gen_dump_file_info(name, dump_match, dir_path) + if dir_path not in parent_dirs: + parent_dirs[dir_path] = {} + parent_dirs[dir_path][name] = dump_files[name] + return dump_files, parent_dirs + + def parse_mapping_csv(self, path, pattern, extern_pattern=''): + """parse mapping csv in dump path""" + dump_files = {} + re_pattern = re.compile(pattern) + for dir_path, dir_names, file_names in os.walk(path, followlinks=True): + if MAPPING_CSV not in file_names: + continue + mapping = self.read_csv(os.path.join(dir_path, MAPPING_CSV)) + for item in mapping: + src_file = os.path.abspath(os.path.join(dir_path, item[0])) + if not os.path.isfile(src_file): + self.log.warning("Can not find file %s in mapping.csv, dir: %s.", item[0], dir_path) + continue + match = re_pattern.match(item[1]) + if match is None: + self.log.warning("file name [%s] in mapping.csv is invalid.", item[1]) + continue + file_desc = self._gen_dump_file_info(item[0], match, dir_path) + dst_file_name = '.'.join([file_desc.op_type, file_desc.file_name, str(file_desc.task_id), + str(file_desc.stream_id), str(file_desc.timestamp)]) + if item[1].endswith(Constant.Suffix.CSV): + dst_file_name += '.csv' + dst_file = os.path.abspath(os.path.join(dir_path, dst_file_name)) + if not os.path.islink(src_file): + os.rename(src_file, dst_file) + os.symlink(dst_file, src_file) + file_desc.path = dst_file + file_desc.file_name = dst_file_name + dump_files[item[1]] = file_desc + return dump_files + + def list_npu_dump_files(self, path, extern_pattern=''): + npu_dump_files = self._list_file_with_pattern(path, OFFLINE_DUMP_PATTERN, extern_pattern, + self._gen_dump_file_info) + npu_dump_files.update(self.parse_mapping_csv(path, OFFLINE_DUMP_PATTERN, extern_pattern)) + return npu_dump_files + + def list_ge_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, Constant.Pattern.GE_PROTO_GRAPH_PATTERN, extern_pattern, + self._gen_build_graph_file_info) + + def list_npu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_npu_dump_decode_file_info) + + def list_debug_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OP_DEBUG_DECODE_PATTERN, extern_pattern, + 
self._gen_overflow_debug_decode_file_info) + + def list_cpu_dump_decode_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CPU_DUMP_DECODE_PATTERN, extern_pattern, + self._gen_cpu_dump_decode_file_info) + + def list_cpu_graph_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, CKPT_META_SHUFFIX, extern_pattern, + self._gen_cpu_graph_files_info) + + def list_vector_compare_result_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, VECTOR_COMPARE_RESULT_PATTERN, extern_pattern, + self._gen_vector_compare_result_file_info) + + def list_npu_dump_convert_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, OFFLINE_DUMP_CONVERT_PATTERN, extern_pattern, + self._gen_npu_dump_convert_file_info) + + def list_numpy_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, NUMPY_PATTERN, extern_pattern, + self._gen_numpy_file_info) + + def list_h5_files(self, path, extern_pattern=''): + return self._list_file_with_pattern(path, H5_PATTERN, extern_pattern, + self._gen_file_info) + + def create_dir(self, path): + """Create dir if not exist + :param path: path + :return: bool + """ + if os.path.exists(path): + return True + try: + os.makedirs(path, mode=0o700) + except OSError as err: + self.log.error("Failed to create %s. %s", path, str(err)) + return False + return True + + def clear_dir(self, path: str, pattern=''): + """Clear dir with pattern (file/path name match pattern will be removed) + :param path: path + :param pattern: pattern + :return: None + """ + if not os.path.exists(path): + return + try: + for f in os.listdir(path): + if not re.match(pattern, f): + continue + file_path = os.path.join(path, f) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except OSError as err: + self.log.error("Failed to remove %s. %s", path, str(err)) + + @staticmethod + def npy_info(source_data): + """Get npy information + :param source_data: npy path + :return: (shape, dtype) + """ + if isinstance(source_data, str): + if not str(source_data).endswith(NUMPY_SHUFFIX): + raise PrecisionToolException("Npy file [%s] is invalid" % source_data) + data = np.load(source_data, allow_pickle=True) + elif isinstance(source_data, np.ndarray): + data = source_data + else: + raise PrecisionToolException("Invalid source data:%s" % source_data) + if data.dtype == 'object': + raise PrecisionToolException("Invalid source data, data is object.") + if np.size(data) == 0: + raise PrecisionToolException("Empty source data:%s" % source_data) + return data.shape, data.dtype, data.max(), data.min(), data.mean() + + @catch_tool_exception + def gen_npy_info_txt(self, source_data): + """ Generate numpy info txt. 
+ :param source_data: source path or np.ndarray + :return: txt + """ + try: + shape, dtype, max_data, min_data, mean = self.npy_info(source_data) + return '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (shape, dtype, max_data, min_data, mean) + except PrecisionToolException: + return '' + + def print_npy_summary(self, path, file_name, is_convert=False, extern_content=''): + """Print summary of npy data + :param path: file path + :param file_name: file name + :param is_convert: if convert to txt file + :param extern_content: extern content append to the summary + :return: None + """ + target_file = os.path.join(path, file_name) + if not os.path.exists(target_file): + raise PrecisionToolException("File [%s] not exist" % target_file) + data = np.load(target_file, allow_pickle=True) + table = self.create_table('', ['Index', 'Data']) + flatten_data = data.flatten() + for i in range(min(16, int(np.ceil(flatten_data.size / 8)))): + last_idx = min(flatten_data.size, i*8+8) + table.add_row(str(i * 8), ' '.join(flatten_data[i*8: last_idx].astype('str').tolist())) + summary = ['[yellow]%s[/yellow]' % self.gen_npy_info_txt(data), 'Path: %s' % target_file] + if is_convert: + summary.append('TxtFile: %s.txt' % target_file) + if extern_content != '': + summary.append('%s' % extern_content) + self.print_panel(self.create_columns([table, Constant.NEW_LINE.join(summary)]), file_name) + if is_convert: + self.save_npy_to_txt(data, target_file + '.txt') + + def save_npy_to_txt(self, src_file, dst_file='', align=0): + """save numpy file to txt file. + default data will be aligned to the last axis of data.shape + :param src_file: src file name + :param dst_file: dst file name + :param align: data align + :return: None + """ + if dst_file == '': + dst_file = src_file + '.txt' + if os.path.exists(dst_file): + self.log.debug("Dst file %s exists, will not save new one.", dst_file) + return + if isinstance(src_file, str): + data = np.load(src_file, allow_pickle=True) + elif isinstance(src_file, np.ndarray): + data = src_file + else: + raise PrecisionToolException("invalid src_file: %s", src_file) + if data.dtype == 'object': + raise PrecisionToolException("Invalid source data, data is object.") + shape = data.shape + data = data.flatten() + if align == 0: + if len(shape) == 0: + align = 1 + else: + align = shape[-1] + elif data.size % align != 0: + pad_array = np.zeros((align - data.size % align,)) + data = np.append(data, pad_array) + np.savetxt(dst_file, data.reshape((-1, align)), delimiter=' ', fmt='%g') + + def read_csv(self, path): + """Read csv file to list. + :param path: csv file path + :return: list + """ + if not str(path).endswith(CSV_SHUFFIX): + self.log.error("csv path [%s] is invalid", path) + return + rows = [] + with open(path) as f: + csv_handle = csv.reader(f) + for row in csv_handle: + rows.append(row) + return rows + + @staticmethod + def print(content): + rich_print(content) + + @staticmethod + def render(content, rich=True): + if rich: + rich_print(content) + else: + print(content) + + @staticmethod + def create_table(title, columns): + if Table is None: + raise PrecisionToolException("No rich module error.") + table = Table(title=title) + for column_name in columns: + table.add_column(column_name, overflow='fold') + return table + + @staticmethod + def create_columns(content): + if Columns is None: + raise PrecisionToolException("No rich module error.") + return Columns(content) + + def print_panel(self, content, title='', fit=True): + """ Print panel. 
+ :param content: content + :param title: title + :param fit: if panel size fit the content + :return:Node + """ + if Panel is None: + print(content) + return + if fit: + self.print(Panel.fit(content, title=title)) + else: + self.print(Panel(content, title=title)) + + @staticmethod + def _detect_file(file_name, root_dir): + """Find file in root dir""" + result = [] + for dir_path, dir_names, file_names in os.walk(root_dir, followlinks=True): + for name in file_names: + if re.match(file_name, name): + result.append(os.path.join(dir_path, name)) + return result + + def _detect_file_if_not_exist(self, target_file): + """Find specific file in cmd root path""" + self.log.info("Try to auto detect file with name: %s.", target_file) + res = self._detect_file(target_file, cfg.CMD_ROOT_PATH) + if len(res) == 0: + raise PrecisionToolException("Cannot find any file named %s in dir %s" % (target_file, cfg.CMD_ROOT_PATH)) + self.log.info("Detect [%s] success. %s", target_file, res) + return res[0] + + def _get_atc(self): + if self.atc is None: + self.atc = self._detect_file_if_not_exist('^atc$') + return self.atc + + def _get_ms_accu_cmp(self): + if self.ms_accu_cmp is None: + self.ms_accu_cmp = self._detect_file_if_not_exist(cfg.MS_ACCU_CMP) + return self.ms_accu_cmp + + def get_newest_dir(self, path: str): + """Find the newest subdir in specific path, subdir should named by timestamp.""" + if not os.path.isdir(path): + self.log.warning("Path [%s] not exists", path) + return '' + paths = os.listdir(path) + sub_paths = [] + for p in paths: + if re.match(TIMESTAMP_DIR_PATTERN, p): + sub_paths.append(p) + if len(sub_paths) == 0: + self.log.debug("Path [%s] has no timestamp dirs.", path) + return '' + newest_sub_path = sorted(sub_paths)[-1] + self.log.info("Sub path num:[%d]. Dirs[%s], choose[%s]", len(sub_paths), str(sub_paths), newest_sub_path) + return newest_sub_path + + @staticmethod + def _list_file_with_pattern(path, pattern, extern_pattern, gen_info_func): + if path is None or not os.path.exists(path): + raise PrecisionToolException("Path %s not exist." 
% path) + file_list = {} + re_pattern = re.compile(pattern) + for dir_path, dir_names, file_names in os.walk(path, followlinks=True): + for name in file_names: + match = re_pattern.match(name) + if match is None: + continue + if extern_pattern != '' and not re.match(extern_pattern, name): + continue + file_list[name] = gen_info_func(name, match, dir_path) + return file_list + + @staticmethod + def _gen_numpy_file_info(name, math, dir_path): + return FileDesc(name, dir_path) + + @staticmethod + def _gen_file_info(name, math, dir_path): + return FileDesc(name, dir_path) + + @staticmethod + def _gen_build_graph_file_info(name, match, dir_path): + return BuildGraphFileDesc(name, dir_path, -1, int(match.group(1)), match.groups()[-1]) + + @staticmethod + def _gen_dump_file_info(name, match, dir_path): + return NpuDumpFileDesc(name, dir_path, int(match.groups()[-1]), op_name=match.group(2), op_type=match.group(1), + task_id=int(match.group(3)), stream_id=match.group(4)) + + @staticmethod + def _gen_npu_dump_decode_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name=match.group(2), + op_type=match.group(1), task_id=int(match.group(3)), + anchor_type=match.groups()[-2], anchor_idx=int(match.groups()[-1])) + + @staticmethod + def _gen_cpu_dump_decode_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, -1, op_name=match.group(1), op_type='', task_id=0, + anchor_type='output', anchor_idx=int(match.group(2))) + + @staticmethod + def _gen_cpu_graph_files_info(name, match, dir_path): + return FileDesc(name, dir_path, -1) + + @staticmethod + def _gen_overflow_debug_decode_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-3]), op_name='Node_OpDebug', op_type='Opdebug', + task_id=int(match.group(1)), anchor_type=match.groups()[-2], + anchor_idx=int(match.groups()[-1])) + + @staticmethod + def _gen_vector_compare_result_file_info(name, match, dir_path): + return FileDesc(name, dir_path, int(match.group(1))) + + @staticmethod + def _gen_npu_dump_convert_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-4]), op_name=match.group(2), + op_type=match.group(1), task_id=int(match.group(3)), anchor_type=match.groups()[-3], + anchor_idx=int(match.groups()[-2])) + + +util = Util() -- Gitee From 94c931814638103eea01f614e6fcead0447f07c7 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 07:46:50 +0000 Subject: [PATCH 33/38] 1 Signed-off-by: huangju1993 --- .../precision_tool/lib/__init__.py | 0 .../precision_tool/lib/cpu_tvm.py | 51 ++++ .../precision_tool/lib/interactive_cli.py | 87 +++++++ .../precision_tool/lib/precision_tool.py | 230 ++++++++++++++++++ 4 files changed, 368 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/__init__.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/__init__.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/__init__.py new file 
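A note on the naming patterns at the top of util.py above: OFFLINE_DUMP_PATTERN encodes the dump file scheme 'op_type.op_name.task_id(.stream_id).timestamp', and _gen_dump_file_info reads the match groups in that order, with a missing stream_id defaulting to 0. A quick standalone check (the file names are invented):

    import re

    OFFLINE_DUMP_PATTERN = r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)\.?([0-9]+)?\.([0-9]{1,255})[.csv]?"

    for name in ('Conv2D.conv1.3.5.1617072000',   # with stream_id
                 'MatMul.fc1.12.1617072001'):     # stream_id omitted
        m = re.match(OFFLINE_DUMP_PATTERN, name)
        op_type, op_name, task_id, stream_id, timestamp = m.groups()
        # NpuDumpFileDesc treats a missing stream_id as 0.
        stream_id = 0 if stream_id is None else int(stream_id)
        print(op_type, op_name, int(task_id), stream_id, int(timestamp))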
mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py
new file mode 100644
index 000000000..a0906071e
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/cpu_tvm.py
@@ -0,0 +1,51 @@
+import numpy as np
+from tbe import tvm
+
+
+class CpuTvm(object):
+    def __init__(self, json_file, dump_input_files, dump_output_files):
+        self.json_file = json_file
+        self.dump_input_files = dump_input_files
+        self.dump_output_files = dump_output_files
+        self.input_list = []
+        self.output_list = []
+
+    def _load_schedule(self):
+        with open(self.json_file, 'r') as jsonfile:
+            tvm_node = tvm.load_json(jsonfile.read())
+        self.output_list = tvm_node.op.attrs['output_list']
+        self.input_list = tvm_node.op.attrs['input_list']
+        schedule = tvm.create_schedule([res.op for res in self.output_list])
+        return schedule
+
+    def _build_tvm(self, schedule):
+        tensor_list = [ele for ele in self.input_list if ele is not None]
+        for ele in self.output_list:
+            if ele is not None:
+                tensor_list.append(ele)
+        fusion_op = tvm.build(schedule, tensor_list, "c", "llvm")
+        return fusion_op
+
+    def _load_data(self, dump_files):
+        ctx = tvm.cpu(0)
+        data_tvm = []
+        for dump_file in dump_files:
+            data_temp_numpy = np.load(dump_file)
+            data_temp_tvm = tvm.nd.array(data_temp_numpy, ctx)
+            data_tvm.append(data_temp_tvm)
+        return data_tvm
+
+    def run_cpu_tvm(self):
+        # load schedule and build tvm
+        schedule = self._load_schedule()
+        fusion_op = self._build_tvm(schedule)
+
+        # load data and run the fused op on the TVM CPU backend
+        data_tvm_in = self._load_data(self.dump_input_files)
+        data_tvm_out = self._load_data(self.dump_output_files)
+        data_tvm_in.extend(data_tvm_out)
+        fusion_op(*data_tvm_in)
+
+        # convert the TVM output tensors back to numpy arrays
+        data_np_out = [data.asnumpy() for data in data_tvm_out]
+        return data_np_out
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py
new file mode 100644
index 000000000..4e6aedd18
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/interactive_cli.py
@@ -0,0 +1,87 @@
+# coding=utf-8
+import cmd
+from .util.util import util
+from .util.constant import Constant
+from .precision_tool import PrecisionTool
+
+HEADER = r"""    ____                 _      _            ______            __
+   / __ \________  _____(_)____(_)___  ____/_  __/___  ____  / /
+  / /_/ / ___/ _ \/ ___/ / ___/ / __ \/ __ \/ / / __ \/ __ \/ /
+ / ____/ /  / __/ /__/ (__  ) / /_/ / / / / / / /_/ / /_/ / /
+/_/   /_/   \___/\___/_/____/_/\____/_/ /_/_/  \____/\____/_/  version=%s""" % Constant.VERSION
+
+HELP_AC = "Run auto check function, use [-c] to start vector compare process.\n" \
+          "    usage: ac (-c) \n"
+HELP_RUN = "Run any shell command.\n" \
+           "    usage: (run) vim tensor_name.txt \n"
+HELP_PT = "Print npy tensor, use [-c] to convert and save to txt file.\n" \
+          "    usage: pt (-c) [tensor_name.npy] \n"
+
+
+class InteractiveCli(cmd.Cmd):
+    def __init__(self):
+        cmd.Cmd.__init__(self)
+        self.prompt = "PrecisionTool > "
+        self.precision_tool = None
+        util.print_panel(HEADER)
+        self._prepare()
+
+    def default(self, line=''):
+        util.execute_command(line)
+        return False
+
+    def _prepare(self):
+        self.precision_tool = PrecisionTool()
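+        # prepare() creates the data root dir (cfg.DATA_ROOT_DIR) and initializes the
+        # graph/dump/overflow/fusion/compare managers (see PrecisionTool.prepare below)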
+        self.precision_tool.prepare()
+
+    def do_ac(self, line=''):
+        """Auto check."""
+        self.precision_tool.do_auto_check(self._parse_argv(line))
+
+    def do_run(self, line=''):
+        """Run any shell command"""
+        util.execute_command(line)
+
+    def do_ls(self, line=''):
+        """List ops: \n usage: ls (op(default)/dump) -n [op_name] -t [op_type]"""
+        argv = self._parse_argv(line)
+        if len(argv) > 0 and argv[0] == 'dump':
+            return self.precision_tool.do_list_dump(argv[1:])
+        self.precision_tool.do_list_nodes(argv)
+
+    def do_ni(self, line=''):
+        """Print node info:\n usage: ni (-n) [op_name]"""
+        self.precision_tool.do_node_info(self._parse_argv(line, '-n'))
+
+    def do_dc(self, line=''):
+        """Convert npu dump by op names:\n usage: dc (-n) [npu dump file] -f [target format]"""
+        self.precision_tool.do_convert_npu_dump(self._parse_argv(line, '-n'))
+
+    def do_vc(self, line=''):
+        """Do vector compare: \n usage: vc """
+        self.precision_tool.do_vector_compare(self._parse_argv(line))
+
+    def do_vcs(self, line=''):
+        """Do vector compare summary"""
+        self.precision_tool.do_vector_compare_summary(self._parse_argv(line))
+
+    def do_pt(self, line=''):
+        """Print data info:\n usage: pt (-n) [*.npy] (-c)\n -c: convert and save to txt file"""
+        self.precision_tool.do_print_data(self._parse_argv(line, '-n'))
+
+    def do_cp(self, line=''):
+        """Compare two data files"""
+        self.precision_tool.do_compare_data(self._parse_argv(line, '-n'))
+
+    def do_train(self, line=''):
+        """Train process:\n usage: train -d all -a dump"""
+        self.precision_tool.do_train_analysis(self._parse_argv(line))
+
+    @staticmethod
+    def _parse_argv(line, insert=None):
+        argv = line.split() if line != '' else []
+        if '-h' in argv:
+            return argv
+        if insert is not None and len(argv) > 0 and argv[0] != insert:
+            argv.insert(0, insert)
+        return argv
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py
new file mode 100644
index 000000000..d118b86bc
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/precision_tool/lib/precision_tool.py
@@ -0,0 +1,230 @@
+import argparse
+import os
+import time
+
+from .adapter.overflow import Overflow
+from .dump.dump_manager import DumpManager
+from .graph.graph_manager import GraphManager
+from .compare.compare import Compare
+from .adapter.fusion import Fusion
+from .train.train_analysis import TrainAnalysis
+from .util.util import util
+from .util.constant import Constant
+from .config import config as cfg
+from .util.precision_tool_exception import PrecisionToolException
+from .util.precision_tool_exception import catch_tool_exception
+
+
+class PrecisionTool(object):
+    def __init__(self):
+        """init"""
+        self.graph_manager = GraphManager()
+        self.overflow = Overflow()
+        self.dump_manager = DumpManager()
+        self.compare = Compare()
+        self.fusion = Fusion()
+        self.train_analysis = TrainAnalysis()
+        self.log = util.get_log()
+
+    @catch_tool_exception
+    def prepare(self):
+        """prepare"""
+        util.create_dir(cfg.DATA_ROOT_DIR)
+        self.graph_manager.prepare()
+        self.dump_manager.prepare()
+        self.overflow.prepare()
+        self.fusion.prepare()
+        self.compare.prepare()
+
+    @catch_tool_exception
+    def do_auto_check(self, argv):
+        """Auto check"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-c', '--vector_compare', dest='vector_compare', help='Run vector compare process',
action='store_true') + parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args(argv) + # vector compare + if args.vector_compare: + self.do_vector_compare() + self.do_vector_compare_summary() + self.do_check_fusion() + self.do_check_overflow(args.limit) + self.do_check_cast() + self.do_check_graph_similarity() + + @catch_tool_exception + def do_check_overflow(self, limit=3): + """check overflow""" + self.overflow.check(limit) + + @catch_tool_exception + def do_check_cast(self): + self.graph_manager.check_cast() + + @catch_tool_exception + def do_check_dtype(self): + """Check input/output dtype""" + self.graph_manager.check_dtype() + + @catch_tool_exception + def do_check_fusion(self): + """print fusion info summary""" + self.fusion.check() + + @catch_tool_exception + def do_check_graph_similarity(self): + self.graph_manager.check_similarity() + + @catch_tool_exception + def do_vector_compare(self, argv=None): + """do vector compare""" + parser = argparse.ArgumentParser() + parser.add_argument('-lt', '--left', dest='lt', default=None, help='left path(npu dump path)') + parser.add_argument('-rt', '--right', dest='rt', default=None, help='right path(cpu/npu dump path)') + parser.add_argument('-g', '--graph', dest='graph', required=False, default=None, help='graph json file') + args = parser.parse_args() if argv is None else parser.parse_args(argv) + # 1. compare npu_debug0 - tf dump data (auto) + # 2. compare npu_debug0 - npu_debug1 dump data + # 3. compare dir - dir dump data + result_dir = os.path.join(cfg.VECTOR_COMPARE_PATH, time.strftime("%Y%m%d%H%M%S", time.localtime())) + if args.lt is None: + debug_0_dump_root = self.dump_manager.get_dump_root_dir(Constant.DEFAULT_DEBUG_ID) + if util.empty_dir(debug_0_dump_root): + raise PrecisionToolException("NPU debug_0 dump dir is empty, no files to compare.") + if not util.empty_dir(cfg.TF_DUMP_DIR): + self.log.info("Tf dump dir is not empty, will compare npu dump data with tf dump data.") + self.compare.npu_tf_vector_compare(self.graph_manager.get_graphs(Constant.DEFAULT_DEBUG_ID), + debug_0_dump_root, cfg.TF_DUMP_DIR, result_dir) + else: + self.log.warning("Tf dump dir is empty, maybe run [python3 precision_tool/cli.py tf_dump] to decode" + " tf debug data.") + debug_1_dump_root = self.dump_manager.get_dump_root_dir(Constant.NPU_DEBUG_ID_1) + if debug_1_dump_root is not None and not util.empty_dir(debug_1_dump_root): + self.log.info("NPU debug_1 dump dir is not empty, will compare two npu dump data.") + self.compare.npu_vector_compare(debug_0_dump_root, debug_1_dump_root) + else: + lh_path = args.lt + rh_path = args.rt + graph_json = args.graph + self.compare.vector_compare(lh_path, rh_path, result_dir, graph_json) + self.compare.vector_summary(result_dir) + + @catch_tool_exception + def do_vector_compare_summary(self, argv=None): + parser = argparse.ArgumentParser(description="show vector compare result summary.") + parser.add_argument('-f', '--file', dest='file', default=None, required=False, help='compare_result file/path') + parser.add_argument('-c', '--cos_sim', dest='cos_sim', type=float, help='cos_sim_threshold', default=0.98) + parser.add_argument('-l', '--limit', dest='limit', type=int, help='limit', default=3) + args = parser.parse_args() if argv is None else parser.parse_args(argv) + error_ops = self.compare.vector_summary(args.file, args.cos_sim, args.limit) + # parse error_ops + + @catch_tool_exception + def do_print_data(self, argv=None): + """print tensor data""" + parser = 
argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='tensor file name')
+        args = parser.parse_args() if argv is None else parser.parse_args(argv)
+        self.dump_manager.print_tensor(args.name, True)
+
+    @catch_tool_exception
+    def do_list_nodes(self, argv):
+        """list op nodes in graph"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', default='', help='list by op type')
+        parser.add_argument('-n', '--name', dest='name', default='', help='list by op name')
+        parser.add_argument('-f', '--fusion', dest='fusion', default='', help='list by op fusion pass')
+        parser.add_argument('-k', '--kernel_name', dest='kernel_name', default='', help='list by op kernel_name')
+        args = parser.parse_args(argv)
+        self.graph_manager.print_op_list(args.type, args.name, args.fusion, args.kernel_name)
+
+    @catch_tool_exception
+    def do_node_info(self, argv):
+        """Print op node info"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', default='', help='op name')
+        parser.add_argument('-g', '--graph', dest='graph', help='graph name')
+        parser.add_argument('-a', '--attr', dest='attr', action='store_true', help='show all attr info')
+        parser.add_argument('-c', '--check', dest='check', action='store_true', help='check single op precision')
+        parser.add_argument('-s', '--save', dest='save', type=int, default=0,
+                            help='save subgraph, param gives the depth of the subgraph')
+        args = parser.parse_args(argv)
+        # print graph op info
+        npu_ops, _ = self.graph_manager.get_ops(args.name, args.graph)
+        npu_op_summary, tf_op_summary = self.graph_manager.op_graph_summary(npu_ops, args.attr)
+        npu_dump_summary, tf_dump_summary = self.dump_manager.op_dump_summary(npu_ops)
+        pt_dump_summary = self.dump_manager.pt_dump_summary(args.name)
+        # merge graph/dump/compare info
+        for debug_id, graph_summary in npu_op_summary.items():
+            for graph_name, summary_detail in graph_summary.items():
+                summary_txt = [summary_detail]
+                if debug_id in npu_dump_summary and graph_name in npu_dump_summary[debug_id]:
+                    summary_txt.append(npu_dump_summary[debug_id][graph_name])
+                if tf_dump_summary is not None:
+                    summary_txt.append(tf_dump_summary)
+                title = "[green](%s)[/green] %s" % (debug_id, graph_name)
+                util.print_panel(Constant.NEW_LINE.join(summary_txt), title)
+        if pt_dump_summary != '':
+            util.print_panel(pt_dump_summary, args.name)
+        if args.save != 0:
+            self.graph_manager.save_sub_graph(npu_ops, args.save)
+
+    @catch_tool_exception
+    def do_compare_data(self, argv):
+        """Compare two tensors"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='names', type=str, default=[], help='op name', nargs='+')
+        parser.add_argument('-p', '--print', dest='count', default=20, type=int, help='print err data num')
+        parser.add_argument('-s', '--save', dest='save', action='store_true', help='save data in txt format')
+        parser.add_argument('-al', '--atol', dest='atol', default=0.001, type=float, help='set atol')
+        parser.add_argument('-rl', '--rtol', dest='rtol', default=0.001, type=float, help='set rtol')
+        args = parser.parse_args(argv)
+        if len(args.names) != 2:
+            self.log.error("Compare requires exactly 2 file names.")
+        else:
+            self.compare.compare_data(args.names[0], args.names[1], args.save, args.rtol, args.atol, args.count)
+
+    @catch_tool_exception
+    def do_list_dump(self, argv):
+        """List dump files"""
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-t', '--type', dest='type', help='op type')
+        parser.add_argument('-n', '--name', dest='name', help='op name')
+        args = parser.parse_args(argv)
+        self.dump_manager.list_dump(args.type, args.name)
+
+    @catch_tool_exception
+    def do_convert_npu_dump(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-n', '--name', dest='name', help='op name')
+        parser.add_argument('-f', '--format', dest='format', default=None, required=False, help='target format')
+        parser.add_argument('-o', '--output', dest='output', required=False, default=None, help='output path')
+        args = parser.parse_args(argv)
+        self.dump_manager.convert_npu_dump(args.name, args.format, args.output)
+
+    @catch_tool_exception
+    def do_convert_all_npu_dump(self):
+        self.dump_manager.decode_all_npu_dump()
+
+    @catch_tool_exception
+    def check_graph_similarity(self):
+        """Check graph similarity"""
+
+    @catch_tool_exception
+    def do_train_analysis(self, argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-d', '--device', dest='device', default='all', required=False,
+                            help='train device, support cpu/npu/all')
+        parser.add_argument('-a', '--action', dest='action', default='dump', required=False,
+                            help='action, support dump(-d cpu/npu)[overflow]|fusion_off|fusion_switch(npu)')
+        args = parser.parse_args(argv)
+        self.train_analysis.run(args.device, args.action)
+
+    def single_cmd(self, argv):
+        cmd_func_map = {'compare': self.do_compare_data,
+                        'vector_compare': self.do_vector_compare,
+                        'train': self.do_train_analysis}
+        if argv[1] in cmd_func_map:
+            func = cmd_func_map[argv[1]]
+            return func(argv[2:])
+        raise PrecisionToolException("cmd %s is not supported or cmd should be run in interactive mode." % argv[1])
-- Gitee

From 7f2f3063d0e3865feeb22537d29f6a516a05cd65 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 08:27:14 +0000
Subject: [PATCH 34/38] cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh.
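
Wire conda environment selection and docker adaptation into the 1p
performance script. A sketch of the expected invocation (the dataset
path and env name here are illustrative, not taken from this patch):

    bash test/train_performance_bs256_1p.sh --data_path=/path/to/imagenet \
        --conda_name=my_conda_env --docker_enable=basic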
Signed-off-by: huangju1993
---
 .../test/train_performance_bs256_1p.sh        | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
index 48c689f74..c85da6f03 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
@@ -4,6 +4,8 @@ cur_path=`pwd`
 
 #Collective communication parameters, no need to modify
+source ~/.bashrc
+docker_enable="false"
 export RANK_SIZE=1
 export JOB_ID=99990001
 export RANK_ID=1
@@ -90,6 +92,12 @@ do
         cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
     elif [[ $para == --data_path* ]];then
         data_path=`echo ${para#*=}`
+    elif [[ $para == --conda_name* ]];then
+        conda_name=`echo ${para#*=}`
+        source $cur_path/set_conda.sh
+        source activate $conda_name
+    elif [[ $para == --docker_enable* ]];then
+        docker_enable=`echo ${para#*=}`
     fi
 done
 
@@ -103,6 +111,13 @@ if [[ $data_path == "" ]];then
     exit 1
 fi
 
+#Docker adaptation
+if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then
+    echo "docker_enable basic"
+    export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin
+    export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH
+fi
+
 #Modify parameters
 sed -i "50s|PATH_TO_BE_CONFIGURED|${data_path}|g" $cur_path/../src/configs/res50_256bs_1p.py
 sed -i "107s|PATH_TO_BE_CONFIGURED|${cur_path}/output/0/d\_solution/ckpt0|g" $cur_path/../src/configs/res50_256bs_1p.py
@@ -173,7 +188,11 @@ if [[ ${fp32} == "--fp32" ]];then
 elif [[ ${hf32} == "--hf32" ]];then
     CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
 elif [[ ${ffts} == "--ffts" ]];then
-    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'_'ffts'
+    if [[ $docker_enable == "basic" ]];then
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'overflow'_'perf'_'ffts'
+    else
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'_'ffts'
+    fi
 else
     CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
 fi
@@ -202,3 +221,5 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+rm -rf $cur_path/output/overflow_dump
\ No newline at end of file
-- Gitee

From 86616d0ea9f24155a7a445fed178a3a554200f74 Mon Sep 17 00:00:00 2001
From: huangju1993
Date: Wed, 17 Jul 2024 08:43:12 +0000
Subject: [PATCH 35/38] cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.
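
Add a dump-enabled variant of the base trainer. It appears to be a copy
of the stock trainer where, in debug mode, the NPURunConfig is built
with precision_tool's estimator_dump_config(action='dump') and a
modify_mixlist of ./src/trainers/ReduceMeanD.json, so per-op dump data
can be collected during training.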
Signed-off-by: huangju1993 --- .../src/trainers/gpu_base_trainer_dump | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump new file mode 100644 index 000000000..2bb7b1854 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump @@ -0,0 +1,243 @@ +# coding=utf-8 +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import tensorflow as tf +import math +import time +from . 
import train_helper +from .train_helper import stage +from utils.logger import rank0log + +#from tensorflow.contrib.offline_train.python.npu.npu_config import NPURunConfig +from npu_bridge.estimator.npu.npu_config import NPURunConfig +#from tensorflow.contrib.offline_train.python.npu.npu_estimator import NPUEstimator +from npu_bridge.estimator.npu.npu_estimator import NPUEstimator +#from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer +from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer + +class GPUBaseTrain(object): + def __init__(self, session, config, data, model, logger): + self.sess = session + self.config = config + self.data = data + self.model = model + self.logger = logger + self.print_logger = self.logger.logger + self.all_preds = [] + self.all_targets = [] + if self.config['accelerator'] == 'gpu': + self.classifier, self.training_hook = self.get_classifier() + else: + # from tensorflow.contrib.offline_train.python.npu.npu_config import NPURunConfig + from npu_bridge.estimator.npu.npu_config import NPURunConfig + # from tensorflow.contrib.offline_train.python.npu.npu_estimator import NPUEstimator + from npu_bridge.estimator.npu.npu_estimator import NPUEstimator +# from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer + from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer + self.classifier, self.training_hook = self.get_npu_classifier() + + + + def get_classifier(self): + classifier = tf.estimator.Estimator( + model_fn=self.model.get_estimator_model_func, + model_dir=self.config['log_dir'], + config = tf.estimator.RunConfig( + session_config=self.sess.get_config(), + save_summary_steps=self.config['save_summary_steps'] if self.config['do_checkpoint'] else None, + save_checkpoints_steps=self.config['save_checkpoints_steps'] if self.config['do_checkpoint'] else None, + keep_checkpoint_max=None + ) + ) + + training_hooks = [train_helper.PrefillStagingAreasHook()] + training_hooks.append(self.logger) + + return classifier, training_hooks + + def get_npu_classifier(self): + session_config = tf.ConfigProto( + inter_op_parallelism_threads=10, + intra_op_parallelism_threads=10, + allow_soft_placement=True,) + + + if self.config['over_dump'] == "True": + print("NPU overflow dump is enabled") + from npu_bridge.npu_init import DumpConfig + dump_config = DumpConfig( + enable_dump_debug=True, dump_path=self.config['over_dump_path'], dump_debug_mode="all") + if self.config['debug'] : + run_config = NPURunConfig(dump_config=dump_config, hcom_parallel=True, precision_mode="allow_mix_precision", enable_data_pre_proc=True, save_checkpoints_steps=112590, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop'], keep_checkpoint_max=5) + else : + run_config = NPURunConfig(dump_config=dump_config, hcom_parallel=True, precision_mode="allow_mix_precision", save_summary_steps=0, log_step_count_steps=None, enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop']) + else: + import precision_tool.tf_config as npu_tf_config + if self.config['debug']: + if self.config['precision_mode'] == 'must_keep_origin_dtype': + run_config = NPURunConfig(hcom_parallel=True, + precision_mode="must_keep_origin_dtype", + enable_data_pre_proc=True, + save_checkpoints_steps=112590, + session_config=session_config, + 
model_dir = self.config['model_dir'], + iterations_per_loop=self.config['iterations_per_loop'], + keep_checkpoint_max=5, + enable_small_channel=1) + else: + dump_config = npu_tf_config.estimator_dump_config(action='dump') + run_config = NPURunConfig(hcom_parallel=True, + dump_config=dump_config, + precision_mode="allow_mix_precision", + enable_data_pre_proc=True, + save_checkpoints_steps=112590, + session_config=session_config, + model_dir=self.config['model_dir'], + iterations_per_loop=self.config['iterations_per_loop'], + keep_checkpoint_max=5, + enable_small_channel=1, + modify_mixlist='./src/trainers/ReduceMeanD.json') + else: + run_config = NPURunConfig(hcom_parallel=True, precision_mode="allow_mix_precision", save_summary_steps=0, log_step_count_steps=None, enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop']) + +# run_config = NPURunConfig(enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir']) + + # classifier = tf.estimator.Estimator( + # model_fn=self.model.get_estimator_model_func, + # model_dir=self.config['log_dir'], + # config = tf.estimator.RunConfig( + # session_config=self.sess.get_config(), + # save_summary_steps=self.config['save_summary_steps'] if self.config['do_checkpoint'] else None, + # save_checkpoints_steps=self.config['save_checkpoints_steps'] if self.config['do_checkpoint'] else None, + # keep_checkpoint_max=None + # ) + # ) + + classifier =NPUEstimator( + model_fn= self.model.get_estimator_model_func, + config= run_config + ) + + training_hooks = [] + if self.config['debug']: + training_hooks = [train_helper.PrefillStagingAreasHook()] + training_hooks.append(self.logger) + + return classifier, training_hooks + + def train(self): + print ('training steps: %d' % self.config['nstep']) + self.classifier.train( input_fn=lambda:self.data.get_train_input_fn(), + max_steps = self.config['nstep'], + hooks = self.training_hook + ) + + + def evaluate(self): + rank0log(self.print_logger, "Evaluating") + rank0log(self.print_logger, "Validation dataset size: {}".format(self.config['num_evaluating_samples'] )) + time.sleep(5) # a little extra margin... 
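+        # presumably lets the newest checkpoint finish flushing to disk before
+        # train_helper.sort_and_load_ckpts lists the checkpoints below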
+ try: + ckpts = train_helper.sort_and_load_ckpts(self.config['model_dir']) + print("=========ckpt==========") + print(ckpts) + print("=========ckpt==========") + for i, c in enumerate(ckpts): + if i < len(ckpts) - 1: + if i % self.config['eval_interval'] != 0: + continue + eval_result = self.classifier.evaluate( + input_fn=lambda: self.data.get_eval_input_fn(), + checkpoint_path=c['path']) + c['epoch'] = math.ceil(c['step'] / (self.config['num_training_samples']/ (self.config['global_batch_size']))) + c['top1'] = eval_result['val-top1acc'] + c['top5'] = eval_result['val-top5acc'] + c['loss'] = eval_result['loss'] + + rank0log(self.print_logger, ' step epoch top1 top5 loss checkpoint_time(UTC)') + for i, c in enumerate(ckpts): + if 'top1' not in c: + continue + rank0log(self.print_logger,'{:5d} {:5.1f} {:5.3f} {:6.2f} {:6.2f} {time}' + .format(c['step'], + c['epoch'], + c['top1'] * 100, + c['top5'] * 100, + c['loss'], + time=time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(c['mtime'])))) + rank0log(self.print_logger, "Finished evaluation") + except KeyboardInterrupt: + self.print_logger.error("Keyboard interrupt") + + def train_and_evaluate(self): + success = False + epochs_between_evals = self.config.get('epochs_between_evals', 4) + + + for i in range(self.config['num_epochs'] // epochs_between_evals): + + rank0log(self.print_logger, "Starting a training cycle") + + self.classifier.train(input_fn=lambda:self.data.get_train_input_fn(), + steps = self.config['nsteps_per_epoch']*epochs_between_evals, + hooks = self.training_hook ) + + rank0log(self.print_logger, "Starting to evaluate") + rank0log(self.print_logger, "Validation dataset size: {}".format(self.config['num_evaluating_samples'] )) + time.sleep(5) # a little extra margin... + + ckpts = train_helper.sort_and_load_ckpts(self.config['model_dir']) + c = ckpts[-1] + eval_result = self.classifier.evaluate( + input_fn=lambda: self.data.get_eval_input_fn(), + checkpoint_path=c['path']) + + c['epoch'] = math.ceil(c['step'] / (self.config['num_training_samples']/ (self.config['global_batch_size']))) + c['top1'] = eval_result['val-top1acc'] + c['top5'] = eval_result['val-top5acc'] + c['loss'] = eval_result['loss'] + + rank0log(self.print_logger, ' step epoch top1 top5 loss checkpoint_time(UTC)') + + rank0log(self.print_logger,'{:5d} {:5.1f} {:5.3f} {:6.2f} {:6.2f} {time}' + .format(c['step'], + c['epoch'], + c['top1'] * 100, + c['top5'] * 100, + c['loss'], + time=time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(c['mtime'])))) + if eval_result['val-top1acc']*100 > self.config.get('stop_threshold', 74.9): + success = True + break + + + -- Gitee From 1ddf1afd9ab3aedc8f170d63d37a48b1518821a3 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 08:47:29 +0000 Subject: [PATCH 36/38] /cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.py. 
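
Add the missing .py extension so the trainer can be imported as a
module; the res50_dump.py entry point added in the next patch pulls it
in with:

    from trainers import gpu_base_trainer_dump as tr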
Signed-off-by: huangju1993 --- .../trainers/{gpu_base_trainer_dump => gpu_base_trainer_dump.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/{gpu_base_trainer_dump => gpu_base_trainer_dump.py} (100%) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.py similarity index 100% rename from TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump rename to TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer_dump.py -- Gitee From bdf070fe9b467e66c475a5f17a4751cfa7bd9dd4 Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 08:49:17 +0000 Subject: [PATCH 37/38] built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py. Signed-off-by: huangju1993 --- .../src/mains/res50_dump.py | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py new file mode 100644 index 000000000..062fc7757 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50_dump.py @@ -0,0 +1,148 @@ +# coding=utf-8 +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+import tensorflow as tf
+import sys
+import ast
+import os
+base_path = os.path.split(os.path.realpath(__file__))[0]
+print("#########base_path:", base_path)
+path_1 = base_path + "/../"
+print(path_1)
+path_2 = base_path + "/../models"
+print(path_2)
+path_3 = base_path + "/../../"
+print(path_3)
+
+
+sys.path.insert(1, path_1)
+sys.path.append(base_path + "/../models")
+sys.path.append(base_path + "/../../")
+sys.path.append(base_path + "/../../models")
+
+from utils import create_session as cs
+from utils import logger as lg
+from data_loader.resnet50 import data_loader as dl
+from models.resnet50 import res50_model as ml
+from optimizers import optimizer as op
+from losses import res50_loss as ls
+from trainers import gpu_base_trainer_dump as tr
+# from configs import res50_config as cfg
+from hyper_param import hyper_param as hp
+from layers import layers as ly
+import argparse
+
+def main():
+    #-------------------choose the config file in .sh file-----------
+    cmdline = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    cmdline.add_argument('--config_file', default="",
+                         help="""config file used.""")
+    cmdline.add_argument('--iterations_per_loop', default=1,
+                         help="""iterations per training loop.""")
+    cmdline.add_argument('--max_train_steps', default=200,
+                         help="""max training steps.""")
+    cmdline.add_argument('--debug', default=True, type=ast.literal_eval,
+                         help="""enable debug mode.""")
+    cmdline.add_argument('--eval', default=False, type=ast.literal_eval,
+                         help="""run evaluation after training.""")
+    cmdline.add_argument('--model_dir', default="./model_dir",
+                         help="""directory for model checkpoints.""")
+    cmdline.add_argument('--precision_mode', default='allow_mix_precision', type=str, help='precision_mode')
+
+    # modify for npu overflow start
+    # enable overflow
+    cmdline.add_argument("--over_dump", default="False",
+                         help="whether to enable overflow detection")
+    cmdline.add_argument("--over_dump_path", default="./",
+                         help="path to save overflow dump files")
+    cmdline.add_argument("--data_path", default="", help="path of dataset")
+
+    FLAGS, unknown_args = cmdline.parse_known_args()
+    if len(unknown_args) > 0:
+        for bad_arg in unknown_args:
+            print("ERROR: Unknown command line arg: %s" % bad_arg)
+        raise ValueError("Invalid command line arg(s)")
+
+    cfg_file = FLAGS.config_file
+    configs = 'configs'
+    cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file)
+    #------------------------------------------------------------------
+    '''
+    if FLAGS.precision_mode == "allow_mix_precision":
+        option = {}
+        option["ACL_PRECISION_MODE"] = "allow_mix_precision"
+        torch_npu.npu.set_option(option)
+    '''
+    config = cfg.res50_config()
+    config['iterations_per_loop'] = int(FLAGS.iterations_per_loop)
+    config['max_train_steps'] = int(FLAGS.max_train_steps)
+    config['debug'] = FLAGS.debug
+    config['precision_mode'] = FLAGS.precision_mode
+    config['eval'] = FLAGS.eval
+    config['model_dir'] = FLAGS.model_dir
+    if FLAGS.data_path:
+        config['data_url'] = FLAGS.data_path
+
+    config['over_dump'] = FLAGS.over_dump
+    config['over_dump_path'] = FLAGS.over_dump_path
+
+    print("iterations_per_loop :%d" % (config['iterations_per_loop']))
+    print("max_train_steps     :%d" % (config['max_train_steps']))
+    print("debug               :%s" % (config['debug']))
+    print("precision_mode      :%s" % (config['precision_mode']))
+    print("eval                :%s" % (config['eval']))
+    print("model_dir           :%s" % (config['model_dir']))
+    print("over_dump           :%s" % (config['over_dump']))
print("over_dump_path :%s" %(config['over_dump_path'])) + Session = cs.CreateSession(config) + data = dl.DataLoader(config) + hyper_param = hp.HyperParams(config) + layers = ly.Layers() + optimizer = op.Optimizer(config) + loss = ls.Loss(config) + logger = lg.LogSessionRunHook(config) # add tensorboard summary + + model = ml.Model(config, data, hyper_param,layers, optimizer, loss, logger) # get the model + trainer = tr.GPUBaseTrain(Session, config, data, model, logger) # use Estimator to build training process + + if config['mode'] =='train': + trainer.train() + if config['eval'] : + trainer.evaluate() + elif config['mode'] =='evaluate': + trainer.evaluate() + elif config['mode'] =='train_and_evaluate': + trainer.train_and_evaluate() + else: + raise ValueError('Invalid type of mode') + +if __name__ == '__main__': + main() -- Gitee From 5c179ce96a97d7a280b823a5b80cbdce3887cb7e Mon Sep 17 00:00:00 2001 From: huangju1993 Date: Wed, 17 Jul 2024 08:57:10 +0000 Subject: [PATCH 38/38] cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh. Signed-off-by: huangju1993 --- .../test/train_performance_bs256_1p_dump.sh | 225 ++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh new file mode 100644 index 000000000..90233d8a3 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p_dump.sh @@ -0,0 +1,225 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +source ~/.bashrc +docker_enable="false" +export RANK_SIZE=1 +export JOB_ID=99990001 +export RANK_ID=1 +export HCCL_CONNECT_TIMEOUT=600 +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" +ffts='None' +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet50_ID0058_for_TensorFlow" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=256 +#训练step +train_steps=2000 +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then 
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --ffts* ]];then
+        ffts=`echo ${para#*=}`
+    elif [[ $para == --autotune* ]];then
+        autotune=`echo ${para#*=}`
+        mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+        mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+        autotune_dump_path=${cur_path}/output/autotune_dump
+        mkdir -p ${autotune_dump_path}/GA
+        mkdir -p ${autotune_dump_path}/rl
+        cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+        cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --conda_name* ]];then
+        conda_name=`echo ${para#*=}`
+        source $cur_path/set_conda.sh
+        source activate $conda_name
+    elif [[ $para == --docker_enable* ]];then
+        docker_enable=`echo ${para#*=}`
+    fi
+done
+
+if [[ ${hf32} == "--hf32" ]];then
+    export ENABLE_HF32_EXECUTION=1
+fi
+
+#Check that data_path was provided, no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#Docker adaptation
+if [[ $docker_enable == "basic" ]] || [[ $docker_enable == "privileged" ]]; then
+    echo "docker_enable basic"
+    export PATH=$PATH:/home/anaconda3/envs/$conda_name/bin
+    export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:$LD_LIBRARY_PATH
+fi
+
+#Modify parameters
+sed -i "50s|PATH_TO_BE_CONFIGURED|${data_path}|g" $cur_path/../src/configs/res50_256bs_1p.py
+sed -i "107s|PATH_TO_BE_CONFIGURED|${cur_path}/output/0/d\_solution/ckpt0|g" $cur_path/../src/configs/res50_256bs_1p.py
+
+cp data_loader.py $cur_path/../src/data_loader/resnet50/
+
+if [[ ${ffts} == "--ffts" ]];then
+    export ASCEND_ENHANCE_ENABLE=1
+fi
+#Training start time, no need to modify
+start_time=$(date +%s)
+cd $cur_path/../
+#Enter the training script directory, review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables, no need to modify
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+    export DEVICE_INDEX=$RANK_ID
+
+    #Create the DeviceID output directory, no need to modify
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no modification, others should be reviewed per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+    nohup python3.7 ${cur_path}/../src/mains/res50_dump.py --config_file=res50_256bs_1p \
+        --max_train_steps=${train_steps} \
+        --iterations_per_loop=100 \
+        --debug=True \
+        --eval=False \
+        --precision_mode ${precision_mode} \
+        --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time, no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Restore the parameters
+sed -i "50s|${data_path}|PATH_TO_BE_CONFIGURED|g" $cur_path/../src/configs/res50_256bs_1p.py
+sed -i "107s|${cur_path}/output/0/d\_solution/ckpt0|PATH_TO_BE_CONFIGURED|g" $cur_path/../src/configs/res50_256bs_1p.py
+
+#Print results, no need to modify
+echo "------------------ Final result ------------------"
+#Output performance FPS, review and modify per model
+FPS=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "FPS: " | awk -F "FPS: " '{print $2}' | awk -F " loss:" '{print $1}' | tail -n +2 | awk '{sum+=$1} END {print sum/NR}'`
+#Print, no need to modify
+echo "Final Performance images/sec : $FPS"
+
+#Output training accuracy, review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print, no need to modify
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Stability and accuracy monitoring result summary
+#Training case information, no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+if [[ ${fp32} == "--fp32" ]];then
+    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf'
+elif [[ ${hf32} == "--hf32" ]];then
+    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
+elif [[ ${ffts} == "--ffts" ]];then
+    if [[ $docker_enable == "basic" ]];then
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'basic'_'docker'_'dump'_'perf'_'ffts'
+    else
+        CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'_'ffts'
+    fi
+else
+    CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+fi
+
+
+
+##Get performance data
+#Throughput, no need to modify
+ActualFPS=${FPS}
+#Training time per iteration, no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt, review per model
+grep "FPS: " $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss: " '{print $2}' | awk -F "total" '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss of the last iteration, no need to modify
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log, no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+rm -rf $cur_path/../precision_data
\ No newline at end of file
-- Gitee