diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/compare.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/compare.py index ba8692578620ca0e6a006bbee65f03efc77fb3f7..51bbe57a4e96a86256b31fc9f6cb112f4c9c42c3 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/compare.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/compare.py @@ -16,6 +16,7 @@ """ import os +import time import numpy as np from .utils import Util from .config import Const @@ -148,3 +149,105 @@ class Compare: err_percent = float(err_cnt / total_cnt) self.util.print(self.util.create_columns([err_table, top_table])) return total_cnt, all_close, cos_sim, err_percent + + def _compare_npy(self, file, bench_file, output_path): + data = np.load(file) + bench_data = np.load(bench_file) + shape, dtype = data.shape, data.dtype + bench_shape, bench_dtype = bench_data.shape, bench_data.dtype + filename = os.path.basename(file) + bench_filename = os.path.basename(bench_file) + if shape != bench_shape or dtype != bench_dtype: + self.log.error( + "Shape or dtype between two npy files is inconsistent. Please check the two files." + "File 1: %s, file 2: %s", file, bench_file) + self.util.deal_with_dir_or_file_inconsistency(output_path) + return + md5_consistency = False + if self.util.get_md5_for_numpy(data) == self.util.get_md5_for_numpy(bench_data): + md5_consistency = True + data_mean = np.mean(data) + bench_data_mean = np.mean(bench_data) + abs_error = np.abs(data - bench_data) + bench_data = self.util.deal_with_value_if_has_zero(bench_data) + rel_error = np.abs(abs_error / bench_data) + abs_diff_max = abs_error.max() + rel_diff_max = np.max(rel_error) + compare_result = [[filename, bench_filename, data_mean, bench_data_mean, md5_consistency, abs_diff_max, + rel_diff_max]] + self.util.write_csv(compare_result, output_path) + + def compare_all_file_in_directory(self, my_dump_dir, golden_dump_dir, output_path): + if not (self.util.is_subdir_count_equal(my_dump_dir, golden_dump_dir) + and self.util.check_npy_files_valid_in_dir(my_dump_dir) + and self.util.check_npy_files_valid_in_dir(golden_dump_dir)): + self.log.error( + "Top level(Npy files level) directory structure is inconsistent. Please check the two directory.") + self.util.deal_with_dir_or_file_inconsistency(output_path) + return + my_npy_files = self.util.get_sorted_files_names(my_dump_dir) + golden_npy_files = self.util.get_sorted_files_names(golden_dump_dir) + for my_npy_file_name, golden_npy_file_name in zip(my_npy_files, golden_npy_files): + my_npy_path = os.path.join(my_dump_dir, my_npy_file_name) + golden_npy_path = os.path.join(golden_dump_dir, golden_npy_file_name) + self._compare_npy(my_npy_path, golden_npy_path, output_path) + + def compare_timestamp_directory(self, my_dump_dir, golden_dump_dir, output_path): + if not self.util.is_subdir_count_equal(my_dump_dir, golden_dump_dir): + self.log.error( + "Second level(Timestamp level) directory structure is inconsistent. Please check the two directory.") + self.util.deal_with_dir_or_file_inconsistency(output_path) + return + my_ordered_subdirs = self.util.get_sorted_subdirectories_names(my_dump_dir) + golden_ordered_subdirs = self.util.get_sorted_subdirectories_names(golden_dump_dir) + for my_subdir_name, golden_subdir_name in zip(my_ordered_subdirs, golden_ordered_subdirs): + my_subdir_path = os.path.join(my_dump_dir, my_subdir_name) + golden_subdir_path = os.path.join(golden_dump_dir, golden_subdir_name) + self.compare_all_file_in_directory(my_subdir_path, golden_subdir_path, output_path) + + def compare_converted_dir(self, my_dump_dir, golden_dump_dir, output_dir): + if not self.util.is_subdir_count_equal(my_dump_dir, golden_dump_dir): + self.log.error( + "Top level(Opname level) directory structure is inconsistent. Please check the two directory.") + return + timestamp = int(time.time()) + output_file_name = f"batch_compare_{timestamp}.csv" + output_path = os.path.join(output_dir, output_file_name) + title_rows = [[ + "NPU File Name", + "Bench File Name", + "Mean", + "Bench Mean", + "Md5 Consistency", + "Max Abs Error", + "Max Relative Error" + ]] + self.util.write_csv(title_rows, output_path) + + my_ordered_subdirs = self.util.get_sorted_subdirectories_names(my_dump_dir) + golden_ordered_subdirs = self.util.get_sorted_subdirectories_names(golden_dump_dir) + for my_subdir_name, golden_subdir_name in zip(my_ordered_subdirs, golden_ordered_subdirs): + if not my_subdir_name == golden_subdir_name: + self.log.error( + "Top level(Opname level) directory structure is inconsistent. Please check the two directory.") + self.util.deal_with_dir_or_file_inconsistency(output_path) + return + my_subdir_path = os.path.join(my_dump_dir, my_subdir_name) + golden_subdir_path = os.path.join(golden_dump_dir, golden_subdir_name) + self.compare_timestamp_directory(my_subdir_path, golden_subdir_path, output_path) + self.util.change_filemode_safe(output_path) + self.log.info("Compare result is saved in : %s", output_path) + + def convert_api_dir_to_npy(self, dump_dir, param, output_dir, msaccucmp_path): + dump_dir = self.util.path_strip(dump_dir) + for root, dirs, files in os.walk(dump_dir): + for file in files: + file_path = os.path.join(root, file) + file_name = os.path.basename(file_path) + parts = file_name.split(".") + if len(parts) < 5: + continue + op_name = parts[1] + timestamp = parts[-1] + output_path = os.path.join(output_dir, op_name, timestamp) + self.convert_dump_to_npy(file_path, param, output_path, msaccucmp_path) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/config.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/config.py index 9cf479d64488497be8d0816f20cbc250b5ced831..97de8572cb74cf998bc1ffa2b58405eca2830184 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/config.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/config.py @@ -16,6 +16,7 @@ """ import os +import numpy as np class Const: @@ -27,6 +28,8 @@ class Const: DATA_ROOT_DIR = os.path.join(ROOT_DIR, 'parse_data') DUMP_CONVERT_DIR = os.path.join(DATA_ROOT_DIR, 'dump_convert') COMPARE_DIR = os.path.join(DATA_ROOT_DIR, 'compare_result') + BATCH_DUMP_CONVERT_DIR = os.path.join(DATA_ROOT_DIR, 'batch_dump_convert') + BATCH_COMPARE_DIR = os.path.join(DATA_ROOT_DIR, 'batch_compare_result') OFFLINE_DUMP_CONVERT_PATTERN = \ r"^([A-Za-z0-9_-]+)\.([A-Za-z0-9_-]+)\.([0-9]+)(\.[0-9]+)?\.([0-9]{1,255})" \ r"\.([a-z]+)\.([0-9]{1,255})(\.[x0-9]+)?\.npy$" @@ -38,6 +41,7 @@ class Const: FILE_PATTERN = r'^[a-zA-Z0-9_./-]+$' ONE_GB = 1 * 1024 * 1024 * 1024 TEN_GB = 10 * 1024 * 1024 * 1024 + FLOAT_TYPE = [np.half, np.single, float, np.double, np.float64, np.longdouble, np.float32, np.float16] HEADER = r""" ____ / __ \____ ______________ / /_/ / __ `/ ___/ ___/ _ \ diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/interactive_cli.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/interactive_cli.py index df1de148e21219361b39a9994c51df0e8fac77b5..7c407e791fc76f245821abc6f8c8dd0905f594b2 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/interactive_cli.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/interactive_cli.py @@ -15,6 +15,7 @@ # limitations under the License. """ import cmd +import argparse from .parse_tool import ParseTool from .utils import Util from .config import Const @@ -51,8 +52,39 @@ class InteractiveCli(cmd.Cmd): def do_run(self, line=""): self.util.execute_command(line) + @catch_exception def do_vc(self, line=""): - self.parse_tool.do_vector_compare(self._parse_argv(line)) + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", "--my_dump_path", dest="my_dump_path", default=None, + help=" my dump path, the data compared with golden data", + required=True + ) + parser.add_argument( + "-g", "--golden_dump_path", dest="golden_dump_path", default=None, + help=" the golden dump data path", + required=True + ) + parser.add_argument( + "-out", "--output_path", dest="output_path", default=None, + help=" the output path", + required=False + ) + parser.add_argument( + "-cmp_path", "--msaccucmp_path", dest="msaccucmp_path", default=None, + help=" the msaccucmp.py file path", + required=False + ) + args = parser.parse_args(self._parse_argv(line)) + self.util.check_path_valid(args.my_dump_path) + self.util.check_path_valid(args.golden_dump_path) + self.util.check_files_in_path(args.my_dump_path) + self.util.check_files_in_path(args.golden_dump_path) + if self.util.dir_contains_only(args.my_dump_path, ".npy") and \ + self.util.dir_contains_only(args.golden_dump_path, ".npy"): + self.parse_tool.do_compare_converted_dir(args) + else: + self.parse_tool.do_vector_compare(args) def do_dc(self, line=""): self.parse_tool.do_convert_dump(self._parse_argv(line)) @@ -65,3 +97,7 @@ class InteractiveCli(cmd.Cmd): def do_cn(self, line=''): self.parse_tool.do_compare_data(self._parse_argv(line)) + + def do_cad(self, line=''): + self.parse_tool.do_convert_api_dir(self._parse_argv(line)) + diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py index 2dbda307857e54bc01bca1804c3269fc355fa6b4..7e84f247076d094d9178a0e75d0430d7fb299d67 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py @@ -35,39 +35,13 @@ class ParseTool: self.util.create_dir(Const.DATA_ROOT_DIR) @catch_exception - def do_vector_compare(self, argv=None): - parser = argparse.ArgumentParser() - parser.add_argument( - "-m", "--my_dump_path", dest="my_dump_path", default=None, - help=" my dump path, the data compared with golden data", - required=True - ) - parser.add_argument( - "-g", "--golden_dump_path", dest="golden_dump_path", default=None, - help=" the golden dump data path", - required=True - ) - parser.add_argument( - "-out", "--output_path", dest="output_path", default=None, - help=" the output path", - required=False - ) - parser.add_argument( - "-cmp_path", "--msaccucmp_path", dest="msaccucmp_path", default=None, - help=" the msaccucmp.py file path", - required=False - ) - args = parser.parse_args(argv) + def do_vector_compare(self, args): if not args.output_path: result_dir = os.path.join(Const.COMPARE_DIR) else: result_dir = args.output_path my_dump_path = args.my_dump_path golden_dump_path = args.golden_dump_path - self.util.check_path_valid(my_dump_path) - self.util.check_path_valid(golden_dump_path) - self.util.check_files_in_path(my_dump_path) - self.util.check_files_in_path(golden_dump_path) if not os.path.isdir(my_dump_path) or not os.path.isdir(golden_dump_path): self.util.log.error("Please enter a directory not a file") raise ParseException(ParseException.PARSE_INVALID_PATH_ERROR) @@ -140,3 +114,41 @@ class ParseTool: self.util.check_path_format(args.my_dump_path, Const.NPY_SUFFIX) self.util.check_path_format(args.golden_dump_path, Const.NPY_SUFFIX) self.compare.compare_data(args.my_dump_path, args.golden_dump_path, args.save, args.rtol, args.atol, args.count) + + @catch_exception + def do_compare_converted_dir(self, args): + """compare two dir""" + my_dump_dir = self.util.path_strip(args.my_dump_path) + golden_dump_dir = self.util.path_strip(args.golden_dump_path) + if my_dump_dir == golden_dump_dir: + self.util.log.error("My directory path and golden directory path is same. Please check parameter" + " '-m' and '-g'.") + raise ParseException("My directory path and golden directory path is same.") + output_path = self.util.path_strip(args.output_path) if args.output_path else Const.BATCH_COMPARE_DIR + if not os.path.isdir(output_path): + os.makedirs(output_path, mode=0o750) + self.compare.compare_converted_dir(my_dump_dir, golden_dump_dir, output_path) + + @catch_exception + def do_convert_api_dir(self, argv=None): + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", "--my_dump_path", dest="my_dump_path", default=None, + help=" my dump path, the data need to convert to npy files.", + required=True + ) + parser.add_argument( + '-out', '--output_path', dest='output_path', required=False, default=None, help='output path') + parser.add_argument( + "-asc", "--msaccucmp_path", dest="msaccucmp_path", default=None, + help=" the msaccucmp.py file path", required=False) + args = parser.parse_args(argv) + self.util.check_path_valid(args.my_dump_path) + self.util.check_files_in_path(args.my_dump_path) + output_path = self.util.path_strip(args.output_path) if args.output_path else \ + os.path.join(Const.BATCH_DUMP_CONVERT_DIR, self.util.localtime_str()) + msaccucmp_path = self.util.path_strip( + args.msaccucmp_path) if args.msaccucmp_path else Const.MS_ACCU_CMP_PATH + self.util.check_path_valid(msaccucmp_path) + self.util.check_executable_file(msaccucmp_path) + self.compare.convert_api_dir_to_npy(args.my_dump_path, None, output_path, msaccucmp_path) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/utils.py index 2d512224f7e5d70173e0f716d37864be6687708f..aa69a4780c96d942853696d4a4cc63f2447d393f 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/utils.py @@ -16,9 +16,13 @@ """ import logging import os +import io import re import sys import subprocess +import hashlib +import csv +import time import numpy as np from .config import Const from .file_desc import DumpDecodeFileDesc, FileDesc @@ -26,6 +30,7 @@ from .parse_exception import ParseException from ...common.file_check_util import change_mode, check_other_user_writable,\ check_path_executable, check_path_owner_consistent from ...common.file_check_util import FileCheckConst +from ...common.file_check_util import FileOpen try: from rich.traceback import install @@ -33,6 +38,7 @@ try: from rich.table import Table from rich import print as rich_print from rich.columns import Columns + install() except ImportError as err: install = None @@ -40,7 +46,8 @@ except ImportError as err: Table = None Columns = None rich_print = None - print("[Warning] Failed to import rich, Some features may not be available. Please run 'pip install rich' to fix it.") + print( + "[Warning] Failed to import rich, Some features may not be available. Please run 'pip install rich' to fix it.") class Util: @@ -126,7 +133,7 @@ class Util: shape, dtype, max_data, min_data, mean = \ self.npy_info(source_data) return \ - '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (shape, dtype, max_data, min_data, mean) + '[Shape: %s] [Dtype: %s] [Max: %s] [Min: %s] [Mean: %s]' % (shape, dtype, max_data, min_data, mean) def save_npy_to_txt(self, data, dst_file='', align=0): if os.path.exists(dst_file): @@ -189,6 +196,7 @@ class Util: if path.endswith(Const.NPY_SUFFIX) and file_size > Const.TEN_GB: self.log.error('The file {} size is greater than 10GB.'.format(path)) raise ParseException(ParseException.PARSE_INVALID_PATH_ERROR) + return True def check_files_in_path(self, path): if os.path.isdir(path) and len(os.listdir(path)) == 0: @@ -251,3 +259,88 @@ class Util: if not re.match(Const.FILE_PATTERN, param): self.log.error('The parameter {} contains special characters.'.format(param)) raise ParseException(ParseException.PARSE_INVALID_PARAM_ERROR) + + def get_subdir_count(self, directory): + subdir_count = 0 + for root, dirs, files in os.walk(directory): + subdir_count += len(dirs) + break + return subdir_count + + def get_subfiles_count(self, directory): + file_count = 0 + for root, dirs, files in os.walk(directory): + file_count += len(files) + return file_count + + def is_subdir_count_equal(self, dir1, dir2): + dir1_count = self.get_subdir_count(dir1) + dir2_count = self.get_subdir_count(dir2) + return dir1_count == dir2_count + + def get_sorted_subdirectories_names(self, directory): + subdirectories = [] + for item in os.listdir(directory): + item_path = os.path.join(directory, item) + if os.path.isdir(item_path): + subdirectories.append(item) + return sorted(subdirectories) + + def get_sorted_files_names(self, directory): + files = [] + for item in os.listdir(directory): + item_path = os.path.join(directory, item) + if os.path.isfile(item_path): + files.append(item) + return sorted(files) + + def check_npy_files_valid_in_dir(self, dir_path): + for file_name in os.listdir(dir_path): + file_path = os.path.join(dir_path, file_name) + if not self.check_path_valid(file_path): + return False + _, file_extension = os.path.splitext(file_path) + if not file_extension == '.npy': + return False + return True + + def get_md5_for_numpy(self, obj): + np_bytes = obj.tobytes() + md5_hash = hashlib.md5(np_bytes) + return md5_hash.hexdigest() + + def write_csv(self, data, filepath): + with FileOpen(filepath, 'a') as f: + writer = csv.writer(f) + writer.writerows(data) + + def deal_with_dir_or_file_inconsistency(self, output_path): + if os.path.exists(output_path): + os.remove(output_path) + raise ParseException("Inconsistent directory structure or file.") + + def deal_with_value_if_has_zero(self, data): + if data.dtype in Const.FLOAT_TYPE: + zero_mask = (data == 0) + # 给0的地方加上eps防止除0 + data[zero_mask] += np.finfo(data.dtype).eps + else: + # int type + float eps 会报错,所以这里要强转 + data = data.astype(float) + zero_mask = (data == 0) + data[zero_mask] += np.finfo(float).eps + return data + + def dir_contains_only(self, path, endfix): + for root, dirs, files in os.walk(path): + for file in files: + if not file.endswith(endfix): + return False + return True + + def localtime_str(self): + return time.strftime("%Y%m%d%H%M%S", time.localtime()) + + def change_filemode_safe(self, path): + change_mode(path, FileCheckConst.DATA_FILE_AUTHORITY) +