From 8ee2a5c2d766585ed6bdfb80bde6e53d40c5e715 Mon Sep 17 00:00:00 2001 From: louyujing Date: Thu, 30 Nov 2023 11:39:31 +0000 Subject: [PATCH 01/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../api_accuracy_checker/run_ut/run_ut.py | 131 +++++++++++++++--- 1 file changed, 112 insertions(+), 19 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index c8dee8760..f111a76b9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -1,9 +1,9 @@ import argparse import os -import copy +import csv +import re import sys import time -import torch_npu import yaml import torch from tqdm import tqdm @@ -16,11 +16,15 @@ from api_accuracy_checker.hook_module.wrap_functional import FunctionalOPTemplat from api_accuracy_checker.hook_module.wrap_torch import TorchOPTemplate from api_accuracy_checker.run_ut.ut_api_info import UtAPIInfo from api_accuracy_checker.common.config import msCheckerConfig +from api_accuracy_checker.compare.compare_utils import CompareConst from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen, FileCheckConst, FileChecker, \ change_mode, check_file_suffix, check_link -ut_error_data_dir = 'ut_error_data' +current_time = time.strftime("%Y%m%d%H%M%S") +UT_ERROR_DATA_DIR = 'ut_error_data' + current_time +RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" +DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" def init_environment(): @@ -104,13 +108,14 @@ def generate_cpu_params(input_args, input_kwargs, need_backward): return cpu_args, cpu_kwargs -def run_ut(forward_file, backward_file, out_path, save_error_data): +def run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, api_in_csv_num=-1, test_result_cnt=None): print_info_log("start UT test") - forward_content = get_json_contents(forward_file) - backward_content = get_json_contents(backward_file) api_setting_dict = get_json_contents("torch_ut_setting.json") - compare = Comparator(out_path) - for api_full_name, api_info_dict in tqdm(forward_content.items()): + is_continue_run_ut = True if api_in_csv_num != -1 else False + compare = Comparator(result_csv_path, details_csv_path, is_continue_run_ut, test_result_cnt) + for i, (api_full_name, api_info_dict) in enumerate(tqdm(forward_content.items())): + if i < api_in_csv_num: + continue try: data_info = run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_dict) is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, @@ -137,12 +142,12 @@ def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) if not is_fwd_success or not is_bwd_success: api_full_name = api_full_name.replace("*", ".") for element in data_info.in_fwd_data_list: - UtAPIInfo(api_full_name + '.forward.input', element, ut_error_data_dir) - UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_out, ut_error_data_dir) - UtAPIInfo(api_full_name + '.forward.output.npu', data_info.npu_out, ut_error_data_dir) - UtAPIInfo(api_full_name + '.backward.input', data_info.grad_in, ut_error_data_dir) - UtAPIInfo(api_full_name + '.backward.output.bench', data_info.bench_grad_out, ut_error_data_dir) - UtAPIInfo(api_full_name + '.backward.output.npu', data_info.npu_grad_out, ut_error_data_dir) + UtAPIInfo(api_full_name + '.forward.input', element, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_out, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.forward.output.npu', data_info.npu_out, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.backward.input', data_info.grad_in, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.backward.output.bench', data_info.bench_grad_out, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.backward.output.npu', data_info.npu_grad_out, UT_ERROR_DATA_DIR) def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_dict): @@ -217,9 +222,83 @@ def initialize_save_error_data(): error_data_path_checker = FileChecker(msCheckerConfig.error_data_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE) error_data_path = error_data_path_checker.common_check() - global ut_error_data_dir - ut_error_data_dir = 'ut_error_data' + time.strftime("%Y%m%d%H%M%S") - initialize_save_path(error_data_path, ut_error_data_dir) + initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) + + +def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward_content, save_error_data): + result_csv_path_checker = FileChecker(result_csv_path, FileCheckConst.FILE, ability=FileCheckConst.READ_WRITE_ABLE, + file_type=FileCheckConst.CSV_SUFFIX) + result_csv_path = result_csv_path_checker.common_check() + result_csv_name = os.path.basename(result_csv_path) + pattern = r"^accuracy_checking_result_\d{14}\.csv$" + if not re.match(pattern, result_csv_name): + raise ValueError("When continue run ut, please do not modify the result csv name.") + details_csv_name = result_csv_name.replace('result', 'details') + details_csv_path = os.path.join(os.path.dirname(result_csv_path), details_csv_name) + details_csv_path_checker = FileChecker(details_csv_path, FileCheckConst.FILE, + ability=FileCheckConst.READ_WRITE_ABLE, file_type=FileCheckConst.CSV_SUFFIX) + details_csv_path = details_csv_path_checker.common_check() + if save_error_data: + time_info = result_csv_path.split('.')[0].split('_')[-1] + ut_error_data_dir_name = 'ut_error_data' + time_info + ut_error_data_dir_path = os.path.join(os.path.dirname(result_csv_path), ut_error_data_dir_name) + global UT_ERROR_DATA_DIR + UT_ERROR_DATA_DIR = ut_error_data_dir_path + initialize_save_error_data() + with open(result_csv_path, 'r') as file: + reader = csv.reader(file) + result_csv_rows = [row for row in reader] + if not result_csv_rows: + # If result csv is empty, details csv should also be empty + with open(details_csv_path, 'w', newline='') as file: + writer = csv.writer(file) + writer.writerow([]) + compare = Comparator(result_csv_path, details_csv_path, True) + compare.write_csv_title() + api_in_csv_num = len(result_csv_rows) - 1 if len(result_csv_rows) - 1 > 0 else 0 + if api_in_csv_num > 0: + if api_in_csv_num > len(forward_content): + raise ValueError( + "% data is abnormal, the number of rows is greater than the number of rows in forward_info json", + result_csv_name) + result_csv_api_list = [] + forward_json_api_list = [] + for item in result_csv_rows[1:]: + if not item: + raise ValueError("% data is abnormal, the API name has a null value", result_csv_name) + result_csv_api_list.append(item[0]) + for item in list(forward_content.items())[:api_in_csv_num]: + if not item: + raise ValueError("forward_info json data is abnormal, the API name has a null value") + forward_json_api_list.append(item[0]) + if result_csv_api_list != forward_json_api_list: + raise ValueError("The saved api data in % is not from forward_info json", result_csv_name) + test_result_cnt = get_statistics_from_result_csv(result_csv_rows[1:], result_csv_name) + return result_csv_path, details_csv_path, api_in_csv_num, test_result_cnt + + +def get_statistics_from_result_csv(result_csv_rows: list, result_csv_name: str): + test_result_cnt = { + "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, + "total_num": 0, "forward_or_backward_fail_num": 0 + } + for item in result_csv_rows: + if not isinstance(item, list) or len(item) < 3: + raise ValueError("The number of columns in % is incorrect", result_csv_name) + if item[1] not in ['True', 'False', CompareConst.NA] or item[2] not in ['True', 'False', CompareConst.NA]: + raise ValueError("The value in the 2nd or 3rd column of % is wrong, it must be TRUE, FALSE or N/A", + result_csv_name) + if item[1] == 'True' and item[2] in ['True', 'N/A']: + test_result_cnt['success_num'] += 1 + elif item[1] == 'False' and item[2] == 'False': + test_result_cnt['forward_and_backward_fail_num'] += 1 + elif item[1] == 'False': + test_result_cnt['forward_fail_num'] += 1 + test_result_cnt['forward_or_backward_fail_num'] += 1 + else: + test_result_cnt['backward_fail_num'] += 1 + test_result_cnt['forward_or_backward_fail_num'] += 1 + return test_result_cnt def _run_ut_parser(parser): @@ -240,6 +319,10 @@ def _run_ut_parser(parser): help=" whether to turn on jit compile", required=False) parser.add_argument("-d", "--device", dest="device_id", type=int, help=" set NPU device id to run ut", default=0, required=False) + parser.add_argument("-c", "--continue_run_ut", dest="continue_run_ut", default="", type=str, + help=" The path of accuracy_checking_result.csv, when run ut is interrupted, " + "enter the file path to continue run ut.", + required=False) def _run_ut(): @@ -263,9 +346,19 @@ def _run_ut(): out_path_checker = FileChecker(out_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE) out_path = out_path_checker.common_check() save_error_data = args.save_error_data - if save_error_data: + forward_content = get_json_contents(forward_file) + backward_content = get_json_contents(backward_file) + result_csv_path = os.path.join(out_path, RESULT_FILE_NAME) + details_csv_path = os.path.join(out_path, DETAILS_FILE_NAME) + if save_error_data and not args.continue_run_ut: initialize_save_error_data() - run_ut(forward_file, backward_file, out_path, save_error_data) + api_in_csv_num = -1 + test_result_cnt = None + if args.continue_run_ut: + result_csv_path, details_csv_path, api_in_csv_num, test_result_cnt = \ + validate_continue_run_ut_required_files_and_folders(args.continue_run_ut, forward_content, save_error_data) + run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, api_in_csv_num, + test_result_cnt) class UtDataInfo: -- Gitee From 6ca24bddb44d2bb13537bcdde1febd203b614fcb Mon Sep 17 00:00:00 2001 From: louyujing Date: Thu, 30 Nov 2023 11:41:04 +0000 Subject: [PATCH 02/11] update debug/accuracy_tools/api_accuracy_checker/compare/compare.py. Signed-off-by: louyujing --- .../api_accuracy_checker/compare/compare.py | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index dcbb24b1a..b95ec8fa3 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -1,6 +1,5 @@ # 进行比对及结果展示 import os -import time from rich.table import Table from rich.console import Console from api_accuracy_checker.compare.algorithm import compare_core @@ -10,22 +9,22 @@ from api_accuracy_checker.common.config import msCheckerConfig class Comparator: - TEST_FILE_NAME = "accuracy_checking_result_" + time.strftime("%Y%m%d%H%M%S") + ".csv" - DETAIL_TEST_FILE_NAME = "accuracy_checking_details_" + time.strftime("%Y%m%d%H%M%S") + ".csv" - # consts for result csv COLUMN_API_NAME = "API name" COLUMN_FORWARD_SUCCESS = "Forward Test Success" COLUMN_BACKWARD_SUCCESS = "Backward Test Success" COLUMN_STACK_INFO = "Traceback callstack info" - def __init__(self, result_save_path, stack_info_json_path=None): - self.save_path = os.path.join(result_save_path, self.TEST_FILE_NAME) - if os.path.exists(self.save_path): - raise ValueError(f"file {self.save_path} already exists, please remove it first or use a new dump path") - self.detail_save_path = os.path.join(result_save_path, self.DETAIL_TEST_FILE_NAME) - if os.path.exists(self.detail_save_path): - raise ValueError(f"file {self.detail_save_path} already exists, please remove it first or use a new dump path") + def __init__(self, result_csv_path, details_csv_path, is_continue_run_ut, test_result_cnt=None, stack_info_json_path=None): + self.save_path = result_csv_path + self.detail_save_path = details_csv_path + if not is_continue_run_ut: + if os.path.exists(self.save_path): + raise ValueError(f"file {self.save_path} already exists, please remove it first or use a new dump path") + if os.path.exists(self.detail_save_path): + raise ValueError( + f"file {self.detail_save_path} already exists, please remove it first or use a new dump path") + self.write_csv_title() if stack_info_json_path: self.stack_info = get_json_contents(stack_info_json_path) else: @@ -34,9 +33,7 @@ class Comparator: self.test_result_cnt = { "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, "total_num": 0, "forward_or_backward_fail_num": 0 - } - self.result_save_path = result_save_path - self.write_csv_title() + } if not test_result_cnt else test_result_cnt def print_pretest_result(self): if self.test_result_cnt.get("total_num") != 0: -- Gitee From f4bd02007b38674d57d57fd7280417db48e100ad Mon Sep 17 00:00:00 2001 From: louyujing Date: Fri, 1 Dec 2023 02:41:20 +0000 Subject: [PATCH 03/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../accuracy_tools/api_accuracy_checker/run_ut/run_ut.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index f111a76b9..b51fecd2d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -6,6 +6,7 @@ import sys import time import yaml import torch +import torch_npu from tqdm import tqdm from api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args from api_accuracy_checker.common.utils import print_info_log, print_warn_log, get_json_contents, api_info_preprocess, \ @@ -245,14 +246,13 @@ def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward global UT_ERROR_DATA_DIR UT_ERROR_DATA_DIR = ut_error_data_dir_path initialize_save_error_data() - with open(result_csv_path, 'r') as file: + with FileOpen(result_csv_path, 'r') as file: reader = csv.reader(file) result_csv_rows = [row for row in reader] if not result_csv_rows: # If result csv is empty, details csv should also be empty - with open(details_csv_path, 'w', newline='') as file: - writer = csv.writer(file) - writer.writerow([]) + with FileOpen(details_csv_path, 'w'): + pass compare = Comparator(result_csv_path, details_csv_path, True) compare.write_csv_title() api_in_csv_num = len(result_csv_rows) - 1 if len(result_csv_rows) - 1 > 0 else 0 -- Gitee From dfadf19d7bbea786530102f93345cecd427887f2 Mon Sep 17 00:00:00 2001 From: louyujing Date: Fri, 1 Dec 2023 07:14:49 +0000 Subject: [PATCH 04/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../api_accuracy_checker/run_ut/run_ut.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index b51fecd2d..396dc132d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -26,6 +26,8 @@ current_time = time.strftime("%Y%m%d%H%M%S") UT_ERROR_DATA_DIR = 'ut_error_data' + current_time RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" +api_in_csv_num = -1 +test_result_cnt = None def init_environment(): @@ -109,7 +111,7 @@ def generate_cpu_params(input_args, input_kwargs, need_backward): return cpu_args, cpu_kwargs -def run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, api_in_csv_num=-1, test_result_cnt=None): +def run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data): print_info_log("start UT test") api_setting_dict = get_json_contents("torch_ut_setting.json") is_continue_run_ut = True if api_in_csv_num != -1 else False @@ -255,6 +257,7 @@ def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward pass compare = Comparator(result_csv_path, details_csv_path, True) compare.write_csv_title() + global api_in_csv_num api_in_csv_num = len(result_csv_rows) - 1 if len(result_csv_rows) - 1 > 0 else 0 if api_in_csv_num > 0: if api_in_csv_num > len(forward_content): @@ -273,11 +276,12 @@ def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward forward_json_api_list.append(item[0]) if result_csv_api_list != forward_json_api_list: raise ValueError("The saved api data in % is not from forward_info json", result_csv_name) - test_result_cnt = get_statistics_from_result_csv(result_csv_rows[1:], result_csv_name) - return result_csv_path, details_csv_path, api_in_csv_num, test_result_cnt + get_statistics_from_result_csv(result_csv_rows[1:], result_csv_name) + return result_csv_path, details_csv_path def get_statistics_from_result_csv(result_csv_rows: list, result_csv_name: str): + global test_result_cnt test_result_cnt = { "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, "total_num": 0, "forward_or_backward_fail_num": 0 @@ -352,13 +356,11 @@ def _run_ut(): details_csv_path = os.path.join(out_path, DETAILS_FILE_NAME) if save_error_data and not args.continue_run_ut: initialize_save_error_data() - api_in_csv_num = -1 - test_result_cnt = None - if args.continue_run_ut: - result_csv_path, details_csv_path, api_in_csv_num, test_result_cnt = \ - validate_continue_run_ut_required_files_and_folders(args.continue_run_ut, forward_content, save_error_data) - run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, api_in_csv_num, - test_result_cnt) + if args.continue_run_ut: + result_csv_path, details_csv_path = validate_continue_run_ut_required_files_and_folders(args.continue_run_ut, + forward_content, + save_error_data) + run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data) class UtDataInfo: -- Gitee From c2c06faf72b4ee2cf2b8e5d3c10a1f269bfec703 Mon Sep 17 00:00:00 2001 From: louyujing Date: Mon, 4 Dec 2023 02:25:33 +0000 Subject: [PATCH 05/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../api_accuracy_checker/run_ut/run_ut.py | 51 ++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 396dc132d..9f473c04f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -228,7 +228,7 @@ def initialize_save_error_data(): initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) -def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward_content, save_error_data): +def validate_result_csv_path(result_csv_path): result_csv_path_checker = FileChecker(result_csv_path, FileCheckConst.FILE, ability=FileCheckConst.READ_WRITE_ABLE, file_type=FileCheckConst.CSV_SUFFIX) result_csv_path = result_csv_path_checker.common_check() @@ -236,27 +236,24 @@ def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward pattern = r"^accuracy_checking_result_\d{14}\.csv$" if not re.match(pattern, result_csv_name): raise ValueError("When continue run ut, please do not modify the result csv name.") + return result_csv_path, result_csv_name + + +def validate_details_csv_path_by_validated_result_csv_path(validated_result_csv_path): + result_csv_name = os.path.basename(validated_result_csv_path) details_csv_name = result_csv_name.replace('result', 'details') - details_csv_path = os.path.join(os.path.dirname(result_csv_path), details_csv_name) + details_csv_path = os.path.join(os.path.dirname(validated_result_csv_path), details_csv_name) details_csv_path_checker = FileChecker(details_csv_path, FileCheckConst.FILE, ability=FileCheckConst.READ_WRITE_ABLE, file_type=FileCheckConst.CSV_SUFFIX) details_csv_path = details_csv_path_checker.common_check() - if save_error_data: - time_info = result_csv_path.split('.')[0].split('_')[-1] - ut_error_data_dir_name = 'ut_error_data' + time_info - ut_error_data_dir_path = os.path.join(os.path.dirname(result_csv_path), ut_error_data_dir_name) - global UT_ERROR_DATA_DIR - UT_ERROR_DATA_DIR = ut_error_data_dir_path - initialize_save_error_data() - with FileOpen(result_csv_path, 'r') as file: + return details_csv_path + + +def validate_result_csv_content_by_forward_json_content(validated_result_csv_path, forward_content): + result_csv_name = os.path.basename(validated_result_csv_path) + with FileOpen(validated_result_csv_path, 'r') as file: reader = csv.reader(file) result_csv_rows = [row for row in reader] - if not result_csv_rows: - # If result csv is empty, details csv should also be empty - with FileOpen(details_csv_path, 'w'): - pass - compare = Comparator(result_csv_path, details_csv_path, True) - compare.write_csv_title() global api_in_csv_num api_in_csv_num = len(result_csv_rows) - 1 if len(result_csv_rows) - 1 > 0 else 0 if api_in_csv_num > 0: @@ -276,6 +273,26 @@ def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward forward_json_api_list.append(item[0]) if result_csv_api_list != forward_json_api_list: raise ValueError("The saved api data in % is not from forward_info json", result_csv_name) + return result_csv_rows + + +def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward_content, save_error_data): + result_csv_path, result_csv_name = validate_result_csv_path(result_csv_path) + details_csv_path = validate_details_csv_path_by_validated_result_csv_path(result_csv_path) + if save_error_data: + time_info = result_csv_path.split('.')[0].split('_')[-1] + ut_error_data_dir_name = 'ut_error_data' + time_info + ut_error_data_dir_path = os.path.join(os.path.dirname(result_csv_path), ut_error_data_dir_name) + global UT_ERROR_DATA_DIR + UT_ERROR_DATA_DIR = ut_error_data_dir_path + initialize_save_error_data() + result_csv_rows = validate_result_csv_content_by_forward_json_content(result_csv_path, forward_content) + if not result_csv_rows: + # If result csv is empty, details csv should also be empty + with FileOpen(details_csv_path, 'w'): + pass + compare = Comparator(result_csv_path, details_csv_path, True) + compare.write_csv_title() get_statistics_from_result_csv(result_csv_rows[1:], result_csv_name) return result_csv_path, details_csv_path @@ -356,7 +373,7 @@ def _run_ut(): details_csv_path = os.path.join(out_path, DETAILS_FILE_NAME) if save_error_data and not args.continue_run_ut: initialize_save_error_data() - if args.continue_run_ut: + if args.continue_run_ut: result_csv_path, details_csv_path = validate_continue_run_ut_required_files_and_folders(args.continue_run_ut, forward_content, save_error_data) -- Gitee From a1320abde1f871c60a2b0aa22e38a7e7c953085d Mon Sep 17 00:00:00 2001 From: louyujing Date: Mon, 4 Dec 2023 06:52:50 +0000 Subject: [PATCH 06/11] update debug/accuracy_tools/api_accuracy_checker/README.md. Signed-off-by: louyujing --- .../api_accuracy_checker/README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/README.md b/debug/accuracy_tools/api_accuracy_checker/README.md index 7a5bc6113..779f2f74d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/README.md +++ b/debug/accuracy_tools/api_accuracy_checker/README.md @@ -92,15 +92,23 @@ Ascend模型精度预检工具能在昇腾NPU上扫描用户训练模型中所 | -o或--out_path | 指指定run_ut执行结果存盘路径,默认“./”(相对于run_ut的路径)。 | 否 | | -j或--jit_compile | 开启jit编译。 | 否 | | -d或--device | 指定Device ID,选择UT代码运行所在的卡,默认值为0。 | 否 | + | -c或--continue_run_ut | 指定本次运行中断时生成的accuracy_checking_result_{timestamp}.csv文件路径,执行run_ut中断时,若想从中断处继续执行,配置此参数即可。 | 否 | - run_ut执行结果包括accuracy_checking_result.csv和accuracy_checking_details.csv两个文件。accuracy_checking_result.csv是API粒度的,标明每个API是否通过测试。建议用户先查看accuracy_checking_result.csv文件,对于其中没有通过测试的或者特定感兴趣的API,根据其API name字段在accuracy_checking_details.csv中查询其各个输出的达标情况以及比较指标。API达标情况介绍请参考“**API预检指标**”。 + run_ut执行结果包括accuracy_checking_result_{timestamp}.csv和accuracy_checking_details_{timestamp}.csv两个文件。accuracy_checking_result_{timestamp}.csv是API粒度的,标明每个API是否通过测试。建议用户先查看accuracy_checking_result_{timestamp}.csv文件,对于其中没有通过测试的或者特定感兴趣的API,根据其API name字段在accuracy_checking_details_{timestamp}.csv中查询其各个输出的达标情况以及比较指标。API达标情况介绍请参考“**API预检指标**”。 4. 如果需要保存比对不达标的输入和输出数据,可以在run_ut执行命令结尾添加-save_error_data,例如: ```bash python run_ut.py -forward ./forward_info_0.json -backward ./backward_info_0.json -save_error_data ``` - 数据默认会存盘到'./ut_error_data'路径下(相对于启动run_ut的路径),有需要的话,用户可以通过msCheckerConfig.update_config来配置保存路径,参数为error_data_path + 数据默认会存盘到'./ut_error_data{timestamp}'路径下(相对于启动run_ut的路径),有需要的话,用户可以通过msCheckerConfig.update_config来配置保存路径,参数为error_data_path。 + +5. 如果本次run_ut运行中断,需要从中断处继续执行,可以在run_ut执行命令结尾配置-c,例如: + + ```bash + python run_ut.py -forward ./forward_info_0.json -backward ./backward_info_0.json -c ./accuracy_checking_result_20231203211324.csv + ``` + run_ut将会从中断处继续执行,执行结果将追加写入到-c配置的accuracy_checking_result_20231203211324.csv以及相同时间戳后缀的accuracy_checking_details_20231203211324.csv中,若配置了-save_error_data,error_data将会保存到相同时间戳后缀的ut_error_data20231203211324文件夹中。 ## API预检白名单 @@ -110,7 +118,7 @@ support_wrap_ops.yaml文件当前记录所有PyTorch API名称,可以直接编 ## API预检指标 -API预检通过测试,则在accuracy_checking_details.csv文件中的“pass”列标记“pass”,否则标记“error”或“warning”,详细规则如下: +API预检通过测试,则在accuracy_checking_details_{timestamp}.csv文件中的“pass”列标记“pass”,否则标记“error”或“warning”,详细规则如下: 1. 余弦相似度 > 0.99:≤ 0.99为不达标,标记“error”,> 0.99达标,进行下一步; 2. 最大绝对误差 < 0.001:< 0.001达标,标记“pass”,≥ 0.001为不达标,进行下一步; @@ -118,7 +126,7 @@ API预检通过测试,则在accuracy_checking_details.csv文件中的“pass - 对于float16和bfloat16数据:双百指标不通过,标记“error”;双百指标通过,双千指标不通过,标记“warning”;双百、双千指标均通过,标记“pass”。 - 对于float32和float64数据:双千指标不通过,标记“error”;双千指标通过,双万指标不通过,标记“warning”;双千、双万指标均通过,标记“pass”。 -4. 在accuracy_checking_result.csv中以“Forward Test Success”和“Backward Test Success”字段统计该算子前向反向输出的测试结果,对于标记“pass”的算子,则在accuracy_checking_result.csv中标记“TRUE”表示测试通过,对于标记“error”或“warning”的算子,则在accuracy_checking_result.csv中标记“FALSE”表示测试不通过。由于一个算子可能有多个前向或反向的输入或输出,那么该类算子的输入或输出中必须全为“pass”,才能在accuracy_checking_result.csv中标记“TRUE”,只要有一个输入或输出标记“error”或“warning”,那么在accuracy_checking_result.csv中标记“FALSE”。 +4. 在accuracy_checking_result_{timestamp}.csv中以“Forward Test Success”和“Backward Test Success”字段统计该算子前向反向输出的测试结果,对于标记“pass”的算子,则在accuracy_checking_result_{timestamp}.csv中标记“TRUE”表示测试通过,对于标记“error”或“warning”的算子,则在accuracy_checking_result_{timestamp}.csv中标记“FALSE”表示测试不通过。由于一个算子可能有多个前向或反向的输入或输出,那么该类算子的输入或输出中必须全为“pass”,才能在accuracy_checking_result_{timestamp}.csv中标记“TRUE”,只要有一个输入或输出标记“error”或“warning”,那么在accuracy_checking_result_{timestamp}.csv中标记“FALSE”。 双百、双千、双万精度指标是指NPU的Tensor中的元素逐个与对应的标杆数据对比,相对误差大于百分之一、千分之一、万分之一的比例占总元素个数的比例小于百分之一、千分之一、万分之一。 -- Gitee From 329fcf46463e3d5d9fe662576a7e702ec459ea7a Mon Sep 17 00:00:00 2001 From: louyujing Date: Mon, 4 Dec 2023 07:51:05 +0000 Subject: [PATCH 07/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../api_accuracy_checker/run_ut/run_ut.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index c1c6471e2..1fdd8e25a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -270,20 +270,20 @@ def validate_result_csv_content_by_forward_json_content(validated_result_csv_pat if api_in_csv_num > 0: if api_in_csv_num > len(forward_content): raise ValueError( - "% data is abnormal, the number of rows is greater than the number of rows in forward_info json", - result_csv_name) + "%s data is abnormal, the number of rows is greater than the number of rows in forward_info json" + % result_csv_name) result_csv_api_list = [] forward_json_api_list = [] for item in result_csv_rows[1:]: if not item: - raise ValueError("% data is abnormal, the API name has a null value", result_csv_name) + raise ValueError("%s data is abnormal, the API name has a null value" % result_csv_name) result_csv_api_list.append(item[0]) for item in list(forward_content.items())[:api_in_csv_num]: if not item: raise ValueError("forward_info json data is abnormal, the API name has a null value") forward_json_api_list.append(item[0]) if result_csv_api_list != forward_json_api_list: - raise ValueError("The saved api data in % is not from forward_info json", result_csv_name) + raise ValueError("The saved api data in %s is not from forward_info json" % result_csv_name) return result_csv_rows @@ -316,10 +316,13 @@ def get_statistics_from_result_csv(result_csv_rows: list, result_csv_name: str): } for item in result_csv_rows: if not isinstance(item, list) or len(item) < 3: - raise ValueError("The number of columns in % is incorrect", result_csv_name) - if item[1] not in ['True', 'False', CompareConst.NA] or item[2] not in ['True', 'False', CompareConst.NA]: - raise ValueError("The value in the 2nd or 3rd column of % is wrong, it must be TRUE, FALSE or N/A", - result_csv_name) + raise ValueError("The number of columns in %s is incorrect" % result_csv_name) + if item[1] not in ['True', 'False', CompareConst.NA, 'SKIP'] \ + or item[2] not in ['True', 'False', CompareConst.NA, 'SKIP']: + raise ValueError("The value in the 2nd or 3rd column of %s is wrong, it must be TRUE, FALSE or N/A" + % result_csv_name) + if item[1] == 'SKIP': + continue if item[1] == 'True' and item[2] in ['True', 'N/A']: test_result_cnt['success_num'] += 1 elif item[1] == 'False' and item[2] == 'False': @@ -352,21 +355,21 @@ def _run_ut_parser(parser): parser.add_argument("-d", "--device", dest="device_id", type=int, help=" set device id to run ut", default=0, required=False) parser.add_argument("-c", "--continue_run_ut", dest="continue_run_ut", default="", type=str, - help=" The path of accuracy_checking_result.csv, when run ut is interrupted, " - "enter the file path to continue run ut.", + help=" The path of accuracy_checking_result_{timestamp}.csv, " + "when run ut is interrupted, enter the file path to continue run ut.", required=False) def _run_ut(): parser = argparse.ArgumentParser() _run_ut_parser(parser) - args = parser.parse_args(sys.argv[1:]) + args = parser.parse_args(sys.argv[1:]) if not is_gpu: torch.npu.set_compile_mode(jit_compile=args.jit_compile) used_device = current_device + ":" + str(args.device_id) try: if is_gpu: - torch.cuda.set_device(used_device) + torch.cuda.set_device(used_device) else: torch.npu.set_device(used_device) except Exception as error: -- Gitee From 085da798fe74dddd19ca05447607812f82a5c380 Mon Sep 17 00:00:00 2001 From: louyujing Date: Mon, 4 Dec 2023 09:17:18 +0000 Subject: [PATCH 08/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../api_accuracy_checker/run_ut/run_ut.py | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 1fdd8e25a..4b64dc699 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -239,32 +239,39 @@ def initialize_save_error_data(): initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) -def validate_result_csv_path(result_csv_path): +def get_validated_result_csv_path(result_csv_path): result_csv_path_checker = FileChecker(result_csv_path, FileCheckConst.FILE, ability=FileCheckConst.READ_WRITE_ABLE, file_type=FileCheckConst.CSV_SUFFIX) - result_csv_path = result_csv_path_checker.common_check() - result_csv_name = os.path.basename(result_csv_path) + validated_result_csv_path = result_csv_path_checker.common_check() + result_csv_name = os.path.basename(validated_result_csv_path) pattern = r"^accuracy_checking_result_\d{14}\.csv$" if not re.match(pattern, result_csv_name): raise ValueError("When continue run ut, please do not modify the result csv name.") - return result_csv_path, result_csv_name + return validated_result_csv_path -def validate_details_csv_path_by_validated_result_csv_path(validated_result_csv_path): +def get_validated_details_csv_path(validated_result_csv_path): result_csv_name = os.path.basename(validated_result_csv_path) details_csv_name = result_csv_name.replace('result', 'details') details_csv_path = os.path.join(os.path.dirname(validated_result_csv_path), details_csv_name) details_csv_path_checker = FileChecker(details_csv_path, FileCheckConst.FILE, ability=FileCheckConst.READ_WRITE_ABLE, file_type=FileCheckConst.CSV_SUFFIX) - details_csv_path = details_csv_path_checker.common_check() - return details_csv_path + validated_details_csv_path = details_csv_path_checker.common_check() + return validated_details_csv_path -def validate_result_csv_content_by_forward_json_content(validated_result_csv_path, forward_content): +def validate_csv_content_by_forward_json_content(validated_result_csv_path, validated_details_csv_path, + forward_content): result_csv_name = os.path.basename(validated_result_csv_path) with FileOpen(validated_result_csv_path, 'r') as file: reader = csv.reader(file) result_csv_rows = [row for row in reader] + if not result_csv_rows: + # If result csv is empty, details csv should also be empty + with FileOpen(validated_details_csv_path, 'w'): + pass + compare = Comparator(validated_result_csv_path, validated_details_csv_path, True) + compare.write_csv_title() global api_in_csv_num api_in_csv_num = len(result_csv_rows) - 1 if len(result_csv_rows) - 1 > 0 else 0 if api_in_csv_num > 0: @@ -284,37 +291,35 @@ def validate_result_csv_content_by_forward_json_content(validated_result_csv_pat forward_json_api_list.append(item[0]) if result_csv_api_list != forward_json_api_list: raise ValueError("The saved api data in %s is not from forward_info json" % result_csv_name) - return result_csv_rows def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward_content, save_error_data): - result_csv_path, result_csv_name = validate_result_csv_path(result_csv_path) - details_csv_path = validate_details_csv_path_by_validated_result_csv_path(result_csv_path) + validated_result_csv_path = get_validated_result_csv_path(result_csv_path) + validated_details_csv_path = get_validated_details_csv_path(validated_result_csv_path) if save_error_data: - time_info = result_csv_path.split('.')[0].split('_')[-1] + time_info = validated_result_csv_path.split('.')[0].split('_')[-1] ut_error_data_dir_name = 'ut_error_data' + time_info - ut_error_data_dir_path = os.path.join(os.path.dirname(result_csv_path), ut_error_data_dir_name) + ut_error_data_dir_path = os.path.join(os.path.dirname(validated_result_csv_path), ut_error_data_dir_name) global UT_ERROR_DATA_DIR UT_ERROR_DATA_DIR = ut_error_data_dir_path initialize_save_error_data() - result_csv_rows = validate_result_csv_content_by_forward_json_content(result_csv_path, forward_content) - if not result_csv_rows: - # If result csv is empty, details csv should also be empty - with FileOpen(details_csv_path, 'w'): - pass - compare = Comparator(result_csv_path, details_csv_path, True) - compare.write_csv_title() - get_statistics_from_result_csv(result_csv_rows[1:], result_csv_name) - return result_csv_path, details_csv_path + validate_csv_content_by_forward_json_content(validated_result_csv_path, validated_details_csv_path, + forward_content) + get_statistics_from_result_csv(validated_result_csv_path) + return validated_result_csv_path, validated_details_csv_path -def get_statistics_from_result_csv(result_csv_rows: list, result_csv_name: str): +def get_statistics_from_result_csv(validated_result_csv_path): global test_result_cnt test_result_cnt = { "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, "total_num": 0, "forward_or_backward_fail_num": 0 } - for item in result_csv_rows: + with FileOpen(validated_result_csv_path, 'r') as file: + reader = csv.reader(file) + result_csv_rows = [row for row in reader] + result_csv_name = os.path.basename(validated_result_csv_path) + for item in result_csv_rows[1:]: if not isinstance(item, list) or len(item) < 3: raise ValueError("The number of columns in %s is incorrect" % result_csv_name) if item[1] not in ['True', 'False', CompareConst.NA, 'SKIP'] \ @@ -333,7 +338,6 @@ def get_statistics_from_result_csv(result_csv_rows: list, result_csv_name: str): else: test_result_cnt['backward_fail_num'] += 1 test_result_cnt['forward_or_backward_fail_num'] += 1 - return test_result_cnt def _run_ut_parser(parser): -- Gitee From 0d22e78f12bfa69d58056528b4465a38a1688b04 Mon Sep 17 00:00:00 2001 From: louyujing Date: Wed, 6 Dec 2023 09:37:08 +0000 Subject: [PATCH 09/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- .../api_accuracy_checker/run_ut/run_ut.py | 98 ++++++------------- 1 file changed, 32 insertions(+), 66 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 4b64dc699..292d104e0 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -15,6 +15,7 @@ else: import yaml import torch from tqdm import tqdm +from collections import namedtuple from api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args from api_accuracy_checker.common.utils import print_info_log, print_warn_log, get_json_contents, api_info_preprocess, \ print_error_log, check_file_or_directory_path, initialize_save_path, Const @@ -33,8 +34,8 @@ current_time = time.strftime("%Y%m%d%H%M%S") UT_ERROR_DATA_DIR = 'ut_error_data' + current_time RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" -api_in_csv_num = -1 -test_result_cnt = None +RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', + 'save_error_data', 'is_continue_run_ut', 'test_result_cnt']) def init_environment(): @@ -122,22 +123,26 @@ def generate_cpu_params(input_args, input_kwargs, need_backward): return cpu_args, cpu_kwargs -def run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data): +def run_ut(config): print_info_log("start UT test") api_setting_dict = get_json_contents("torch_ut_setting.json") - is_continue_run_ut = True if api_in_csv_num != -1 else False - compare = Comparator(result_csv_path, details_csv_path, is_continue_run_ut, test_result_cnt) - for i, (api_full_name, api_info_dict) in enumerate(tqdm(forward_content.items())): - if i < api_in_csv_num: + compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut, + config.test_result_cnt) + with FileOpen(config.result_csv_path, 'r') as file: + csv_reader = csv.reader(file) + next(csv_reader) + api_name_set = {row[0] for row in csv_reader} + for i, (api_full_name, api_info_dict) in enumerate(tqdm(config.forward_content.items())): + if api_full_name in api_name_set: continue try: - data_info = run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_dict) + data_info = run_torch_api(api_full_name, api_setting_dict, config.backward_content, api_info_dict) is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info.bench_out, data_info.device_out, data_info.bench_grad_out, data_info.device_grad_out) - if save_error_data: + if config.save_error_data: do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) except Exception as err: [_, api_name, _] = api_full_name.split("*") @@ -260,57 +265,7 @@ def get_validated_details_csv_path(validated_result_csv_path): return validated_details_csv_path -def validate_csv_content_by_forward_json_content(validated_result_csv_path, validated_details_csv_path, - forward_content): - result_csv_name = os.path.basename(validated_result_csv_path) - with FileOpen(validated_result_csv_path, 'r') as file: - reader = csv.reader(file) - result_csv_rows = [row for row in reader] - if not result_csv_rows: - # If result csv is empty, details csv should also be empty - with FileOpen(validated_details_csv_path, 'w'): - pass - compare = Comparator(validated_result_csv_path, validated_details_csv_path, True) - compare.write_csv_title() - global api_in_csv_num - api_in_csv_num = len(result_csv_rows) - 1 if len(result_csv_rows) - 1 > 0 else 0 - if api_in_csv_num > 0: - if api_in_csv_num > len(forward_content): - raise ValueError( - "%s data is abnormal, the number of rows is greater than the number of rows in forward_info json" - % result_csv_name) - result_csv_api_list = [] - forward_json_api_list = [] - for item in result_csv_rows[1:]: - if not item: - raise ValueError("%s data is abnormal, the API name has a null value" % result_csv_name) - result_csv_api_list.append(item[0]) - for item in list(forward_content.items())[:api_in_csv_num]: - if not item: - raise ValueError("forward_info json data is abnormal, the API name has a null value") - forward_json_api_list.append(item[0]) - if result_csv_api_list != forward_json_api_list: - raise ValueError("The saved api data in %s is not from forward_info json" % result_csv_name) - - -def validate_continue_run_ut_required_files_and_folders(result_csv_path, forward_content, save_error_data): - validated_result_csv_path = get_validated_result_csv_path(result_csv_path) - validated_details_csv_path = get_validated_details_csv_path(validated_result_csv_path) - if save_error_data: - time_info = validated_result_csv_path.split('.')[0].split('_')[-1] - ut_error_data_dir_name = 'ut_error_data' + time_info - ut_error_data_dir_path = os.path.join(os.path.dirname(validated_result_csv_path), ut_error_data_dir_name) - global UT_ERROR_DATA_DIR - UT_ERROR_DATA_DIR = ut_error_data_dir_path - initialize_save_error_data() - validate_csv_content_by_forward_json_content(validated_result_csv_path, validated_details_csv_path, - forward_content) - get_statistics_from_result_csv(validated_result_csv_path) - return validated_result_csv_path, validated_details_csv_path - - def get_statistics_from_result_csv(validated_result_csv_path): - global test_result_cnt test_result_cnt = { "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, "total_num": 0, "forward_or_backward_fail_num": 0 @@ -328,6 +283,7 @@ def get_statistics_from_result_csv(validated_result_csv_path): % result_csv_name) if item[1] == 'SKIP': continue + test_result_cnt["total_num"] += 1 if item[1] == 'True' and item[2] in ['True', 'N/A']: test_result_cnt['success_num'] += 1 elif item[1] == 'False' and item[2] == 'False': @@ -338,6 +294,7 @@ def get_statistics_from_result_csv(validated_result_csv_path): else: test_result_cnt['backward_fail_num'] += 1 test_result_cnt['forward_or_backward_fail_num'] += 1 + return test_result_cnt def _run_ut_parser(parser): @@ -358,7 +315,7 @@ def _run_ut_parser(parser): help=" whether to turn on jit compile", required=False) parser.add_argument("-d", "--device", dest="device_id", type=int, help=" set device id to run ut", default=0, required=False) - parser.add_argument("-c", "--continue_run_ut", dest="continue_run_ut", default="", type=str, + parser.add_argument("-csv_path", "--result_csv_path", dest="result_csv_path", default="", type=str, help=" The path of accuracy_checking_result_{timestamp}.csv, " "when run ut is interrupted, enter the file path to continue run ut.", required=False) @@ -393,13 +350,22 @@ def _run_ut(): backward_content = get_json_contents(backward_file) result_csv_path = os.path.join(out_path, RESULT_FILE_NAME) details_csv_path = os.path.join(out_path, DETAILS_FILE_NAME) - if save_error_data and not args.continue_run_ut: + test_result_cnt = None + if args.result_csv_path: + result_csv_path = get_validated_result_csv_path(args.result_csv_path) + details_csv_path = get_validated_details_csv_path(result_csv_path) + test_result_cnt = get_statistics_from_result_csv(result_csv_path) + if save_error_data: + if args.result_csv_path: + time_info = result_csv_path.split('.')[0].split('_')[-1] + ut_error_data_dir_name = 'ut_error_data' + time_info + ut_error_data_dir_path = os.path.join(os.path.dirname(result_csv_path), ut_error_data_dir_name) + global UT_ERROR_DATA_DIR + UT_ERROR_DATA_DIR = ut_error_data_dir_path initialize_save_error_data() - if args.continue_run_ut: - result_csv_path, details_csv_path = validate_continue_run_ut_required_files_and_folders(args.continue_run_ut, - forward_content, - save_error_data) - run_ut(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data) + run_ut_config = RunUTConfig(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, + args.result_csv_path, test_result_cnt) + run_ut(run_ut_config) class UtDataInfo: -- Gitee From 81ace50b3e194b3e4c905206e8605936a9d287ac Mon Sep 17 00:00:00 2001 From: louyujing Date: Wed, 6 Dec 2023 09:38:26 +0000 Subject: [PATCH 10/11] update debug/accuracy_tools/api_accuracy_checker/README.md. Signed-off-by: louyujing --- debug/accuracy_tools/api_accuracy_checker/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/README.md b/debug/accuracy_tools/api_accuracy_checker/README.md index 779f2f74d..bb475fe99 100644 --- a/debug/accuracy_tools/api_accuracy_checker/README.md +++ b/debug/accuracy_tools/api_accuracy_checker/README.md @@ -92,7 +92,7 @@ Ascend模型精度预检工具能在昇腾NPU上扫描用户训练模型中所 | -o或--out_path | 指指定run_ut执行结果存盘路径,默认“./”(相对于run_ut的路径)。 | 否 | | -j或--jit_compile | 开启jit编译。 | 否 | | -d或--device | 指定Device ID,选择UT代码运行所在的卡,默认值为0。 | 否 | - | -c或--continue_run_ut | 指定本次运行中断时生成的accuracy_checking_result_{timestamp}.csv文件路径,执行run_ut中断时,若想从中断处继续执行,配置此参数即可。 | 否 | + | -csv_path或--result_csv_path | 指定本次运行中断时生成的accuracy_checking_result_{timestamp}.csv文件路径,执行run_ut中断时,若想从中断处继续执行,配置此参数即可。 | 否 | run_ut执行结果包括accuracy_checking_result_{timestamp}.csv和accuracy_checking_details_{timestamp}.csv两个文件。accuracy_checking_result_{timestamp}.csv是API粒度的,标明每个API是否通过测试。建议用户先查看accuracy_checking_result_{timestamp}.csv文件,对于其中没有通过测试的或者特定感兴趣的API,根据其API name字段在accuracy_checking_details_{timestamp}.csv中查询其各个输出的达标情况以及比较指标。API达标情况介绍请参考“**API预检指标**”。 @@ -103,12 +103,12 @@ Ascend模型精度预检工具能在昇腾NPU上扫描用户训练模型中所 ``` 数据默认会存盘到'./ut_error_data{timestamp}'路径下(相对于启动run_ut的路径),有需要的话,用户可以通过msCheckerConfig.update_config来配置保存路径,参数为error_data_path。 -5. 如果本次run_ut运行中断,需要从中断处继续执行,可以在run_ut执行命令结尾配置-c,例如: +5. 如果本次run_ut运行中断,需要从中断处继续执行,可以在run_ut执行命令结尾配置-csv_path,例如: ```bash - python run_ut.py -forward ./forward_info_0.json -backward ./backward_info_0.json -c ./accuracy_checking_result_20231203211324.csv + python run_ut.py -forward ./forward_info_0.json -backward ./backward_info_0.json -csv_path ./accuracy_checking_result_20231203211324.csv ``` - run_ut将会从中断处继续执行,执行结果将追加写入到-c配置的accuracy_checking_result_20231203211324.csv以及相同时间戳后缀的accuracy_checking_details_20231203211324.csv中,若配置了-save_error_data,error_data将会保存到相同时间戳后缀的ut_error_data20231203211324文件夹中。 + run_ut将会从中断处继续执行,执行结果将追加写入到-csv_path配置的accuracy_checking_result_20231203211324.csv以及相同时间戳后缀的accuracy_checking_details_20231203211324.csv中,若配置了-save_error_data,error_data将会保存到相同时间戳后缀的ut_error_data20231203211324文件夹中。 ## API预检白名单 -- Gitee From 203c9bf101276a08b490713a5f30846d21dbc1db Mon Sep 17 00:00:00 2001 From: louyujing Date: Thu, 7 Dec 2023 01:04:37 +0000 Subject: [PATCH 11/11] update debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py. Signed-off-by: louyujing --- debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 0b2c0c1e2..fb885168d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -4,6 +4,7 @@ import csv import re import sys import time +from collections import namedtuple try: import torch_npu except ImportError: @@ -15,7 +16,6 @@ else: import yaml import torch from tqdm import tqdm -from collections import namedtuple from api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args from api_accuracy_checker.common.utils import print_info_log, print_warn_log, get_json_contents, api_info_preprocess, \ print_error_log, check_file_or_directory_path, initialize_save_path, Const -- Gitee