From 856e9a64fb46afc2e8a55cdc5f4434c2fb019237 Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 24 Jan 2024 10:11:17 +0800 Subject: [PATCH 1/4] add real_data_path --- .../api_accuracy_checker/run_ut/multi_run_ut.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index 121c38722..22002240f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -43,14 +43,14 @@ signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) -ParallelUTConfig = namedtuple('ParallelUTConfig', ['forward_files', 'backward_files', 'out_path', 'num_splits', 'save_error_data_flag', 'jit_compile_flag', 'device_id', 'result_csv_path', 'total_items']) +ParallelUTConfig = namedtuple('ParallelUTConfig', ['forward_files', 'backward_files', 'out_path', 'num_splits', 'save_error_data_flag', 'jit_compile_flag', 'device_id', 'result_csv_path', 'total_items','real_data_path']) def run_parallel_ut(config): processes = [] device_id_cycle = cycle(config.device_id) if config.save_error_data_flag: - print_info_log(f"UT task error datas will be saved") + print_info_log("UT task error datas will be saved") print_info_log(f"Starting parallel UT with {config.num_splits} processes") progress_bar = tqdm(total=config.total_items, desc="Total items", unit="items") @@ -63,7 +63,8 @@ def run_parallel_ut(config): '-d', str(dev_id), *(['-j'] if config.jit_compile_flag else []), *(['-save_error_data'] if config.save_error_data_flag else []), - '-csv_path', config.result_csv_path + '-csv_path', config.result_csv_path, + *(['-real_data_path'] if config.real_data_path else []) ] return cmd @@ -96,7 +97,7 @@ def run_parallel_ut(config): for process in processes: process.communicate(timeout=None) except KeyboardInterrupt: - print_warn_log("Interrupted by user, terminating processes and clear up...") + print_warn_log("Interrupted by user, terminating processes and cleaning up...") except Exception as e: print_error_log(f"An unexpected error occurred: {e}") finally: @@ -132,7 +133,7 @@ def prepare_config(args): details_csv_path = get_validated_details_csv_path(result_csv_path) print_info_log(f"UT task result will be saved in {result_csv_path}") print_info_log(f"UT task details will be saved in {details_csv_path}") - return ParallelUTConfig(forward_splits, backward_splits, out_path, args.num_splits, args.save_error_data, args.jit_compile, args.device_id, result_csv_path, total_items) + return ParallelUTConfig(forward_splits, backward_splits, out_path, args.num_splits, args.save_error_data, args.jit_compile, args.device_id, result_csv_path, total_items, args.real_data_path) def main(): -- Gitee From 1427cd28395fde887f7378e0307a39bd7ba003cb Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 24 Jan 2024 11:28:51 +0800 Subject: [PATCH 2/4] update --- .../run_ut/multi_run_ut.py | 43 +++++++++++++------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index 22002240f..6158847e3 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -5,6 +5,7 @@ import sys import argparse import time import signal +import threading from collections import namedtuple from itertools import cycle from tqdm import tqdm @@ -43,7 +44,7 @@ signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) -ParallelUTConfig = namedtuple('ParallelUTConfig', ['forward_files', 'backward_files', 'out_path', 'num_splits', 'save_error_data_flag', 'jit_compile_flag', 'device_id', 'result_csv_path', 'total_items','real_data_path']) +ParallelUTConfig = namedtuple('ParallelUTConfig', ['forward_files', 'backward_files', 'out_path', 'num_splits', 'save_error_data_flag', 'jit_compile_flag', 'device_id', 'result_csv_path', 'total_items', 'real_data_path']) def run_parallel_ut(config): @@ -64,14 +65,38 @@ def run_parallel_ut(config): *(['-j'] if config.jit_compile_flag else []), *(['-save_error_data'] if config.save_error_data_flag else []), '-csv_path', config.result_csv_path, - *(['-real_data_path'] if config.real_data_path else []) + *(['-real_data_path', config.real_data_path] if config.real_data_path else []) ] return cmd + def read_process_output(process): + while True: + output = process.stdout.readline() + if output == '': + break + if 'ERROR' in output: + print(output, end='') + + def update_progress_bar(progress_bar, result_csv_path): + while any(process.poll() is None for process in processes): + try: + with open(result_csv_path, 'r') as result_file: + completed_items = len(result_file.readlines()) - 1 + progress_bar.update(completed_items - progress_bar.n) + except FileNotFoundError: + print_warn_log(f"Result CSV file not found: {result_csv_path}.") + except Exception as e: + print_error_log(f"An unexpected error occurred while reading result CSV: {e}") + time.sleep(10) + for fwd, bwd in zip(config.forward_files, config.backward_files): cmd = create_cmd(fwd, bwd, next(device_id_cycle)) - process = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) processes.append(process) + threading.Thread(target=read_process_output, args=(process,), daemon=True).start() + + progress_bar_thread = threading.Thread(target=update_progress_bar, args=(progress_bar, config.result_csv_path)) + progress_bar_thread.start() def clean_up(): progress_bar.close() @@ -83,17 +108,6 @@ def run_parallel_ut(config): os.remove(file) try: - while any(process.poll() is None for process in processes): - try: - with open(config.result_csv_path, 'r') as result_file: - completed_items = len(result_file.readlines()) - 1 - progress_bar.update(completed_items - progress_bar.n) - except FileNotFoundError: - print_warn_log(f"Result CSV file not found: {config.result_csv_path}.") - except Exception as e: - print_error_log(f"An unexpected error occurred while reading result CSV: {e}") - time.sleep(10) - for process in processes: process.communicate(timeout=None) except KeyboardInterrupt: @@ -102,6 +116,7 @@ def run_parallel_ut(config): print_error_log(f"An unexpected error occurred: {e}") finally: clean_up() + progress_bar_thread.join() try: comparator = Comparator(config.result_csv_path, config.result_csv_path, False) comparator.print_pretest_result() -- Gitee From 1541fa46e2b8c162d0878f00e6577d5f6f81ceb9 Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 24 Jan 2024 11:37:55 +0800 Subject: [PATCH 3/4] update --- .../api_accuracy_checker/run_ut/multi_run_ut.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index 6158847e3..f38c1638f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -91,7 +91,7 @@ def run_parallel_ut(config): for fwd, bwd in zip(config.forward_files, config.backward_files): cmd = create_cmd(fwd, bwd, next(device_id_cycle)) - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, bufsize=1) processes.append(process) threading.Thread(target=read_process_output, args=(process,), daemon=True).start() @@ -105,7 +105,10 @@ def run_parallel_ut(config): process.terminate() process.wait() for file in config.forward_files: - os.remove(file) + try: + os.remove(file) + except FileNotFoundError: + print_warn_log(f"File not found and could not be deleted: {file}") try: for process in processes: -- Gitee From 36e2d3020531ed479e1d9aad29b7aace6b53ce24 Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 24 Jan 2024 11:40:38 +0800 Subject: [PATCH 4/4] update --- .../api_accuracy_checker/run_ut/multi_run_ut.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index f38c1638f..47e391f3f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -74,7 +74,7 @@ def run_parallel_ut(config): output = process.stdout.readline() if output == '': break - if 'ERROR' in output: + if '[ERROR]' in output: print(output, end='') def update_progress_bar(progress_bar, result_csv_path): @@ -94,7 +94,7 @@ def run_parallel_ut(config): process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, bufsize=1) processes.append(process) threading.Thread(target=read_process_output, args=(process,), daemon=True).start() - + progress_bar_thread = threading.Thread(target=update_progress_bar, args=(progress_bar, config.result_csv_path)) progress_bar_thread.start() -- Gitee