From 934c3d2a0396f62a80e456082e2aba9ebd283b0f Mon Sep 17 00:00:00 2001 From: l30036321 Date: Thu, 3 Aug 2023 11:31:51 +0800 Subject: [PATCH] add convert check --- .../api_accuracy_checker/common/utils.py | 1097 +++++++++-------- .../api_accuracy_checker/run_ut/run_ut.py | 311 ++--- 2 files changed, 714 insertions(+), 694 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index dac54a79fc..cb4e1b44d3 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -1,539 +1,558 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -import collections -import json -import os -import random -import re -import stat -import subprocess -import sys -import time -from datetime import datetime, timezone - -import numpy as np -import torch -import pandas as pd - -try: - import torch_npu -except ImportError: - IS_GPU = True -else: - IS_GPU = False - -if not IS_GPU: - from torch_npu.utils.device_guard import torch_device_guard as torch_npu_device_guard - -device = collections.namedtuple('device', ['type', 'index']) - - -class Const: - """ - Class for const - """ - MODEL_TYPE = ['.onnx', '.pb', '.om'] - DIM_PATTERN = r"^(-?[0-9]+)(,-?[0-9]+)*" - SEMICOLON = ";" - COLON = ":" - EQUAL = "=" - COMMA = "," - DOT = "." 
- DUMP_RATIO_MAX = 100 - SUMMERY_DATA_NUMS = 256 - ONE_HUNDRED_MB = 100*1024*1024 - FLOAT_EPSILON = np.finfo(float).eps - SUPPORT_DUMP_MODE = ['api', 'acl'] - ON = 'ON' - OFF = 'OFF' - BACKWARD = 'backward' - FORWARD = 'forward' - FLOAT_TYPE = [np.half, np.single, np.double, np.float64, np.longdouble] - - # dump mode - ALL = "all" - LIST = "list" - RANGE = "range" - STACK = "stack" - ACL = "acl" - API_LIST = "api_list" - API_STACK = "api_stack" - DUMP_MODE = [ALL, LIST, RANGE, STACK, ACL, API_LIST, API_STACK] - - API_PATTERN = r"^[A-Za-z0-9]+[_]+([A-Za-z0-9]+[_]*[A-Za-z0-9]+)[_]+[0-9]+[_]+[A-Za-z0-9]+" - WRITE_FLAGS = os.O_WRONLY | os.O_CREAT - WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR - -class CompareConst: - """ - Class for compare module const - """ - # compare result column name - NPU_NAME = "NPU Name" - BENCH_NAME = "Bench Name" - NPU_DTYPE = "NPU Tensor Dtype" - BENCH_DTYPE = "Bench Tensor Dtype" - NPU_SHAPE = "NPU Tensor Shape" - BENCH_SHAPE = "Bench Tensor Shape" - NPU_MAX = "NPU max" - NPU_MIN = "NPU min" - NPU_MEAN = "NPU mean" - BENCH_MAX = "Bench max" - BENCH_MIN = "Bench min" - BENCH_MEAN = "Bench mean" - COSINE = "Cosine" - MAX_ABS_ERR = "MaxAbsErr" - ACCURACY = "Accuracy Reached or Not" - STACK = "NPU_Stack_Info" - ERROR_MESSAGE = "Err_message" - - # compare result data - NAN = 'Nan' - SHAPE_UNMATCH = 'shape unmatched' - DTYPE_UNMATCH = 'dtype unmatched' - - # accuracy standards - COS_THRESHOLD = 0.99 - MAX_ABS_ERR_THRESHOLD = 0.001 - COS_MAX_THRESHOLD = 0.9 - MAX_ABS_ERR_MAX_THRESHOLD = 1 - ACCURACY_CHECK_YES = "Yes" - ACCURACY_CHECK_NO = "No" - ACCURACY_CHECK_UNMATCH = "Unmatched" - - # error message - NO_BENCH = "No bench data matched." - - -class VersionCheck: - """ - Class for TorchVersion - """ - V1_8 = "1.8" - V1_11 = "1.11" - - @staticmethod - def check_torch_version(version): - torch_version = torch.__version__ - if torch_version.startswith(version): - return True - else: - return False - - -class CompareException(Exception): - """ - Class for Accuracy Compare Exception - """ - NONE_ERROR = 0 - INVALID_PATH_ERROR = 1 - OPEN_FILE_ERROR = 2 - CLOSE_FILE_ERROR = 3 - READ_FILE_ERROR = 4 - WRITE_FILE_ERROR = 5 - INVALID_FILE_ERROR = 6 - PERMISSION_ERROR = 7 - INDEX_OUT_OF_BOUNDS_ERROR = 8 - NO_DUMP_FILE_ERROR = 9 - INVALID_DATA_ERROR = 10 - INVALID_PARAM_ERROR = 11 - INVALID_DUMP_RATIO = 12 - INVALID_DUMP_FILE = 13 - UNKNOWN_ERROR = 14 - INVALID_DUMP_MODE = 15 - PARSE_FILE_ERROR = 16 - INVALID_COMPARE_MODE = 17 - - def __init__(self, code, error_info: str = ""): - super(CompareException, self).__init__() - self.code = code - self.error_info = error_info - - def __str__(self): - return self.error_info - -class DumpException(CompareException): - pass - -def read_json(file): - with open(file, 'r') as f: - obj = json.load(f) - return obj - -def write_csv(data, filepath): - data_frame = pd.DataFrame(columns=data) - data_frame.to_csv(filepath, index=False) - -def _print_log(level, msg): - current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))) - pid = os.getgid() - print(current_time + "(" + str(pid) + ")-[" + level + "]" + msg) - sys.stdout.flush() - - -def print_info_log(info_msg): - """ - Function Description: - print info log. - Parameter: - info_msg: the info message. - """ - _print_log("INFO", info_msg) - - -def print_error_log(error_msg): - """ - Function Description: - print error log. - Parameter: - error_msg: the error message. 
- """ - _print_log("ERROR", error_msg) - - -def print_warn_log(warn_msg): - """ - Function Description: - print warn log. - Parameter: - warn_msg: the warning message. - """ - _print_log("WARNING", warn_msg) - - -def check_mode_valid(mode): - if mode not in Const.DUMP_MODE: - msg = "Current mode '%s' is not supported. Please use the field in %s" % \ - (mode, Const.DUMP_MODE) - raise CompareException(CompareException.INVALID_DUMP_MODE, msg) - - -def check_object_type(check_object, allow_type): - """ - Function Description: - Check if the object belongs to a certain data type - Parameter: - check_object: the object to be checked - allow_type: legal data type - Exception Description: - when invalid data throw exception - """ - if not isinstance(check_object, allow_type): - print_error_log(f"{check_object} not of {allow_type} type") - raise CompareException(CompareException.INVALID_DATA_ERROR) - - -def check_file_or_directory_path(path, isdir=False): - """ - Function Description: - check whether the path is valid - Parameter: - path: the path to check - isdir: the path is dir or file - Exception Description: - when invalid data throw exception - """ - if isdir: - if not os.path.exists(path): - print_error_log('The path {} is not exist.'.format(path)) - raise CompareException(CompareException.INVALID_PATH_ERROR) - - if not os.path.isdir(path): - print_error_log('The path {} is not a directory.'.format(path)) - raise CompareException(CompareException.INVALID_PATH_ERROR) - - if not os.access(path, os.W_OK): - print_error_log( - 'The path {} does not have permission to write. Please check the path permission'.format(path)) - raise CompareException(CompareException.INVALID_PATH_ERROR) - else: - if not os.path.isfile(path): - print_error_log('{} is an invalid file or non-exist.'.format(path)) - raise CompareException(CompareException.INVALID_PATH_ERROR) - - if not os.access(path, os.R_OK): - print_error_log( - 'The path {} does not have permission to read. Please check the path permission'.format(path)) - raise CompareException(CompareException.INVALID_PATH_ERROR) - -def _check_pkl(pkl_file_handle, file_name): - tensor_line = pkl_file_handle.readline() - if len(tensor_line) == 0: - print_error_log("dump file {} have empty line!".format(file_name)) - raise CompareException(CompareException.INVALID_DUMP_FILE) - pkl_file_handle.seek(0, 0) - - -def check_file_mode(npu_pkl, bench_pkl, stack_mode): - npu_pkl_name = os.path.split(npu_pkl)[-1] - bench_pkl_name = os.path.split(bench_pkl)[-1] - - if not npu_pkl_name.startswith("api_stack") and not bench_pkl_name.startswith("api_stack"): - if stack_mode: - print_error_log("The current file does not contain stack information, please turn off the stack_mode") - raise CompareException(CompareException.INVALID_COMPARE_MODE) - elif npu_pkl_name.startswith("api_stack") and bench_pkl_name.startswith("api_stack"): - if not stack_mode: - print_error_log("The current file contains stack information, please turn on the stack_mode") - raise CompareException(CompareException.INVALID_COMPARE_MODE) - else: - print_error_log("The dump mode of the two files is not same, please check the dump files") - raise CompareException(CompareException.INVALID_COMPARE_MODE) - - -def check_file_size(input_file, max_size): - try: - file_size = os.path.getsize(input_file) - except OSError as os_error: - print_error_log('Failed to open "%s". 
%s' % (input_file, str(os_error))) - raise CompareException(CompareException.INVALID_FILE_ERROR) - if file_size > max_size: - print_error_log('The size (%d) of %s exceeds (%d) bytes, tools not support.' - % (file_size, input_file, max_size)) - raise CompareException(CompareException.INVALID_FILE_ERROR) - - -def get_dump_data_path(dump_dir): - """ - Function Description: - traverse directories and obtain the absolute path of dump data - Parameter: - dump_dir: dump data directory - Return Value: - dump data path,file is exist or file is not exist - """ - dump_data_path = None - file_is_exist = False - - check_file_or_directory_path(dump_dir, True) - for dir_path, sub_paths, files in os.walk(dump_dir): - if len(files) != 0: - dump_data_path = dir_path - file_is_exist = True - break - dump_data_path = dir_path - return dump_data_path, file_is_exist - - -def get_api_name_from_matcher(name): - api_matcher = re.compile(Const.API_PATTERN) - match = api_matcher.match(name) - return match.group(1) if match else "" - - -def modify_dump_path(dump_path, mode): - if mode == Const.ALL: - return dump_path - file_name = os.path.split(dump_path) - mode_file_name = mode + "_" + file_name[-1] - return os.path.join(file_name[0], mode_file_name) - - -def create_directory(dir_path): - """ - Function Description: - creating a directory with specified permissions - Parameter: - dir_path: directory path - Exception Description: - when invalid data throw exception - """ - if not os.path.exists(dir_path): - try: - os.makedirs(dir_path, mode=0o700) - except OSError as ex: - print_error_log( - 'Failed to create {}.Please check the path permission or disk space .{}'.format(dir_path, str(ex))) - raise CompareException(CompareException.INVALID_PATH_ERROR) - - -def execute_command(cmd): - """ - Function Description: - run the following command - Parameter: - cmd: command - Exception Description: - when invalid command throw exception - """ - print_info_log('Execute command:%s' % cmd) - process = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - while process.poll() is None: - line = process.stdout.readline() - line = line.strip() - if line: - print(line) - if process.returncode != 0: - print_error_log('Failed to execute command:%s' % " ".join(cmd)) - raise CompareException(CompareException.INVALID_DATA_ERROR) - - -def save_numpy_data(file_path, data): - """ - save_numpy_data - """ - if not os.path.exists(os.path.dirname(file_path)): - os.makedirs(os.path.dirname(file_path)) - np.save(file_path, data) - - -def parse_arg_value(values): - """ - parse dynamic arg value of atc cmdline - """ - value_list = [] - for item in values.split(Const.SEMICOLON): - value_list.append(parse_value_by_comma(item)) - return value_list - - -def parse_value_by_comma(value): - """ - parse value by comma, like '1,2,4,8' - """ - value_list = [] - value_str_list = value.split(Const.COMMA) - for value_str in value_str_list: - value_str = value_str.strip() - if value_str.isdigit() or value_str == '-1': - value_list.append(int(value_str)) - else: - print_error_log("please check your input shape.") - raise CompareException(CompareException.INVALID_PARAM_ERROR) - return value_list - - -def get_data_len_by_shape(shape): - data_len = 1 - for item in shape: - if item == -1: - print_error_log("please check your input shape, one dim in shape is -1.") - return -1 - data_len = data_len * item - return data_len - - -def add_time_as_suffix(name): - return '{}_{}.csv'.format(name, time.strftime("%Y%m%d%H%M%S", 
time.localtime(time.time()))) - - -def get_time(): - return datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S") - - -def format_value(value): - return '{:.6f}'.format(value) - - -def torch_device_guard(func): - if IS_GPU: - return func - # Parse args/kwargs matched torch.device objects - - @torch_npu_device_guard - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - return wrapper - - -def seed_all(seed=1234, mode=False): - random.seed(seed) - os.environ['PYTHONHASHSEED'] = str(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.use_deterministic_algorithms(mode) - if IS_GPU: - torch.cuda.manual_seed_all(seed) - torch.cuda.manual_seed(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.enable = False - torch.backends.cudnn.benchmark = False - else: - torch_npu.npu.manual_seed_all(seed) - torch_npu.npu.manual_seed(seed) - - -def get_process_rank(model): - print_info_log("Rank id is not provided. Trying to get the rank id of the model.") - try: - device = next(model.parameters()).device - except StopIteration: - print_warn_log('There is no parameter in the model. Fail to get rank id.') - return 0, False - if device.type == 'cpu': - print_warn_log("Warning: the debugger is unable to get the rank id. " - "This may cause the dumpped data to be corrupted in the " - "case of distributed training. (You may ignore this if you are using only one card.) " - "Transfer the model to npu or gpu before register_hook() to avoid this warning.") - return 0, False - else: - return device.index, True - - -def get_json_contents(file_path): - ops = get_file_content_bytes(file_path) - return json.loads(ops) - - -def get_file_content_bytes(file): - check_input_file_valid(file) - with open(file, 'rb') as file_handle: - return file_handle.read() - - -def islink(path): - path = os.path.abspath(path) - return os.path.islink(path) - - -class SoftlinkCheckException(Exception): - pass - - -MAX_JSON_FILE_SIZE = 10 * 1024 ** 2 -LINUX_FILE_NAME_LENGTH_LIMIT = 200 - - -def check_path_length_valid(path): - path = os.path.realpath(path) - return len(os.path.basename(path) <= LINUX_FILE_NAME_LENGTH_LIMIT) - - -def check_path_pattern_valid(path): - pattern = re.compile(r'(\.|/|:|_|-|\s|[~0-9a-zA-Z])+') - if not pattern.fullmatch(path): - raise ValueError('Only the following characters are allowed in the path: A-Z a-z 0-9 - _ . / :') - - -def check_input_file_valid(input_path, max_file_size=MAX_JSON_FILE_SIZE): - if islink(input_path): - raise SoftlinkCheckException("Input path doesn't support soft link.") - - input_path = os.path.realpath(input_path) - if not os.path.exists(input_path): - raise ValueError('Input file %s does not exist!' % input_path) - - if not os.access(input_path, os.R_OK): - raise PermissionError('Input file %s is not readable!' % input_path) - - check_path_pattern_valid(input_path) - - if not check_path_length_valid(input_path): - raise ValueError("The real path or file_name of input is too long.") - - if os.path.getsize(input_path) > max_file_size: - raise ValueError(f'The file is too large, exceeds {max_file_size // 1024 ** 2}MB') \ No newline at end of file +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +import collections +import json +import os +import random +import re +import stat +import subprocess +import sys +import time +from datetime import datetime, timezone + +import numpy as np +import torch +import pandas as pd + +try: + import torch_npu +except ImportError: + IS_GPU = True +else: + IS_GPU = False + +if not IS_GPU: + from torch_npu.utils.device_guard import torch_device_guard as torch_npu_device_guard + +device = collections.namedtuple('device', ['type', 'index']) + + +class Const: + """ + Class for const + """ + MODEL_TYPE = ['.onnx', '.pb', '.om'] + DIM_PATTERN = r"^(-?[0-9]+)(,-?[0-9]+)*" + SEMICOLON = ";" + COLON = ":" + EQUAL = "=" + COMMA = "," + DOT = "." + DUMP_RATIO_MAX = 100 + SUMMERY_DATA_NUMS = 256 + ONE_HUNDRED_MB = 100*1024*1024 + FLOAT_EPSILON = np.finfo(float).eps + SUPPORT_DUMP_MODE = ['api', 'acl'] + ON = 'ON' + OFF = 'OFF' + BACKWARD = 'backward' + FORWARD = 'forward' + FLOAT_TYPE = [np.half, np.single, np.double, np.float64, np.longdouble] + + # dump mode + ALL = "all" + LIST = "list" + RANGE = "range" + STACK = "stack" + ACL = "acl" + API_LIST = "api_list" + API_STACK = "api_stack" + DUMP_MODE = [ALL, LIST, RANGE, STACK, ACL, API_LIST, API_STACK] + + API_PATTERN = r"^[A-Za-z0-9]+[_]+([A-Za-z0-9]+[_]*[A-Za-z0-9]+)[_]+[0-9]+[_]+[A-Za-z0-9]+" + WRITE_FLAGS = os.O_WRONLY | os.O_CREAT + WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR + + CONVERT = { + "fp16_to_fp32": ["torch.float16", "torch.float32"] + } + + CONVERT_API = { + "fp16_to_fp32": ["conv2d", "batch_norm", "relu"] + } + + +class CompareConst: + """ + Class for compare module const + """ + # compare result column name + NPU_NAME = "NPU Name" + BENCH_NAME = "Bench Name" + NPU_DTYPE = "NPU Tensor Dtype" + BENCH_DTYPE = "Bench Tensor Dtype" + NPU_SHAPE = "NPU Tensor Shape" + BENCH_SHAPE = "Bench Tensor Shape" + NPU_MAX = "NPU max" + NPU_MIN = "NPU min" + NPU_MEAN = "NPU mean" + BENCH_MAX = "Bench max" + BENCH_MIN = "Bench min" + BENCH_MEAN = "Bench mean" + COSINE = "Cosine" + MAX_ABS_ERR = "MaxAbsErr" + ACCURACY = "Accuracy Reached or Not" + STACK = "NPU_Stack_Info" + ERROR_MESSAGE = "Err_message" + + # compare result data + NAN = 'Nan' + SHAPE_UNMATCH = 'shape unmatched' + DTYPE_UNMATCH = 'dtype unmatched' + + # accuracy standards + COS_THRESHOLD = 0.99 + MAX_ABS_ERR_THRESHOLD = 0.001 + COS_MAX_THRESHOLD = 0.9 + MAX_ABS_ERR_MAX_THRESHOLD = 1 + ACCURACY_CHECK_YES = "Yes" + ACCURACY_CHECK_NO = "No" + ACCURACY_CHECK_UNMATCH = "Unmatched" + + # error message + NO_BENCH = "No bench data matched." 
+
+
+class VersionCheck:
+    """
+    Class for TorchVersion
+    """
+    V1_8 = "1.8"
+    V1_11 = "1.11"
+
+    @staticmethod
+    def check_torch_version(version):
+        torch_version = torch.__version__
+        return torch_version.startswith(version)
+
+
+class CompareException(Exception):
+    """
+    Class for Accuracy Compare Exception
+    """
+    NONE_ERROR = 0
+    INVALID_PATH_ERROR = 1
+    OPEN_FILE_ERROR = 2
+    CLOSE_FILE_ERROR = 3
+    READ_FILE_ERROR = 4
+    WRITE_FILE_ERROR = 5
+    INVALID_FILE_ERROR = 6
+    PERMISSION_ERROR = 7
+    INDEX_OUT_OF_BOUNDS_ERROR = 8
+    NO_DUMP_FILE_ERROR = 9
+    INVALID_DATA_ERROR = 10
+    INVALID_PARAM_ERROR = 11
+    INVALID_DUMP_RATIO = 12
+    INVALID_DUMP_FILE = 13
+    UNKNOWN_ERROR = 14
+    INVALID_DUMP_MODE = 15
+    PARSE_FILE_ERROR = 16
+    INVALID_COMPARE_MODE = 17
+
+    def __init__(self, code, error_info: str = ""):
+        super(CompareException, self).__init__()
+        self.code = code
+        self.error_info = error_info
+
+    def __str__(self):
+        return self.error_info
+
+class DumpException(CompareException):
+    pass
+
+def read_json(file):
+    with open(file, 'r') as f:
+        obj = json.load(f)
+    return obj
+
+def write_csv(data, filepath):
+    data_frame = pd.DataFrame(columns=data)
+    data_frame.to_csv(filepath, index=False)
+
+def _print_log(level, msg):
+    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
+    pid = os.getpid()
+    print(current_time + "(" + str(pid) + ")-[" + level + "]" + msg)
+    sys.stdout.flush()
+
+
+def print_info_log(info_msg):
+    """
+    Function Description:
+        print info log.
+    Parameter:
+        info_msg: the info message.
+    """
+    _print_log("INFO", info_msg)
+
+
+def print_error_log(error_msg):
+    """
+    Function Description:
+        print error log.
+    Parameter:
+        error_msg: the error message.
+    """
+    _print_log("ERROR", error_msg)
+
+
+def print_warn_log(warn_msg):
+    """
+    Function Description:
+        print warn log.
+    Parameter:
+        warn_msg: the warning message.
+    """
+    _print_log("WARNING", warn_msg)
+
+
+def check_mode_valid(mode):
+    if mode not in Const.DUMP_MODE:
+        msg = "Current mode '%s' is not supported. Please use the field in %s" % \
+              (mode, Const.DUMP_MODE)
+        raise CompareException(CompareException.INVALID_DUMP_MODE, msg)
+
+
+def check_object_type(check_object, allow_type):
+    """
+    Function Description:
+        Check if the object belongs to a certain data type
+    Parameter:
+        check_object: the object to be checked
+        allow_type: legal data type
+    Exception Description:
+        when invalid data throw exception
+    """
+    if not isinstance(check_object, allow_type):
+        print_error_log(f"{check_object} is not of type {allow_type}")
+        raise CompareException(CompareException.INVALID_DATA_ERROR)
+
+
+def check_file_or_directory_path(path, isdir=False):
+    """
+    Function Description:
+        check whether the path is valid
+    Parameter:
+        path: the path to check
+        isdir: the path is dir or file
+    Exception Description:
+        when invalid data throw exception
+    """
+    if isdir:
+        if not os.path.exists(path):
+            print_error_log('The path {} does not exist.'.format(path))
+            raise CompareException(CompareException.INVALID_PATH_ERROR)
+
+        if not os.path.isdir(path):
+            print_error_log('The path {} is not a directory.'.format(path))
+            raise CompareException(CompareException.INVALID_PATH_ERROR)
+
+        if not os.access(path, os.W_OK):
+            print_error_log(
+                'The path {} does not have permission to write. Please check the path permission.'.format(path))
+            raise CompareException(CompareException.INVALID_PATH_ERROR)
+    else:
+        if not os.path.isfile(path):
+            print_error_log('{} is an invalid file or does not exist.'.format(path))
+            raise CompareException(CompareException.INVALID_PATH_ERROR)
+
+        if not os.access(path, os.R_OK):
+            print_error_log(
+                'The path {} does not have permission to read. Please check the path permission.'.format(path))
+            raise CompareException(CompareException.INVALID_PATH_ERROR)
+
+def _check_pkl(pkl_file_handle, file_name):
+    tensor_line = pkl_file_handle.readline()
+    if len(tensor_line) == 0:
+        print_error_log("dump file {} has an empty line!".format(file_name))
+        raise CompareException(CompareException.INVALID_DUMP_FILE)
+    pkl_file_handle.seek(0, 0)
+
+
+def check_file_mode(npu_pkl, bench_pkl, stack_mode):
+    npu_pkl_name = os.path.split(npu_pkl)[-1]
+    bench_pkl_name = os.path.split(bench_pkl)[-1]
+
+    if not npu_pkl_name.startswith("api_stack") and not bench_pkl_name.startswith("api_stack"):
+        if stack_mode:
+            print_error_log("The current file does not contain stack information, please turn off the stack_mode")
+            raise CompareException(CompareException.INVALID_COMPARE_MODE)
+    elif npu_pkl_name.startswith("api_stack") and bench_pkl_name.startswith("api_stack"):
+        if not stack_mode:
+            print_error_log("The current file contains stack information, please turn on the stack_mode")
+            raise CompareException(CompareException.INVALID_COMPARE_MODE)
+    else:
+        print_error_log("The dump mode of the two files is not the same, please check the dump files")
+        raise CompareException(CompareException.INVALID_COMPARE_MODE)
+
+
+def check_file_size(input_file, max_size):
+    try:
+        file_size = os.path.getsize(input_file)
+    except OSError as os_error:
+        print_error_log('Failed to open "%s". %s' % (input_file, str(os_error)))
+        raise CompareException(CompareException.INVALID_FILE_ERROR)
+    if file_size > max_size:
+        print_error_log('The size (%d) of %s exceeds the limit of (%d) bytes, which the tool does not support.'
+                        % (file_size, input_file, max_size))
+        raise CompareException(CompareException.INVALID_FILE_ERROR)
+
+
+def get_dump_data_path(dump_dir):
+    """
+    Function Description:
+        traverse directories and obtain the absolute path of dump data
+    Parameter:
+        dump_dir: dump data directory
+    Return Value:
+        dump data path, and whether a dump file exists
+    """
+    dump_data_path = None
+    file_is_exist = False
+
+    check_file_or_directory_path(dump_dir, True)
+    for dir_path, sub_paths, files in os.walk(dump_dir):
+        if len(files) != 0:
+            dump_data_path = dir_path
+            file_is_exist = True
+            break
+        dump_data_path = dir_path
+    return dump_data_path, file_is_exist
+
+
+def get_api_name_from_matcher(name):
+    api_matcher = re.compile(Const.API_PATTERN)
+    match = api_matcher.match(name)
+    return match.group(1) if match else ""
+
+
+def modify_dump_path(dump_path, mode):
+    if mode == Const.ALL:
+        return dump_path
+    file_name = os.path.split(dump_path)
+    mode_file_name = mode + "_" + file_name[-1]
+    return os.path.join(file_name[0], mode_file_name)
+
+
+def create_directory(dir_path):
+    """
+    Function Description:
+        creating a directory with specified permissions
+    Parameter:
+        dir_path: directory path
+    Exception Description:
+        when invalid data throw exception
+    """
+    if not os.path.exists(dir_path):
+        try:
+            os.makedirs(dir_path, mode=0o700)
+        except OSError as ex:
+            print_error_log(
+                'Failed to create {}. Please check the path permission or disk space. {}'.format(dir_path, str(ex)))
+            raise CompareException(CompareException.INVALID_PATH_ERROR)
+
+
+def execute_command(cmd):
+    """
+    Function Description:
+        run the given command
+    Parameter:
+        cmd: command
+    Exception Description:
+        when invalid command throw exception
+    """
+    print_info_log('Execute command:%s' % cmd)
+    process = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    while process.poll() is None:
+        line = process.stdout.readline()
+        line = line.strip()
+        if line:
+            print(line)
+    if process.returncode != 0:
+        print_error_log('Failed to execute command:%s' % " ".join(cmd))
+        raise CompareException(CompareException.INVALID_DATA_ERROR)
+
+
+def save_numpy_data(file_path, data):
+    """
+    save_numpy_data
+    """
+    if not os.path.exists(os.path.dirname(file_path)):
+        os.makedirs(os.path.dirname(file_path))
+    np.save(file_path, data)
+
+
+def parse_arg_value(values):
+    """
+    parse dynamic arg value of atc cmdline
+    """
+    value_list = []
+    for item in values.split(Const.SEMICOLON):
+        value_list.append(parse_value_by_comma(item))
+    return value_list
+
+
+def parse_value_by_comma(value):
+    """
+    parse value by comma, like '1,2,4,8'
+    """
+    value_list = []
+    value_str_list = value.split(Const.COMMA)
+    for value_str in value_str_list:
+        value_str = value_str.strip()
+        if value_str.isdigit() or value_str == '-1':
+            value_list.append(int(value_str))
+        else:
+            print_error_log("please check your input shape.")
+            raise CompareException(CompareException.INVALID_PARAM_ERROR)
+    return value_list
+
+
+def get_data_len_by_shape(shape):
+    data_len = 1
+    for item in shape:
+        if item == -1:
+            print_error_log("please check your input shape, one dim in shape is -1.")
+            return -1
+        data_len = data_len * item
+    return data_len
+
+
+def add_time_as_suffix(name):
+    return '{}_{}.csv'.format(name, time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())))
+
+
+def get_time():
+    return datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
+
+
+def format_value(value):
+    return '{:.6f}'.format(value)
+
+
+def torch_device_guard(func):
+    if IS_GPU:
+        return func
+    # Wrap func so that torch.device objects in args/kwargs are parsed and guarded on NPU
+
+    @torch_npu_device_guard
+    def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+    return wrapper
+
+
+def seed_all(seed=1234, mode=False):
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.use_deterministic_algorithms(mode)
+    if IS_GPU:
+        torch.cuda.manual_seed_all(seed)
+        torch.cuda.manual_seed(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.enabled = False
+        torch.backends.cudnn.benchmark = False
+    else:
+        torch_npu.npu.manual_seed_all(seed)
+        torch_npu.npu.manual_seed(seed)
+
+
+def get_process_rank(model):
+    print_info_log("Rank id is not provided. Trying to get the rank id of the model.")
+    try:
+        device = next(model.parameters()).device
+    except StopIteration:
+        print_warn_log('There is no parameter in the model. Failed to get the rank id.')
+        return 0, False
+    if device.type == 'cpu':
+        print_warn_log("Warning: the debugger is unable to get the rank id. "
+                       "This may cause the dumped data to be corrupted in the "
+                       "case of distributed training. (You may ignore this if you are using only one card.) "
+                       "Transfer the model to npu or gpu before register_hook() to avoid this warning.")
+        return 0, False
+    else:
+        return device.index, True
+
+
+def get_json_contents(file_path):
+    ops = get_file_content_bytes(file_path)
+    return json.loads(ops)
+
+
+def get_file_content_bytes(file):
+    check_input_file_valid(file)
+    with open(file, 'rb') as file_handle:
+        return file_handle.read()
+
+
+def islink(path):
+    path = os.path.abspath(path)
+    return os.path.islink(path)
+
+
+class SoftlinkCheckException(Exception):
+    pass
+
+
+MAX_JSON_FILE_SIZE = 10 * 1024 ** 2
+LINUX_FILE_NAME_LENGTH_LIMIT = 200
+
+
+def check_path_length_valid(path):
+    path = os.path.realpath(path)
+    return len(os.path.basename(path)) <= LINUX_FILE_NAME_LENGTH_LIMIT
+
+
+def check_path_pattern_valid(path):
+    pattern = re.compile(r'(\.|/|:|_|-|\s|[~0-9a-zA-Z])+')
+    if not pattern.fullmatch(path):
+        raise ValueError('Only the following characters are allowed in the path: A-Z a-z 0-9 - _ . / :')
+
+
+def check_input_file_valid(input_path, max_file_size=MAX_JSON_FILE_SIZE):
+    if islink(input_path):
+        raise SoftlinkCheckException("Input path doesn't support soft link.")
+
+    input_path = os.path.realpath(input_path)
+    if not os.path.exists(input_path):
+        raise ValueError('Input file %s does not exist!' % input_path)
+
+    if not os.access(input_path, os.R_OK):
+        raise PermissionError('Input file %s is not readable!' % input_path)
+
+    check_path_pattern_valid(input_path)
+
+    if not check_path_length_valid(input_path):
+        raise ValueError("The real path or file_name of input is too long.")
+
+    if os.path.getsize(input_path) > max_file_size:
+        raise ValueError(f'The file is too large, exceeds {max_file_size // 1024 ** 2}MB')
+
+
+def check_need_convert(api_name):
+    """Return the [src_dtype, dst_dtype] pair for api_name, or None if no conversion is needed."""
+    convert_type = None
+    for key, value in Const.CONVERT_API.items():
+        if api_name in value:
+            convert_type = Const.CONVERT.get(key)
+            break
+    return convert_type
diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
index 3a0c9c4515..1811885133 100644
--- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
+++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
@@ -1,156 +1,157 @@
-import argparse
-import os
-import sys
-sys.path.append("..")
-import yaml
-import torch
-from data_generate import gen_api_params, gen_args
-from common.utils import print_info_log, print_warn_log, get_json_contents
-from compare.compare import Comparator
-
-cur_path = os.path.dirname(os.path.realpath(__file__))
-yaml_path = os.path.join(cur_path, "../hook_module/support_wrap_ops.yaml")
-with open(yaml_path, 'r') as f:
-    WrapFunctionalOps = yaml.safe_load(f).get('functional')
-
-for f in dir(torch.nn.functional):
-    if f != "__name__":
-        locals().update({f: getattr(torch.nn.functional, f)})
-
-
-def exec_api(api_type, api_name, args, kwargs):
-    if api_type == "Functional":
-        out = eval(api_name)(*args, **kwargs)
-    if api_type == "Tensor":
-        out = getattr(torch._C._TensorBase, str(api_name))(*args, **kwargs)
-    if api_type == "Torch":
-        out = getattr(torch._C._VariableFunctionsClass, str(api_name))(*args, **kwargs)
-    return out
-
-
-def generate_npu_params(cpu_args, cpu_kwargs, need_backward):
-    npu_args = []
-    npu_kwargs = {}
-    if need_backward:
-        for arg_in in cpu_args:
-            arg_in = arg_to_npu(arg_in)
-            npu_args.append(arg_in)
-        for key, value in cpu_kwargs.items():
-            value = arg_to_npu(value)
-            npu_kwargs[key] = value
-    else:
-        for arg_in in cpu_args:
-            if isinstance(arg_in, torch.Tensor):
-                arg_in = arg_in.clone().detach().to("npu")
-            npu_args.append(arg_in)
-        for key, value in cpu_kwargs.items():
-            if isinstance(value, torch.Tensor):
-                value = value.clone().detach().to("npu")
-            npu_kwargs[key] = value
-    return npu_args, npu_kwargs
-
-
-def arg_to_npu(arg_in):
-    if isinstance(arg_in, torch.Tensor) and arg_in.dtype in [torch.float, torch.float16,
-                                                             torch.float64] and arg_in.requires_grad:
-        arg_in = arg_in.clone().detach().to("npu").requires_grad_()
-    elif isinstance(arg_in, torch.Tensor):
-        arg_in = arg_in.clone().detach().to("npu")
-    return arg_in
-
-
-def run_ut(forward_file, backward_file, out_path, save_error_data):
-    print_info_log("start UT test")
-    forward_content = get_json_contents(forward_file)
-    backward_content = get_json_contents(backward_file)
-    api_setting_dict = get_json_contents("torch_ut_setting.json")
-    compare = Comparator(out_path)
-    for api_full_name, api_info_dict in forward_content.items():
-        grad_out, npu_grad_out, npu_out, out = run_torch_api(api_full_name, api_setting_dict, backward_content,
-                                                             api_info_dict)
-        compare.compare_output(api_full_name, out, npu_out, grad_out, npu_grad_out)
-
-    compare.print_pretest_result()
-    compare.write_compare_csv()
-
-
-def run_torch_api(api_full_name, api_setting_dict, backward_content, value):
-    [api_type, api_name, _] = api_full_name.split("*")
-    args, kwargs = gen_api_params(value, api_name[-1] != "_")
- inplace = kwargs.get("inplace") if kwargs.get("inplace") else None - need_backward = api_full_name in backward_content and api_name[-1] != "_" and inplace is not True - if inplace or api_name[-1] == "_": - print_warn_log("%s involves in-place operations, skip backward" % api_full_name) - npu_args, npu_kwargs = generate_npu_params(args, kwargs, need_backward) - grad_out, npu_grad_out = None, None - out = exec_api(api_type, api_name, args, kwargs) - npu_out = exec_api(api_type, api_name, npu_args, npu_kwargs) - grad_input_index = api_setting_dict.get(api_name) - grad_index = None - if grad_input_index is not None: - grad_index = grad_input_index.get('grad_index') - - if need_backward: - backward_args = backward_content[api_full_name] - grad = gen_args(backward_args)[0] - if grad_index is not None: - out[grad_index].backward(grad) - elif isinstance(out, (list, tuple)): - raise NotImplementedError("Multiple backward is not supported.") - else: - out.backward(grad) - args_grad = [] - for arg in args: - if isinstance(arg, torch.Tensor): - args_grad.append(arg.grad) - grad_out = args_grad - - npu_grad = grad.clone().detach().npu() - if grad_index is not None: - npu_out[grad_index].backward(npu_grad) - else: - npu_out.backward(npu_grad) - npu_args_grad = [] - for arg in npu_args: - if isinstance(arg, torch.Tensor): - npu_args_grad.append(arg.grad) - npu_grad_out = npu_args_grad - return grad_out, npu_grad_out, npu_out, out - - -def _run_ut_parser(parser): - parser.add_argument("-forward", "--forward_input_file", dest="forward_input_file", default="", - help=" The api param tool forward result file: generate from api param tool, " - "a json file.", - required=True) - parser.add_argument("-backward", "--backward_input_file", dest="backward_input_file", default="", - help=" The api param tool backward result file: generate from api param tool, " - "a json file.", - required=True) - parser.add_argument("-o", "--out_path", dest="out_path", default="", - help=" The ut task result out path.", - required=False) - parser.add_argument('-save_error_data', dest="save_error_data", action="store_true", - help=" Save compare failed api output.", required=False) - parser.add_argument("-c", "--jit_compile", dest="jit_compile", help=" whether to turn on jit compile", - default=True, required=False) - - -def _run_ut(): - parser = argparse.ArgumentParser() - _run_ut_parser(parser) - args = parser.parse_args(sys.argv[1:]) - if not args.jit_compile: - torch.npu.set_compile_mode(jit_compile=False) - forward_file = os.path.realpath(args.forward_input_file) - backward_file = os.path.realpath(args.backward_input_file) - if not forward_file.endswith(".json") or not backward_file.endswith(".json"): - raise ValueError("The forward_input_file and backward_input_file should be a json file!") - out_path = os.path.realpath(args.out_path) if args.out_path else "./" - save_error_data = args.save_error_data - run_ut(forward_file, backward_file, out_path, save_error_data) - - -if __name__ == '__main__': - _run_ut() +import argparse +import os +import sys +sys.path.append("..") +import yaml +import torch +from data_generate import gen_api_params, gen_args +from common.utils import print_info_log, print_warn_log, get_json_contents, check_need_convert +from compare.compare import Comparator + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "../hook_module/support_wrap_ops.yaml") +with open(yaml_path, 'r') as f: + WrapFunctionalOps = yaml.safe_load(f).get('functional') + +for f in 
dir(torch.nn.functional): + if f != "__name__": + locals().update({f: getattr(torch.nn.functional, f)}) + + +def exec_api(api_type, api_name, args, kwargs): + if api_type == "Functional": + out = eval(api_name)(*args, **kwargs) + if api_type == "Tensor": + out = getattr(torch._C._TensorBase, str(api_name))(*args, **kwargs) + if api_type == "Torch": + out = getattr(torch._C._VariableFunctionsClass, str(api_name))(*args, **kwargs) + return out + + +def generate_npu_params(cpu_args, cpu_kwargs, need_backward): + npu_args = [] + npu_kwargs = {} + if need_backward: + for arg_in in cpu_args: + arg_in = arg_to_npu(arg_in) + npu_args.append(arg_in) + for key, value in cpu_kwargs.items(): + value = arg_to_npu(value) + npu_kwargs[key] = value + else: + for arg_in in cpu_args: + if isinstance(arg_in, torch.Tensor): + arg_in = arg_in.clone().detach().to("npu") + npu_args.append(arg_in) + for key, value in cpu_kwargs.items(): + if isinstance(value, torch.Tensor): + value = value.clone().detach().to("npu") + npu_kwargs[key] = value + return npu_args, npu_kwargs + + +def arg_to_npu(arg_in): + if isinstance(arg_in, torch.Tensor) and arg_in.dtype in [torch.float, torch.float16, + torch.float64] and arg_in.requires_grad: + arg_in = arg_in.clone().detach().to("npu").requires_grad_() + elif isinstance(arg_in, torch.Tensor): + arg_in = arg_in.clone().detach().to("npu") + return arg_in + + +def run_ut(forward_file, backward_file, out_path, save_error_data): + print_info_log("start UT test") + forward_content = get_json_contents(forward_file) + backward_content = get_json_contents(backward_file) + api_setting_dict = get_json_contents("torch_ut_setting.json") + compare = Comparator(out_path) + for api_full_name, api_info_dict in forward_content.items(): + grad_out, npu_grad_out, npu_out, out = run_torch_api(api_full_name, api_setting_dict, backward_content, + api_info_dict) + compare.compare_output(api_full_name, out, npu_out, grad_out, npu_grad_out) + + compare.print_pretest_result() + compare.write_compare_csv() + + +def run_torch_api(api_full_name, api_setting_dict, backward_content, value): + [api_type, api_name, _] = api_full_name.split("*") + convert_type = check_need_convert(api_name) + args, kwargs = gen_api_params(value, api_name[-1] != "_", convert_type) + inplace = kwargs.get("inplace") if kwargs.get("inplace") else None + need_backward = api_full_name in backward_content and api_name[-1] != "_" and inplace is not True + if inplace or api_name[-1] == "_": + print_warn_log("%s involves in-place operations, skip backward" % api_full_name) + npu_args, npu_kwargs = generate_npu_params(args, kwargs, need_backward) + grad_out, npu_grad_out = None, None + out = exec_api(api_type, api_name, args, kwargs) + npu_out = exec_api(api_type, api_name, npu_args, npu_kwargs) + grad_input_index = api_setting_dict.get(api_name) + grad_index = None + if grad_input_index is not None: + grad_index = grad_input_index.get('grad_index') + + if need_backward: + backward_args = backward_content[api_full_name] + grad = gen_args(backward_args)[0] + if grad_index is not None: + out[grad_index].backward(grad) + elif isinstance(out, (list, tuple)): + raise NotImplementedError("Multiple backward is not supported.") + else: + out.backward(grad) + args_grad = [] + for arg in args: + if isinstance(arg, torch.Tensor): + args_grad.append(arg.grad) + grad_out = args_grad + + npu_grad = grad.clone().detach().npu() + if grad_index is not None: + npu_out[grad_index].backward(npu_grad) + else: + npu_out.backward(npu_grad) + npu_args_grad = [] + 
for arg in npu_args:
+            if isinstance(arg, torch.Tensor):
+                npu_args_grad.append(arg.grad)
+        npu_grad_out = npu_args_grad
+    return grad_out, npu_grad_out, npu_out, out
+
+
+def _run_ut_parser(parser):
+    parser.add_argument("-forward", "--forward_input_file", dest="forward_input_file", default="",
+                        help=" The api param tool forward result file: generated by the api param tool, "
+                             "a json file.",
+                        required=True)
+    parser.add_argument("-backward", "--backward_input_file", dest="backward_input_file", default="",
+                        help=" The api param tool backward result file: generated by the api param tool, "
+                             "a json file.",
+                        required=True)
+    parser.add_argument("-o", "--out_path", dest="out_path", default="",
+                        help=" The ut task result out path.",
+                        required=False)
+    parser.add_argument('-save_error_data', dest="save_error_data", action="store_true",
+                        help=" Save compare failed api output.", required=False)
+    parser.add_argument("-c", "--jit_compile", dest="jit_compile",
+                        help=" Whether to turn on jit compile. Pass 'False' to disable it.",
+                        type=lambda value: str(value).lower() in ("true", "1"),
+                        default=True, required=False)
+
+
+def _run_ut():
+    parser = argparse.ArgumentParser()
+    _run_ut_parser(parser)
+    args = parser.parse_args(sys.argv[1:])
+    if not args.jit_compile:
+        torch.npu.set_compile_mode(jit_compile=False)
+    forward_file = os.path.realpath(args.forward_input_file)
+    backward_file = os.path.realpath(args.backward_input_file)
+    if not forward_file.endswith(".json") or not backward_file.endswith(".json"):
+        raise ValueError("The forward_input_file and backward_input_file should be a json file!")
+    out_path = os.path.realpath(args.out_path) if args.out_path else "./"
+    save_error_data = args.save_error_data
+    run_ut(forward_file, backward_file, out_path, save_error_data)
+
+
+if __name__ == '__main__':
+    _run_ut()
+    print_info_log("UT task completed.")
\ No newline at end of file
-- 
Gitee
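
Note on the convert check added in utils.py: it is a two-table lookup. Const.CONVERT_API maps a conversion name to the APIs that need it, and Const.CONVERT maps the same name to a [source dtype, target dtype] pair. Below is a minimal, self-contained sketch of that lookup for reference; the Const stand-in only mirrors the two tables this patch adds and is not the real module.

    class Const:
        CONVERT = {
            "fp16_to_fp32": ["torch.float16", "torch.float32"]
        }
        CONVERT_API = {
            "fp16_to_fp32": ["conv2d", "batch_norm", "relu"]
        }

    def check_need_convert(api_name):
        # Walk the name -> api-list table; the first hit decides the dtype pair.
        convert_type = None
        for key, value in Const.CONVERT_API.items():
            if api_name in value:
                convert_type = Const.CONVERT.get(key)
                break
        return convert_type

    assert check_need_convert("conv2d") == ["torch.float16", "torch.float32"]
    assert check_need_convert("matmul") is None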
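
run_ut.py forwards the returned convert_type into gen_api_params, which is not part of this patch, so how the dtype pair is consumed is not visible here. The sketch below shows one plausible way to apply a ["torch.float16", "torch.float32"] pair to a generated tensor; maybe_convert_tensor is a hypothetical name for illustration, not an API from this repository.

    import torch

    def maybe_convert_tensor(tensor, convert_type):
        # Hypothetical helper: cast the tensor when its dtype matches the source dtype.
        if convert_type is None or not isinstance(tensor, torch.Tensor):
            return tensor
        src_dtype, dst_dtype = convert_type
        if str(tensor.dtype) == src_dtype:
            # Map the string "torch.float32" back to the torch.float32 dtype object.
            return tensor.to(getattr(torch, dst_dtype.split(".")[-1]))
        return tensor

    x = torch.ones(2, 3, dtype=torch.float16)
    assert maybe_convert_tensor(x, ["torch.float16", "torch.float32"]).dtype == torch.float32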