diff --git a/debug/accuracy_tools/api_accuracy_checker/common/base_api.py b/debug/accuracy_tools/api_accuracy_checker/common/base_api.py deleted file mode 100644 index 4d1ebff9198f1707bf76555801fd4df154ca8d98..0000000000000000000000000000000000000000 --- a/debug/accuracy_tools/api_accuracy_checker/common/base_api.py +++ /dev/null @@ -1,134 +0,0 @@ -import os -import torch -from api_accuracy_checker.common.utils import print_error_log, write_pt, create_directory -from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import check_path_before_create -from api_accuracy_checker.common.config import msCheckerConfig - - -class BaseAPIInfo: - def __init__(self, api_name, is_forward, is_save_data, save_path, forward_path, backward_path): - self.rank = os.getpid() - self.api_name = api_name - self.torch_object_key = {'device': self.analyze_device_in_kwargs, 'dtype': self.analyze_dtype_in_kwargs} - self.is_forward = is_forward - self.args_num = 0 - self.is_save_data = is_save_data - self.save_path = save_path - self.forward_path = forward_path - self.backward_path = backward_path - - def analyze_element(self, element): - if isinstance(element, (list, tuple)): - out = [] - for item in element: - out.append(self.analyze_element(item)) - return out - - if isinstance(element, dict): - out = {} - for key, value in element.items(): - if key in self.torch_object_key.keys(): - fun = self.torch_object_key[key] - out[key] = fun(value) - else: - out[key] = self.analyze_element(value) - return out - - if isinstance(element, torch.Tensor): - return self.analyze_tensor(element) - - if self.is_builtin_class(element): - return self.analyze_builtin(element) - - msg = f"Type {type(element)} is unsupported at analyze_element" - print_error_log(msg) - raise NotImplementedError(msg) - - def analyze_tensor(self, arg): - single_arg = {} - if not self.is_save_data: - single_arg.update({'type': 'torch.Tensor'}) - single_arg.update({'dtype': str(arg.dtype)}) - single_arg.update({'shape': arg.shape}) 
- single_arg.update({'Max': self.transfer_types(self.get_tensor_extremum(arg, 'max'), str(arg.dtype))}) - single_arg.update({'Min': self.transfer_types(self.get_tensor_extremum(arg, 'min'), str(arg.dtype))}) - single_arg.update({'requires_grad': arg.requires_grad}) - else: - api_args = self.api_name + '.' + str(self.args_num) - from api_accuracy_checker.dump.dump import DumpUtil - step_dir = "step" + str(DumpUtil.call_num - 1 if msCheckerConfig.enable_dataloader else DumpUtil.call_num) - rank_dir = f"rank{self.rank}" - if self.is_forward: - forward_real_data_path = os.path.join(self.save_path, step_dir, self.forward_path, rank_dir) - check_path_before_create(forward_real_data_path) - create_directory(forward_real_data_path) - file_path = os.path.join(forward_real_data_path, f'{api_args}.pt') - else: - backward_real_data_path = os.path.join(self.save_path, step_dir, self.backward_path, rank_dir) - check_path_before_create(backward_real_data_path) - create_directory(backward_real_data_path) - file_path = os.path.join(backward_real_data_path, f'{api_args}.pt') - self.args_num += 1 - pt_path = write_pt(file_path, arg.contiguous().cpu().detach()) - single_arg.update({'type': 'torch.Tensor'}) - single_arg.update({'datapath': pt_path}) - single_arg.update({'requires_grad': arg.requires_grad}) - return single_arg - - def analyze_builtin(self, arg): - single_arg = {} - if self.is_save_data: - self.args_num += 1 - if isinstance(arg, slice): - single_arg.update({'type': "slice"}) - single_arg.update({'value': [arg.start, arg.stop, arg.step]}) - else: - single_arg.update({'type': self.get_type_name(str(type(arg)))}) - single_arg.update({'value': arg}) - return single_arg - - def transfer_types(self, data, dtype): - if 'int' in dtype or 'bool' in dtype: - return int(data) - else: - return float(data) - - def is_builtin_class(self, element): - if element is None or isinstance(element, (bool, int, float, str, slice)): - return True - return False - - def 
analyze_device_in_kwargs(self, element): - single_arg = {} - single_arg.update({'type': 'torch.device'}) - if not isinstance(element, str): - if hasattr(element, "index"): - device_value = element.type + ":" + str(element.index) - single_arg.update({'value': device_value}) - else: - device_value = element.type - else: - single_arg.update({'value': element}) - return single_arg - - def analyze_dtype_in_kwargs(self, element): - single_arg = {} - single_arg.update({'type': 'torch.dtype'}) - single_arg.update({'value': str(element)}) - return single_arg - - def get_tensor_extremum(self, data, operator): - if data.dtype is torch.bool: - if operator == 'max': - return True in data - elif operator == 'min': - return False not in data - if operator == 'max': - return torch._C._VariableFunctionsClass.max(data.float()).item() - else: - return torch._C._VariableFunctionsClass.min(data.float()).item() - - def get_type_name(self, name): - left = name.index("'") - right = name.rindex("'") - return name[left + 1: right] diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 03bbc954bd4f2336281fe1109b6252b6f2a3842c..e5a6b711004f4b2016cd30d28cdd3e4e15ac93ec 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -86,7 +86,6 @@ class Const: API_STACK = "api_stack" DUMP_MODE = [ALL, LIST, RANGE, STACK, ACL, API_LIST, API_STACK] - API_PATTERN = r"^[A-Za-z0-9]+[_]+([A-Za-z0-9]+[_]*[A-Za-z0-9]+)[_]+[0-9]+[_]+[A-Za-z0-9]+" WRITE_FLAGS = os.O_WRONLY | os.O_CREAT WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR @@ -361,12 +360,6 @@ def get_dump_data_path(dump_dir): return dump_data_path, file_is_exist -def get_api_name_from_matcher(name): - api_matcher = re.compile(Const.API_PATTERN) - match = api_matcher.match(name) - return match.group(1) if match else "" - - def modify_dump_path(dump_path, mode): if mode == Const.ALL: return 
dump_path diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index ca0d4021a5cfd09a72cabaa993f1baf27aa41112..adb0c4b0f34215b93c2bcbcce1d8bbd4bdac877b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -1,45 +1,177 @@ # 定义API INFO,保存基本信息,用于后续结构体的落盘,注意考虑random场景及真实数据场景 +import os import inspect +import torch from api_accuracy_checker.common.config import msCheckerConfig -from api_accuracy_checker.common.base_api import BaseAPIInfo +from api_accuracy_checker.common.utils import print_error_log, write_pt, create_directory, DumpException +from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import check_path_before_create -class APIInfo(BaseAPIInfo): - def __init__(self, api_name, is_forward, is_save_data, save_path, forward_path='forward_real_data', - backward_path='backward_real_data'): - super().__init__(api_name, is_forward, is_save_data, save_path, forward_path, backward_path) +def get_tensor_extremum(data, operator): + if data.dtype is torch.bool: + if operator == 'max': + return True in data + elif operator == 'min': + return False not in data + if operator == 'max': + return torch._C._VariableFunctionsClass.max(data.float()).item() + else: + return torch._C._VariableFunctionsClass.min(data.float()).item() + + +def get_type_name(name): + left = name.index("'") + right = name.rindex("'") + return name[left + 1: right] + + +def transfer_types(data, dtype): + if 'int' in dtype or 'bool' in dtype: + return int(data) + else: + return float(data) + + +def is_builtin_class(element): + return element is None or isinstance(element, (bool, int, float, str, slice)) + + +def analyze_device_in_kwargs(element): + single_arg = {} + single_arg.update({'type': 'torch.device'}) + if not isinstance(element, str): + if hasattr(element, "index"): + device_value = element.type + ":" + str(element.index) + else: + 
device_value = element.type + single_arg.update({'value': device_value}) + else: + single_arg.update({'value': element}) + return single_arg + + +def analyze_dtype_in_kwargs(element): + single_arg = {} + single_arg.update({'type': 'torch.dtype'}) + single_arg.update({'value': str(element)}) + return single_arg + + +class APIInfo: + def __init__(self, api_name, save_path, is_save_data=False): + self.api_name = api_name + self.torch_object_key = {'device': analyze_device_in_kwargs, 'dtype': analyze_dtype_in_kwargs} + self.rank = os.getpid() + self.is_save_data = is_save_data + self.save_path = save_path + self.args_num = 0 + + @staticmethod + def get_full_save_path(save_path, dir_name, contain_step=False): + if contain_step: + from api_accuracy_checker.dump.dump import DumpUtil + step_dir = "step" + str(DumpUtil.call_num - 1 if msCheckerConfig.enable_dataloader else DumpUtil.call_num) + rank_dir = f"rank{os.getpid()}" + return os.path.join(save_path, step_dir, dir_name, rank_dir) + else: + return os.path.join(save_path, dir_name) + + def analyze_element(self, element): + if isinstance(element, (list, tuple)): + out = [] + for item in element: + out.append(self.analyze_element(item)) + return out + + if isinstance(element, dict): + out_dict = {} + for key, value in element.items(): + if key in self.torch_object_key.keys(): + fun = self.torch_object_key[key] + out_dict[key] = fun(value) + else: + out_dict[key] = self.analyze_element(value) + return out_dict + + if isinstance(element, torch.Tensor): + return self._analyze_tensor(element) + + if is_builtin_class(element): + return self._analyze_builtin(element) + + msg = f"Type {type(element)} is unsupported at analyze_element" + print_error_log(msg) + raise DumpException(DumpException.INVALID_DATA_ERROR) + + def _analyze_tensor(self, arg): + single_arg = {} + if not self.is_save_data: + single_arg.update({'type': 'torch.Tensor'}) + single_arg.update({'dtype': str(arg.dtype)}) + single_arg.update({'shape': arg.shape}) + 
single_arg.update({'Max': transfer_types(get_tensor_extremum(arg, 'max'), str(arg.dtype))}) + single_arg.update({'Min': transfer_types(get_tensor_extremum(arg, 'min'), str(arg.dtype))}) + single_arg.update({'requires_grad': arg.requires_grad}) + else: + api_args = self.api_name + '.' + str(self.args_num) + check_path_before_create(self.save_path) + create_directory(self.save_path) + file_path = os.path.join(self.save_path, f'{api_args}.pt') + pt_path = write_pt(file_path, arg.contiguous().cpu().detach()) + self.args_num += 1 + single_arg.update({'type': 'torch.Tensor'}) + single_arg.update({'datapath': pt_path}) + single_arg.update({'requires_grad': arg.requires_grad}) + return single_arg + + def _analyze_builtin(self, arg): + single_arg = {} + if self.is_save_data: + self.args_num += 1 + if isinstance(arg, slice): + single_arg.update({'type': "slice"}) + single_arg.update({'value': [arg.start, arg.stop, arg.step]}) + else: + single_arg.update({'type': get_type_name(str(type(arg)))}) + single_arg.update({'value': arg}) + return single_arg class ForwardAPIInfo(APIInfo): def __init__(self, name, args, kwargs): - super().__init__(name, is_forward=True, is_save_data=msCheckerConfig.real_data, - save_path=msCheckerConfig.dump_path) + super().__init__(name, + self.get_full_save_path(msCheckerConfig.dump_path, 'forward_real_data', contain_step=True), + is_save_data=msCheckerConfig.real_data) + self.api_info_struct = {} + self.stack_info_struct = {} self.analyze_api_input(args, kwargs) - self.analyze_api_call_stack() - + self.analyze_api_call_stack() + def analyze_api_input(self, args, kwargs): args_info_list = self.analyze_element(args) kwargs_info_dict = self.analyze_element(kwargs) - self.api_info_struct = {self.api_name: {"args":args_info_list, "kwargs":kwargs_info_dict}} + self.api_info_struct = {self.api_name: {"args": args_info_list, "kwargs": kwargs_info_dict}} def analyze_api_call_stack(self): stack_str = [] for (_, path, line, func, code, _) in 
inspect.stack()[3:]: - if not code: + if not code: continue stack_line = " ".join([ "File", ", ".join([path, " ".join(["line", str(line)]), " ".join(["in", func]), - " ".join(["\n", code[0].strip()])])]) + " ".join(["\n", code[0].strip()])])]) stack_str.append(stack_line) self.stack_info_struct = {self.api_name: stack_str} - + class BackwardAPIInfo(APIInfo): def __init__(self, name, grads): - super().__init__(name, is_forward=False, is_save_data=msCheckerConfig.real_data, - save_path=msCheckerConfig.dump_path) + super().__init__(name, + self.get_full_save_path(msCheckerConfig.dump_path, 'backward_real_data', contain_step=True), + is_save_data=msCheckerConfig.real_data) + self.grad_info_struct = {} self.analyze_api_input(grads) - + def analyze_api_input(self, grads): grads_info_list = self.analyze_element(grads) - self.grad_info_struct = {self.api_name:grads_info_list} + self.grad_info_struct = {self.api_name: grads_info_list} diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 7097f95ed85d774509c95b0d91d3ddf12e839402..cf9bfe29bdfd054db96ae878813c099892430bbb 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -22,9 +22,10 @@ from api_accuracy_checker.compare.compare import Comparator from api_accuracy_checker.hook_module.wrap_tensor import TensorOPTemplate from api_accuracy_checker.hook_module.wrap_functional import FunctionalOPTemplate from api_accuracy_checker.hook_module.wrap_torch import TorchOPTemplate -from api_accuracy_checker.run_ut.ut_api_info import UtAPIInfo from api_accuracy_checker.common.config import msCheckerConfig from api_accuracy_checker.compare.compare_utils import CompareConst +from api_accuracy_checker.dump.api_info import APIInfo + from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen, FileCheckConst, FileChecker, \ change_mode, 
check_file_suffix, check_link @@ -174,12 +175,12 @@ def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) if not is_fwd_success or not is_bwd_success: api_full_name = api_full_name.replace("*", ".") for element in data_info.in_fwd_data_list: - UtAPIInfo(api_full_name + '.forward.input', element, UT_ERROR_DATA_DIR) - UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_out, UT_ERROR_DATA_DIR) - UtAPIInfo(api_full_name + '.forward.output.device', data_info.device_out, UT_ERROR_DATA_DIR) - UtAPIInfo(api_full_name + '.backward.input', data_info.grad_in, UT_ERROR_DATA_DIR) - UtAPIInfo(api_full_name + '.backward.output.bench', data_info.bench_grad_out, UT_ERROR_DATA_DIR) - UtAPIInfo(api_full_name + '.backward.output.device', data_info.device_grad_out, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.forward.input', element) + UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_out) + UtAPIInfo(api_full_name + '.forward.output.device', data_info.device_out) + UtAPIInfo(api_full_name + '.backward.input', data_info.grad_in) + UtAPIInfo(api_full_name + '.backward.output.bench', data_info.bench_grad_out) + UtAPIInfo(api_full_name + '.backward.output.device', data_info.device_grad_out) def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_dict): @@ -399,6 +400,14 @@ class UtDataInfo: self.in_fwd_data_list = in_fwd_data_list +class UtAPIInfo(APIInfo): + def __init__(self, api_name, element): + super().__init__(api_name, + save_path=self.get_full_save_path(msCheckerConfig.error_data_path, UT_ERROR_DATA_DIR), + is_save_data=True) + self.analyze_element(element) + + if __name__ == '__main__': _run_ut() print_info_log("UT task completed.") diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/ut_api_info.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/ut_api_info.py deleted file mode 100644 index 7d345ac0ab8c7d40f6663739c9363f75e06886be..0000000000000000000000000000000000000000 --- 
a/debug/accuracy_tools/api_accuracy_checker/run_ut/ut_api_info.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from api_accuracy_checker.common.config import msCheckerConfig -from api_accuracy_checker.common.base_api import BaseAPIInfo -from api_accuracy_checker.common.utils import write_pt, create_directory -from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import check_path_before_create - - -class UtAPIInfo(BaseAPIInfo): - def __init__(self, api_name, element, ut_error_data_dir): - super().__init__(api_name, True, True, msCheckerConfig.error_data_path, '', '') - self.ut_error_data_dir = ut_error_data_dir - self.analyze_element(element) - - def analyze_tensor(self, arg): - single_arg = {} - api_args = self.api_name + '.' + str(self.args_num) - ut_error_data_path = os.path.join(self.save_path, self.ut_error_data_dir) - check_path_before_create(ut_error_data_path) - create_directory(ut_error_data_path) - file_path = os.path.join(ut_error_data_path, f'{api_args}.pt') - self.args_num += 1 - pt_path = write_pt(file_path, arg.contiguous().cpu().detach()) - single_arg.update({'type': 'torch.Tensor'}) - single_arg.update({'datapath': pt_path}) - single_arg.update({'requires_grad': arg.requires_grad}) - return single_arg diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_base_api.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_base_api.py deleted file mode 100644 index 6ff6d65c48205c98bb80e8e01e28f2e19ee2bdfb..0000000000000000000000000000000000000000 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_base_api.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest -import torch -import os -import shutil -from api_accuracy_checker.common.base_api import BaseAPIInfo - -class TestBaseAPI(unittest.TestCase): - def setUp(self): - if os.path.exists('./forward'): - shutil.rmtree('./forward') - os.makedirs('./forward', mode=0o755) - self.api = BaseAPIInfo("test_api", True, True, "./", "forward", "backward") - - def 
test_analyze_element(self): - element = [1, 2, 3] - result = self.api.analyze_element(element) - self.assertEqual(result, [{'type': 'int', 'value': 1}, {'type': 'int', 'value': 2}, {'type': 'int', 'value': 3}]) - - def test_analyze_tensor(self): - tensor = torch.tensor([1, 2, 3], dtype=torch.float32, requires_grad=True) - result = self.api.analyze_tensor(tensor) - self.assertEqual(result.get('type'), 'torch.Tensor') - self.assertTrue(result.get('requires_grad')) - self.assertTrue(os.path.exists(result.get('datapath'))) - - def test_analyze_builtin(self): - arg = slice(1, 10, 2) - result = self.api.analyze_builtin(arg) - self.assertEqual(result, {'type': 'slice', 'value': [1, 10, 2]}) - - def test_transfer_types(self): - data = 10 - dtype = 'int' - result = self.api.transfer_types(data, dtype) - self.assertEqual(result, 10) - - def test_is_builtin_class(self): - element = 10 - result = self.api.is_builtin_class(element) - self.assertEqual(result, True) - - def test_analyze_device_in_kwargs(self): - element = torch.device('cuda:0') - result = self.api.analyze_device_in_kwargs(element) - self.assertEqual(result, {'type': 'torch.device', 'value': 'cuda:0'}) - - def test_analyze_dtype_in_kwargs(self): - element = torch.float32 - result = self.api.analyze_dtype_in_kwargs(element) - self.assertEqual(result, {'type': 'torch.dtype', 'value': 'torch.float32'}) - - def test_get_tensor_extremum(self): - data = torch.tensor([1, 2, 3]) - result_max = self.api.get_tensor_extremum(data, 'max') - result_min = self.api.get_tensor_extremum(data, 'min') - self.assertEqual(result_max, 3) - self.assertEqual(result_min, 1) - - def test_get_type_name(self): - name = "" - result = self.api.get_type_name(name) - self.assertEqual(result, 'int') - - def tearDown(self): - if os.path.exists('./forward'): - shutil.rmtree('./forward') \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_common_utils.py 
b/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_common_utils.py index f8bae77bcc632d6f06519e52b29f4d6ab02c1448..ea1465473cdb868cf602bcf7b3ba1060c8616af4 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_common_utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_common_utils.py @@ -50,10 +50,6 @@ class TestUtils(unittest.TestCase): path, exist = get_dump_data_path(os.path.dirname(__file__)) self.assertTrue(exist) - def test_get_api_name_from_matcher(self): - api_name = get_api_name_from_matcher("api_stack_1_add") - self.assertEqual(api_name, "stack") - def test_create_directory(self): create_directory('test_dir') self.assertTrue(os.path.exists('test_dir')) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py index 5ed5cd64452a11e7cf1cfda9b9f0b775eb011f8e..fb0511b8e2e7e3a93934e93791d2e51c527cc20a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py @@ -1,29 +1,87 @@ +import os +import shutil import unittest -from api_accuracy_checker.dump.api_info import APIInfo, ForwardAPIInfo, BackwardAPIInfo +import torch +from api_accuracy_checker.dump.api_info import APIInfo, ForwardAPIInfo, BackwardAPIInfo, transfer_types, \ + get_tensor_extremum, get_type_name, is_builtin_class, analyze_device_in_kwargs, analyze_dtype_in_kwargs from api_accuracy_checker.common.config import msCheckerConfig + class TestAPIInfo(unittest.TestCase): - def test_APIInfo(self): - api_info = APIInfo("test_api", True, True, "save_path") - self.assertEqual(api_info.api_name, "test_api") - self.assertEqual(api_info.is_forward, True) - self.assertEqual(api_info.is_save_data, True) - self.assertEqual(api_info.save_path, "save_path") - self.assertEqual(api_info.forward_path, "forward_real_data") - 
self.assertEqual(api_info.backward_path, "backward_real_data") + def setUp(self): + if os.path.exists('./step-1'): + shutil.rmtree('./step-1') + self.api = APIInfo("test_api", APIInfo.get_full_save_path("./", "forward", True), True) + + def test_analyze_element(self): + element = [1, 2, 3] + result = self.api.analyze_element(element) + self.assertEqual(result, + [{'type': 'int', 'value': 1}, {'type': 'int', 'value': 2}, {'type': 'int', 'value': 3}]) + + def test_analyze_tensor(self): + tensor = torch.tensor([1, 2, 3], dtype=torch.float32, requires_grad=True) + result = self.api._analyze_tensor(tensor) + self.assertEqual(result.get('type'), 'torch.Tensor') + self.assertTrue(result.get('requires_grad')) + self.assertTrue(os.path.exists(result.get('datapath'))) + + def test_analyze_builtin(self): + arg = slice(1, 10, 2) + result = self.api._analyze_builtin(arg) + self.assertEqual(result, {'type': 'slice', 'value': [1, 10, 2]}) + + def test_transfer_types(self): + data = 10 + dtype = 'int' + result = transfer_types(data, dtype) + self.assertEqual(result, 10) + + def test_is_builtin_class(self): + element = 10 + result = is_builtin_class(element) + self.assertTrue(result) + + def test_analyze_device_in_kwargs(self): + element = torch.device('cuda:0') + result = analyze_device_in_kwargs(element) + self.assertEqual(result, {'type': 'torch.device', 'value': 'cuda:0'}) + + def test_analyze_dtype_in_kwargs(self): + element = torch.float32 + result = analyze_dtype_in_kwargs(element) + self.assertEqual(result, {'type': 'torch.dtype', 'value': 'torch.float32'}) + + def test_get_tensor_extremum(self): + data = torch.tensor([1, 2, 3]) + result_max = get_tensor_extremum(data, 'max') + result_min = get_tensor_extremum(data, 'min') + self.assertEqual(result_max, 3) + self.assertEqual(result_min, 1) + + def test_get_type_name(self): + name = "" + result = get_type_name(name) + self.assertEqual(result, 'int') def test_ForwardAPIInfo(self): forward_api_info = 
ForwardAPIInfo("test_forward_api", [1, 2, 3], {"a": 1, "b": 2}) self.assertEqual(forward_api_info.api_name, "test_forward_api") - self.assertEqual(forward_api_info.is_forward, True) - self.assertEqual(forward_api_info.is_save_data, msCheckerConfig.real_data) - self.assertEqual(forward_api_info.save_path, msCheckerConfig.dump_path) - self.assertEqual(forward_api_info.api_info_struct, {"test_forward_api": {"args": [{'type': 'int', 'value': 1},{'type': 'int', 'value': 2},{'type': 'int', 'value': 3},], "kwargs": {'a': {'type': 'int', 'value': 1}, 'b': {'type': 'int', 'value': 2}}}}) + self.assertEqual(forward_api_info.save_path, + APIInfo.get_full_save_path(msCheckerConfig.dump_path, 'forward_real_data', True)) + self.assertEqual(forward_api_info.api_info_struct, {"test_forward_api": { + "args": [{'type': 'int', 'value': 1}, {'type': 'int', 'value': 2}, {'type': 'int', 'value': 3}, ], + "kwargs": {'a': {'type': 'int', 'value': 1}, 'b': {'type': 'int', 'value': 2}}}}) def test_BackwardAPIInfo(self): backward_api_info = BackwardAPIInfo("test_backward_api", [1, 2, 3]) self.assertEqual(backward_api_info.api_name, "test_backward_api") - self.assertEqual(backward_api_info.is_forward, False) - self.assertEqual(backward_api_info.is_save_data, msCheckerConfig.real_data) - self.assertEqual(backward_api_info.save_path, msCheckerConfig.dump_path) - self.assertEqual(backward_api_info.grad_info_struct, {"test_backward_api": [{'type': 'int', 'value': 1},{'type': 'int', 'value': 2},{'type': 'int', 'value': 3}]}) + self.assertEqual(backward_api_info.save_path, + APIInfo.get_full_save_path(msCheckerConfig.dump_path, 'backward_real_data', True)) + self.assertEqual(backward_api_info.grad_info_struct, { + "test_backward_api": [{'type': 'int', 'value': 1}, {'type': 'int', 'value': 2}, + {'type': 'int', 'value': 3}]}) + + def tearDown(self): + if os.path.exists('./step-1'): + shutil.rmtree('./step-1') diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_info_dump.py 
b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_info_dump.py index 0dd0898c167704a72a430f4779b13ce87e5dc9f4..06257422e8b181060f9ea1d363810b1df769cbc6 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_info_dump.py @@ -1,10 +1,9 @@ import unittest import os -import fcntl from unittest.mock import patch -from api_accuracy_checker.dump.api_info import APIInfo, ForwardAPIInfo, BackwardAPIInfo -from api_accuracy_checker.dump.info_dump import write_api_info_json, write_json, initialize_output_json -from api_accuracy_checker.common.utils import check_file_or_directory_path, initialize_save_path +from api_accuracy_checker.dump.api_info import APIInfo, BackwardAPIInfo +from api_accuracy_checker.dump.info_dump import write_api_info_json + class TestInfoDump(unittest.TestCase): @@ -16,13 +15,14 @@ class TestInfoDump(unittest.TestCase): mock_write_json.assert_called_with(f'./step2/backward_info_{rank}.json', api_info.grad_info_struct) def test_write_api_info_json_invalid_type(self): - api_info = APIInfo("test_api", True, True, "save_path") + api_info = APIInfo("test_api", APIInfo.get_full_save_path("save_path", "forward_real_data", contain_step=True), + is_save_data=True) with self.assertRaises(ValueError): write_api_info_json(api_info) - + def tearDown(self): rank = os.getpid() files = [f'./step2/backward_info_{rank}.json'] for file in files: if os.path.exists(file): - os.remove(file) + os.remove(file) \ No newline at end of file diff --git a/debug/accuracy_tools/ptdbg_ascend/CMakeLists.txt b/debug/accuracy_tools/ptdbg_ascend/CMakeLists.txt index 13cf18f7fd7116687241c79d53e1b6dc2d719d4d..3cb2357463afed702d76c61bf88c2d35a1705cc0 100644 --- a/debug/accuracy_tools/ptdbg_ascend/CMakeLists.txt +++ b/debug/accuracy_tools/ptdbg_ascend/CMakeLists.txt @@ -16,4 +16,4 @@ add_custom_target(ptdbg_ascend ALL VERBATIM ) -install(CODE "execute_process(COMMAND ${PYTHON_BIN_PATH} 
-m pip install ${CMAKE_BINARY_DIR}/ptdbg_ascend/dist/ptdbg_ascend-4.0.T2-py3-none-any.whl --upgrade)") +install(CODE "execute_process(COMMAND ${PYTHON_BIN_PATH} -m pip install ${CMAKE_BINARY_DIR}/ptdbg_ascend/dist/ptdbg_ascend-5.0.T1-py3-none-any.whl --upgrade)") diff --git a/debug/accuracy_tools/ptdbg_ascend/README.md b/debug/accuracy_tools/ptdbg_ascend/README.md index 80a9da216cbcaff177e64f6fd340493cf3d1b53e..f9ca19daf7d99f4967d383a82fc95146ea449908 100644 --- a/debug/accuracy_tools/ptdbg_ascend/README.md +++ b/debug/accuracy_tools/ptdbg_ascend/README.md @@ -10,8 +10,7 @@ | ptdbg_ascend版本 | 发布日期 | 支持PyTorch版本 | 下载链接 | 参考指南 | 校验码 | | ---------------- | ---------- | -------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | - | 4.0.T2 | 2023-11-29 | 1.11.0/2.0/2.1 | [ptdbg_ascend-4.0.T2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/4.0/ptdbg_ascend-4.0.T2-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v4.0.T2](doc/ptdbg_ascend精度工具功能说明_v4.0.T2.md) | d7d8a5e6e75b488e5b476532546bbef8af9e5ac0988e00a0d09118d7854d66ea | - | 4.0.T1 | 2023-11-09 | 1.11.0/2.0/2.1 | [ptdbg_ascend-4.0.T1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/4.0/ptdbg_ascend-4.0.T1-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v4.0.T1](doc/ptdbg_ascend精度工具功能说明_v4.0.T1.md) | 2a3dd23c9573fbd6b78c128cbfcd9f2fdf09b66d0ca5619e4095781d3ba5761c | + | 4.0 | 2023-11-23 | 1.11.0/2.0/2.1 | [ptdbg_ascend-4.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/4.0/ptdbg_ascend-4.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v4.0](doc/ptdbg_ascend精度工具功能说明_v4.0.md) | ba7ff7a1acffb1a2fab02fea76b6f957b2868bc6b66d72365622f6a8950406c6 | | 3.0 | 2023-10-16 | 1.8.1/1.11.0/2.0/2.1 | 
[ptdbg_ascend-3.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/3.0/ptdbg_ascend-3.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v3.0](doc/ptdbg_ascend精度工具功能说明_v3.0.md) | eb177ec795f8ae8b0c937a3cf543914f535bb64c76ba2e520fc6f0456ff6740b | | 2.0 | 2023-7-07 | 1.8.1/1.11.0/2.0 | [ptdbg_ascend-2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/2.0/ptdbg_ascend-2.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v2.0](doc/ptdbg_ascend精度工具功能说明_v2.0.md) | 85e046f133f0f40ed660337ce8207249b1dac47ac668910625bea49809f31d66 | | 1.0 | 2023-3-30 | 1.8.1/1.11.0 | [ptdbg_ascend-1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/1.0/ptdbg_ascend-1.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v1.0](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend%E7%B2%BE%E5%BA%A6%E5%B7%A5%E5%85%B7%E5%8A%9F%E8%83%BD%E8%AF%B4%E6%98%8E_v1.0.md) | 0559e12ba7accf80d182f227698163ee0de88bf86b1e9cd9f33b16fdead14759 | @@ -118,8 +117,7 @@ ptdbg_ascend精度工具的安装方式包括:**下载whl包安装**和**源 | ptdbg_ascend版本 | 发布日期 | 支持PyTorch版本 | 下载链接 | 参考指南 | 校验码 | | ---------------- | ---------- | -------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | - | 4.0.T2 | 2023-11-29 | 1.11.0/2.0/2.1 | [ptdbg_ascend-4.0.T2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/4.0/ptdbg_ascend-4.0.T2-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v4.0.T2](doc/ptdbg_ascend精度工具功能说明_v4.0.T2.md) | d7d8a5e6e75b488e5b476532546bbef8af9e5ac0988e00a0d09118d7854d66ea | - | 4.0.T1 | 2023-11-09 | 1.11.0/2.0/2.1 | [ptdbg_ascend-4.0.T1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/4.0/ptdbg_ascend-4.0.T1-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v4.0.T1](doc/ptdbg_ascend精度工具功能说明_v4.0.T1.md) | 
2a3dd23c9573fbd6b78c128cbfcd9f2fdf09b66d0ca5619e4095781d3ba5761c | + | 4.0 | 2023-11-23 | 1.11.0/2.0/2.1 | [ptdbg_ascend-4.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/4.0/ptdbg_ascend-4.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v4.0](doc/ptdbg_ascend精度工具功能说明_v4.0.md) | ba7ff7a1acffb1a2fab02fea76b6f957b2868bc6b66d72365622f6a8950406c6 | | 3.0 | 2023-10-16 | 1.8.1/1.11.0/2.0/2.1 | [ptdbg_ascend-3.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/3.0/ptdbg_ascend-3.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v3.0](doc/ptdbg_ascend精度工具功能说明_v3.0.md) | eb177ec795f8ae8b0c937a3cf543914f535bb64c76ba2e520fc6f0456ff6740b | | 2.0 | 2023-7-07 | 1.8.1/1.11.0/2.0 | [ptdbg_ascend-2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/2.0/ptdbg_ascend-2.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v2.0](doc/ptdbg_ascend精度工具功能说明_v2.0.md) | 85e046f133f0f40ed660337ce8207249b1dac47ac668910625bea49809f31d66 | | 1.0 | 2023-3-30 | 1.8.1/1.11.0 | [ptdbg_ascend-1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/package/ptdbg_ascend/1.0/ptdbg_ascend-1.0-py3-none-any.whl) | [ptdbg_ascend精度工具功能说明_v1.0](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend精度工具功能说明_v1.0.md) | 0559e12ba7accf80d182f227698163ee0de88bf86b1e9cd9f33b16fdead14759 | @@ -218,6 +216,7 @@ ptdbg_ascend精度工具的安装方式包括:**下载whl包安装**和**源 5. 
执行编译。 ```bash + # 请安装65.0.0及以下版本的setuptools,例如pip install setuptools==65.0.0 make ``` diff --git a/debug/accuracy_tools/ptdbg_ascend/RELEASE.md b/debug/accuracy_tools/ptdbg_ascend/RELEASE.md index a875d954a2a59cdbd69276a95f8ced51f5a51891..8c2baf0f358d9ca2e8d0d794181e21591d1aa956 100644 --- a/debug/accuracy_tools/ptdbg_ascend/RELEASE.md +++ b/debug/accuracy_tools/ptdbg_ascend/RELEASE.md @@ -1,4 +1,4 @@ -# Release 4.0.T2 +# Release 5.0.T1 This is the initial release of Pytorch precision compare tools which was designed by the researchers and engineers in Huawei Technologies Co.,Ltd. \ No newline at end of file diff --git a/debug/accuracy_tools/ptdbg_ascend/doc/FAQ.md b/debug/accuracy_tools/ptdbg_ascend/doc/FAQ.md index 83274ec35440aa14884f8fe1058d350492022a7e..07f72654a07d7ef4ba395b19cf33a78f0496d099 100644 --- a/debug/accuracy_tools/ptdbg_ascend/doc/FAQ.md +++ b/debug/accuracy_tools/ptdbg_ascend/doc/FAQ.md @@ -26,15 +26,11 @@ def npu_forward_fused_softmax(self, input_, mask): 会,同一个目录多次dump,会覆盖上一次结果,可以使用dump_tag参数修改dump目录名称。 -### 2. 一个网络中包含多个model,register hook中传入哪一个model? - -传入任意一个model即可,工具会自动dump所有model。 - -### 3. 如何dump算子级的数据? +### 2. 如何dump算子级的数据? 需要使用acl dump模式,即在dump操作中配置mode="acl"或dump_mode='acl'。 -### 4. 工具比对发现NPU和标杆数据的API无法完全对齐? +### 3. 工具比对发现NPU和标杆数据的API无法完全对齐? torch版本和硬件差异属于正常情况 @@ -63,25 +59,7 @@ echo $PYTHONPATH unset PYTHONPATH ``` -### 2. 单机多卡场景dump目录下只生成一个rank目录或pkl文件格式损坏 - -**故障现象** - -dump目录下只生成一个rank目录或dump目录下的pkl文件格式损坏、内容不完整。 - -**故障原因** - -通常是因为register_hook没有正确配置,带着工具没有获取正确的`rank_id`(从rank参数读取或从模型参数的device_id读取)。 - -**故障处理** - -register_hook需要在set_dump_path之后调用,也需要在每个进程上被调用,建议在搬运模型数据到卡之后调用。识别方法如下: - -- 找到训练代码中遍历epoch的for循环或遍历数据集的for循环,把register_hook放到循环开始前即可。 -- 找到训练代码中调用DDP或者DistributedDataParallel的代码行,把register_hook放到该代码行所在的代码块之后。 -- 若代码中均无以上两种情况,那么尽可能把这行代码往后放,并配置register_hook的rank参数。 - -### 3. HCCL 报错: error code: EI0006 +### 2. HCCL 报错: error code: EI0006 **故障现象** @@ -94,7 +72,7 @@ CANN软件版本较低导致不兼容。 **故障处理** 升级新版CANN软件版本。 -### 4. 
torch_npu._C._clear_overflow_npu() RuntimeError NPU error,error code is 107002 +### 3. torch_npu._C._clear_overflow_npu() RuntimeError NPU error,error code is 107002 如果运行溢出检测功能遇到这个报错,采取以下解决方法: 如果是单卡运行,添加如下代码,0是卡号,选择自己空闲的卡号。 @@ -108,24 +86,24 @@ torch.npu.set_device(f'npu:{rank}') ``` 如果运行精度比对功能遇到这个报错,尝试安装最新版本的ptdbg_ascend -### 5. 运行compare.py时报错:json.decoder.JSONDecodeError: Extra data: line 1 column 37(char 36) +### 4. 运行compare.py时报错:json.decoder.JSONDecodeError: Extra data: line 1 column 37(char 36) 遇到这种情况,先更新工具版本为最新版本,再重新运行训练代码dump数据,再用新的dump数据进行精度比对,如果最新版本未能解决问题,请联系ptdbg工具开发人员。 -### 6. AssertionError: assert set(WrapTensorOps) <= set(_tensor_ops) +### 5. AssertionError: assert set(WrapTensorOps) <= set(_tensor_ops) 遇到这种情况,先检查安装的torch版本,建议先更新工具版本为2.2以上,版本2.2的工具支持torch1.8、1.11和2.0 -### 7. dump得到的VF_lstm_99_forward_input.1.0.npy、VF_lstm_99_forward_input.1.1.npy类似的数据是否正常? +### 6. dump得到的VF_lstm_99_forward_input.1.0.npy、VF_lstm_99_forward_input.1.1.npy类似的数据是否正常? 带1.0/1.1/1.2后缀的npy是正常现象,例如当输入数据为[[tensor1, tensor2, tensor3]]会生成这样的后缀 -### 8. dump数据时,dump输出目录只得到了.npy文件,不生成pkl文件 +### 7. dump数据时,dump输出目录只得到了.npy文件,不生成pkl文件 - 检查set_dump_switch("ON"),set_dump_switch("OFF")是否都配置了; - 如果都配置了,观察模型运行日志结尾是否打印“Dump switch is turned off”,如果没有,则表明代码没有执行到set_dump_switch("OFF"),请检查模型代码中是否有exit()操作。 -### 9. 进行compare报错:The current file contains stack information, please turn on the stack_mode +### 8. 进行compare报错:The current file contains stack information, please turn on the stack_mode 在比对脚本中,设置stack_mode=True,例如: ``` @@ -139,46 +117,70 @@ dump_result_param={ } compare(dump_result_param, "./output", stack_mode=True) ``` -### 10. dump指定反向API的ACL级别的数据报错:NameError:name 'torch_npu' is not defined +### 9. dump指定反向API的ACL级别的数据报错:NameError:name 'torch_npu' is not defined - 如果是npu环境,请安装torch_npu; - 如果是gpu环境,暂不支持dump指定API的ACL级别的数据 -### 11. 配置dump_path后,使用工具报错:[ERROR]The file path /home/xxx/dump contains special characters +### 10. 
配置dump_path后,使用工具报错:[ERROR]The file path /home/xxx/dump contains special characters - 请检查你设置的dump绝对路径是否包含特殊字符,确保路径名只包含大小写字母、数字、下划线、斜杠、点和短横线 - 注意,如果你执行脚本的路径为/home/abc++/,你设置的dump_path="./dump",工具实际校验的路径为绝对路径/home/abc++/dump,++为特殊字符,会引发本条报错 -### 12. 报错:'IsADirectoryError: [Errno 21] Is a directory: '/data/rank0/api_stack_xxx'' - -- 请检查register_hook是否写在了set_dump_path前面,register_hook必须在set_dump_path后调用 -- 请检查是否写了多个register_hook或者set_dump_path,如有,请保留一个register_hook或者set_dump_path - -### 13. 无法dump matmul权重的反向梯度数据 +### 11. 无法dump matmul权重的反向梯度数据 - matmul期望的输入是二维,当输入不是二维时,会将输入通过view操作展成二维,再进行matmul运算,因此在反向求导时,backward_hook能拿到的是UnsafeViewBackward这步操作里面数据的梯度信息,取不到MmBackward这步操作里面数据的梯度信息,即权重的反向梯度数据。 - 典型的例子有,当linear的输入不是二维,且无bias时,会调用output = input.matmul(weight.t()),因此拿不到linear层的weight的反向梯度数据。 -### 14. pkl文件中的某些api的dtype类型为float16,但是读取此api的npy文件显示的dtype类型为float32 +### 12. pkl文件中的某些api的dtype类型为float16,但是读取此api的npy文件显示的dtype类型为float32 - ptdbg工具在dump数据时需要将原始数据从npu to cpu上再转换为numpy类型,npu to cpu的逻辑和gpu to cpu是保持一致的,都存在dtype可能从float16变为float32类型的情况,如果出现dtype不一致的问题,最终dump数据的dtype以pkl文件为准。 -### 15. 使用dataloader后raise异常Exception: ptdbg: exit after iteration [x, x, x] +### 13. 使用dataloader后raise异常Exception: ptdbg: exit after iteration [x, x, x] - 正常现象,dataloader通过raise结束程序,堆栈信息可忽略。 -### 16. 工具报错:AssertionError: Please register hooks to nn.Module - -- 请在model示例化之后配置register hook。 - -### 17. 添加ptdbg_ascend工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 +### 14. 添加ptdbg_ascend工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 - 注释工具目录ptdbg_ascend/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- __getitem__`,工具会跳过dump该API。如果是需要dump的关键位置api也可以考虑根据报错堆栈信息注释引发报错的类型检查。 -### 18. 添加ptdbg_ascend工具后F.gelu触发ValueError报错:`activation_func must be F.gelu`等。 +### 15. 
添加ptdbg_ascend工具后F.gelu触发ValueError报错:`activation_func must be F.gelu`等。 - 注释工具目录ptdbg_ascend/hook_module/support_wrap_ops.yaml文件中functional:下的`- gelu`,工具会跳过dump该API。如果是需要dump的关键位置api也可以考虑根据报错堆栈信息注释引发报错的类型检查。 -### 19. 添加ptdbg_ascend工具后触发AsStrided算子相关的报错,或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 +### 16. 添加ptdbg_ascend工具后触发AsStrided算子相关的报错,或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 + +- 注释工具目录ptdbg_ascend/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- t`和`- transpose`。 + +## 旧接口的异常问题 + +### 1. 单机多卡场景dump目录下只生成一个rank目录或pkl文件格式损坏 + +**故障现象** + +dump目录下只生成一个rank目录或dump目录下的pkl文件格式损坏、内容不完整。 + +**故障原因** + +通常是因为register_hook没有正确配置,导致工具没有获取正确的`rank_id`(从rank参数读取或从模型参数的device_id读取)。 + +**故障处理** + +register_hook需要在set_dump_path之后调用,也需要在每个进程上被调用,建议在搬运模型数据到卡之后调用。识别方法如下: + +- 找到训练代码中遍历epoch的for循环或遍历数据集的for循环,把register_hook放到循环开始前即可。 +- 找到训练代码中调用DDP或者DistributedDataParallel的代码行,把register_hook放到该代码行所在的代码块之后。 +- 若代码中均无以上两种情况,那么尽可能把这行代码往后放,并配置register_hook的rank参数。 + +### 2. 一个网络中包含多个model,register hook中传入哪一个model? + +传入任意一个model即可,工具会自动dump所有model。 + +### 3. 报错:'IsADirectoryError: [Errno 21] Is a directory: '/data/rank0/api_stack_xxx'' + +- 请检查register_hook是否写在了set_dump_path前面,register_hook必须在set_dump_path后调用 +- 请检查是否写了多个register_hook或者set_dump_path,如有,请保留一个register_hook或者set_dump_path + +### 4.
工具报错:AssertionError: Please register hooks to nn.Module -- 注释工具目录ptdbg_ascend/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- t`和`- transpose`。 \ No newline at end of file +- 请在model实例化之后配置register hook。 \ No newline at end of file diff --git "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T1.md" "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T1.md" deleted file mode 100644 index 20c61de585e044f85f7bfa289634c85c462a27ef..0000000000000000000000000000000000000000 --- "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T1.md" +++ /dev/null @@ -1,1569 +0,0 @@ -# **PyTorch精度工具使用指南** - -本文主要介绍PyTorch精度工具ptdbg_ascend的使用以及精度比对场景示例。 - -ptdbg_ascend工具的原理及安装请参见《[PyTorch精度工具](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/README.md)》。 - -## PyTorch精度比对总体流程 - -1. 准备CPU或GPU训练工程。 - -2. 在环境下安装ptdbg_ascend工具。 - -3. 在训练脚本内插入ptdbg_ascend工具dump接口。 - -4. 执行训练dump数据。 - -5. 将CPU或GPU训练工程迁移为NPU训练工程。 - - 请参见《[PyTorch模型迁移和训练指南](https://www.hiascend.com/document/detail/zh/canncommercial/63RC1/modeldevpt/ptmigr/ptmigr_0001.html)》。 - -6. 在NPU环境下安装ptdbg_ascend工具。 - -7. 在NPU训练脚本内插入ptdbg_ascend工具dump接口。 - -8. NPU环境下执行训练dump数据。 - -9. 创建并配置精度比对脚本,例如compare.py。 - -10. 执行CPU或GPU dump与NPU dump数据的精度比对。 - -11. 比对结果分析。 - -## 场景化示例 - -本章节主要介绍通过ptdbg_ascend工具进行精度比对和分析,主要使用“**CPU或GPU及NPU精度数据dump**”和“**CPU或GPU与NPU精度数据比对**”章节中介绍的ptdbg_ascend工具接口。 - -### 单卡场景精度比对 - -**精度分析建议** - -PyTorch训练场景的精度问题分析建议参考以下思路进行精度比对和比对结果分析: - -1. 整网比对:dump整网数据并进行精度比对,初步定位异常范围。 -2. 缩小范围:根据Accuracy Reached or Not找出不符合精度标准的API。 -3. 范围比对:对不符合精度标准的API重新dump。 -4. 分析原因并优化:分析API精度不符合标准的原因并进行优化调整。 -5. 整网比对:重新进行整网比对,判断优化后的API是否已符合精度标准以及是否出现新的精度问题。 -6. 重复1~5步,直到不存在精度问题为止。 - -**精度分析示例** - -1.
dump整网数据。 - - 分别dump CPU或GPU以及NPU数据,在PyTorch训练脚本插入dump接口,示例代码如下(下面以NPU为例,CPU或GPU dump基本相同): - - ```python - from ptdbg_ascend import * - - # 在main函数开始前固定随机数 - seed_all() - - # 配置dump数据目录路径和名称 - set_dump_path("./npu_dump", dump_tag='all') - - # 注册dump回调函数 - register_hook(model, acc_cmp_dump) - - ... - - # 在第一个迭代开始的位置开启dump和堆栈模式,同时为保证数据完整性开启dump bool和整型的tensor以及浮点、bool和整型的标量 - set_dump_switch("ON", mode="api_stack", filter_switch="OFF") - - ... - - # 在第一个迭代结束的位置关闭dump - set_dump_switch("OFF") - ``` - -2. 比对整网数据。 - - 第1步中的NPU dump数据文件为npu_dump.pkl,假设NPU dump npy数据目录为npu_dump,GPU dump数据文件为gpu_dump.pkl,GPU dump npy数据目录为gpu_dump。 - - 创建并配置精度比对脚本,以创建compare.py为例,示例代码如下: - - ```python - from ptdbg_ascend import * - dump_result_param={ - "npu_pkl_path": "./npu_dump/all_v2.0/rank0/api_stack_dump.pkl", - "bench_pkl_path": "./gpu_dump/all_v2.0/rank0/api_stack_dump.pkl", - "npu_dump_data_dir": "./npu_dump/all_v2.0/rank0/api_stack_dump", - "bench_dump_data_dir": "./gpu_dump/all_v2.0/rank0/api_stack_dump", - "is_print_compare_log": True - } - compare(dump_result_param, "./output") - ``` - - 执行比对: - - ```bash - python3 compare.py - ``` - - 在output目录下生成结果文件,包括:`compare_result_{timestamp}.csv`和`advisor_{timestamp}.txt` - -3. 找出存在问题的API。 - - 1. 根据`advisor_{timestamp}.txt`或打屏信息的提示,可找到存在精度问题的算子(Suspect Nodes)和专家建议(Expert Advice) - - ![auto_analyze_log](img/auto_analyze_log.png) - - 2. 根据第2步结果文件`compare_result_{timestamp}.csv`中的Accuracy Reached or No字段显示为NO的API,针对该API执行后续比对操作,分析该API存在的精度问题。 - -4. (可选)提取指定API的堆栈信息和dump数据统计信息。 - - 通过parse接口可以清晰的显示特定API的堆栈信息和dump数据统计信息,结合堆栈信息分析代码中可能存在的精度问题。 - - 创建并配置提取脚本,以创建parse.py为例,示例代码如下: - - ```python - from ptdbg_ascend import * - - # 提取dump信息中第1次调用的API:Torch_batch_normal的堆栈信息及数据统计信息 - parse("./npu_dump/all_v2.0/rank0/api_stack_dump.pkl", "Torch_batch_normal_1_forward") - ``` - - 执行提取: - - ```bash - python3 parse.py - ``` - - - -5. 
(可选)指定API dump数据。 - - - dump指定前向API的ACL级别数据 - - ```python - from ptdbg_ascend import * - - # 固定随机数,开启确定性计算 - seed_all(mode=True) - set_dump_path("./dump_path", dump_tag='forward') - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - - # dump指定前向API的ACL级别数据、bool和整型的tensor以及浮点、bool和整型的标量 - set_dump_switch("ON", mode="acl", scope=["Tensor_permute_1_forward"], filter_switch="OFF") - - ... - - set_dump_switch("OFF") - ``` - - - dump指定反向API的ACL级别数据 - - ```python - from ptdbg_ascend import * - - # 固定随机数,开启确定性计算 - seed_all(mode=True) - set_dump_path("./dump_path", dump_tag='backward') - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - - # dump指定反向API的ACL级别数据、bool和整型的tensor以及浮点、bool和整型的标量 - set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"], filter_switch="OFF") - set_backward_input(["./npu_dump/all_v2.0/rank0/api_stack_dump/Functional_conv2d_1_backward_input.0.npy"]) - - ... - - set_dump_switch("OFF") - ``` - -6. (可选)重新比对。 - - 根据第4或5步的dump数据重新配置compare.py并执行比对,可以对单API模型进行问题复现。 - -**注意事项** - -* dump_mode="acl"场景下,会增加npu的内存消耗,请谨慎开启。 -* 部分API存在调用嵌套关系,比如functional.batch_norm实际调用torch.batch_norm,该场景会影响acl init初始化多次,导致功能异常。 - -### 多卡场景精度比对 - -精度工具支持多卡场景的精度比对,多卡场景的dump步骤与单卡场景完全一致,请参见“**单卡场景精度比对**”章节,不同的是多卡数据精度比对时需要使用“compare_distributed”函数进行比对。如下示例: - -说明:多机多卡场景需要每个设备单独执行比对操作。 - -假设NPU dump npy数据目录为npu_dump/dump_conv2d_v1.0,GPU dump npy数据目录为gpu_dump/dump_conv2d_v1.0。 - -1. 创建比对脚本,例如compare_distributed.py,拷贝如下代码。 - - ```python - from ptdbg_ascend import * - compare_distributed('./npu_dump/ptdbg_dump_v2.0', './gpu_dump/ptdbg_dump_v2.0', './output') - ``` - -2. 
执行比对: - - ```bash - python3 compare_distributed.py - ``` - -两次运行须用相同数量的卡,传入`compare_distributed`的两个文件夹下须有相同个数的rank文件夹,且不包含其他无关文件,否则将无法比对。 - -**多卡set_dump_path注意事项** - -多卡一般为多进程,须保证每个进程都正确调用set_dump_path,或把set_dump_path插入到import语句后,如: - -```python -from ptdbg_ascend import * -seed_all() -set_dump_path('./dump_resnet') -``` - -如此可保证set_dump_path在每个进程都被调用。 - -**多卡register_hook注意事项** - -register_hook需要在set_dump_path之后调用,也需要在每个进程上被调用,建议在搬运模型数据到卡之后调用。识别方法如下: - -- 找到训练代码中遍历epoch的for循环或遍历数据集的for循环,把register_hook放到循环开始前即可。 -- 找到训练代码中调用DDP或者DistributedDataParallel的代码行,把register_hook放到该代码行所在的代码块之后。 -- 若代码中均无以上两种情况,需要保证register_hook在模型定义之后插入,并配置rank参数。rank参数获取rank_id请参见“**[rank_id获取方法](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/rank_id获取方法.md)**”。 - -### NPU vs NPU精度比对 - -对于NPU vs NPU场景,是针对同一模型,进行迭代(模型、API版本升级或设备硬件升级)时存在的精度下降问题,对比相同模型在迭代前后版本的API计算数值,进行问题定位。 - -一般情况下迭代涉及NPU自定义算子,因此,可以仅dump NPU自定义算子进行比对。比对精度问题分析请参见“**单卡场景精度比对**”章节。 - -工具当前支持dump NPU自定义算子如下: - -| 序号 | NPU自定义算子 | -| :--- | ----------------------------------- | -| 1 | torch_npu.one_ | -| 2 | torch_npu.npu_sort_v2 | -| 3 | torch_npu.npu_transpose | -| 4 | torch_npu.npu_broadcast | -| 5 | torch_npu.npu_dtype_cast | -| 6 | torch_npu.empty_with_format | -| 7 | torch_npu.npu_one_hot | -| 8 | torch_npu.npu_stride_add | -| 9 | torch_npu.npu_ps_roi_pooling | -| 10 | torch_npu.npu_roi_align | -| 11 | torch_npu.npu_nms_v4 | -| 12 | torch_npu.npu_iou | -| 13 | torch_npu.npu_nms_with_mask | -| 14 | torch_npu.npu_pad | -| 15 | torch_npu.npu_bounding_box_encode | -| 16 | torch_npu.npu_bounding_box_decode | -| 17 | torch_npu.npu_batch_nms | -| 18 | torch_npu.npu_slice | -| 19 | torch_npu._npu_dropout | -| 20 | torch_npu.npu_indexing | -| 21 | torch_npu.npu_ifmr | -| 22 | torch_npu.npu_max | -| 23 | torch_npu.npu_scatter | -| 24 | torch_npu.npu_layer_norm_eval | -| 25 | torch_npu.npu_alloc_float_status | -| 26 | torch_npu.npu_get_float_status | -| 27 | torch_npu.npu_clear_float_status | -| 28 | 
torch_npu.npu_confusion_transpose | -| 29 | torch_npu.npu_bmmV2 | -| 30 | torch_npu.fast_gelu | -| 31 | torch_npu.npu_sub_sample | -| 32 | torch_npu.npu_deformable_conv2d | -| 33 | torch_npu.npu_mish | -| 34 | torch_npu.npu_anchor_response_flags | -| 35 | torch_npu.npu_yolo_boxes_encode | -| 36 | torch_npu.npu_grid_assign_positive | -| 37 | torch_npu.npu_normalize_batch | -| 38 | torch_npu.npu_masked_fill_range | -| 39 | torch_npu.npu_linear | -| 40 | torch_npu.npu_bert_apply_adam | -| 41 | torch_npu.npu_giou | -| 42 | torch_npu.npu_ciou | -| 43 | torch_npu.npu_ciou_backward | -| 44 | torch_npu.npu_diou | -| 45 | torch_npu.npu_diou_backward | -| 46 | torch_npu.npu_sign_bits_pack | -| 47 | torch_npu.npu_sign_bits_unpack | - -### 通信API的数据dump - -通信类API数据可以使用全量dump方式获取,若只dump通信类API数据,可以使用如下示例: - -```python -debugger.configure_hook(mode="api_list", api_list=["distributed"]) -``` - -或 - -```python -set_dump_switch("ON", mode="api_list", api_list=["distributed"]) -``` - -通信类API支持列表: - -| 序号 | Distributed | -| :--- | -------------- | -| 1 | send | -| 2 | recv | -| 3 | broadcast | -| 4 | all_reduce | -| 5 | reduce | -| 6 | all_gather | -| 7 | gather | -| 8 | isend | -| 9 | irecv | -| 10 | scatter | -| 11 | reduce_scatter | - -### 溢出检测场景 - -溢出检测是针对NPU的PyTorch API,检测是否存在溢出的情况。当前仅支持识别aicore浮点溢出。 - -溢出检测原理:针对溢出阶段,开启acl dump模式,重新对溢出阶段执行,落盘数据。 - -建议按照如下步骤操作: - -1. 在NPU环境下安装ptdbg_ascend工具。 - -2. 在NPU训练脚本内插入ptdbg_ascend工具溢出检测接口。 - - - 示例1:全量溢出检测 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # 设置检测到3次溢出后退出训练 - register_hook(model, overflow_check, overflow_nums=3) - - ... - ``` - - 多卡使用时各卡单独计算溢出次数。 - - - 示例2:dump指定API的ACL级别溢出数据 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... 
- # dump指定API的ACL级别溢出数据 - register_hook(model, overflow_check, dump_mode='acl', dump_config='./dump.json') - - # 在期望溢出检测的step位置开始前打开溢出检测开关 - set_overflow_check_switch("ON") - - ... - - # 在step结束的位置关闭溢出检测开关 - set_overflow_check_switch("OFF") - - ... - ``` - - - 示例3:dump指定反向API的ACL级别的溢出数据 - - 1. 进行全量溢出检测 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # 设置检测到3次溢出后退出训练 - register_hook(model, overflow_check) - - ... - ``` - - 2. dump指定反向API的ACL级别的溢出数据 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # dump指定反向API的ACL级别溢出数据 - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"]) - set_backward_input(["./npu_dump/ptdbg_dump_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) - ``` - - 针对前向溢出API,可以通过overflow_nums,配置允许的溢出次数,并将每次溢出API的全部ACL数据dump下来,到达指定溢出次数后停止,停止后会看到堆栈打印包含如下字段。 - - ```bash - ValueError: [overflow xxx times]: dump file is saved in 'xxxxx.pkl'. - ``` - - 其中xxx times为用户设置的次数,xxxxx.pkl为文件生成路径。 - -3. 
NPU环境下执行训练dump溢出数据。 - - 针对输入正常但输出存在溢出的API,会在训练执行目录下将溢出的API信息dump并保存为`forward_info_{pid}.json`和`backward_info_{pid}.json`,通过 [Ascend模型精度预检工具](https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker)对json文件进行解析,输出溢出API为正常溢出还是非正常溢出,从而帮助用户快速判断。 - - 精度预检工具执行命令如下: - - ```bash - # 下载att代码仓后执行如下命令 - export PYTHONPATH=$PYTHONPATH:$ATT_HOME/debug/accuracy_tools/ - cd $ATT_HOME/debug/accuracy_tools/api_accuracy_checker/run_ut - python run_overflow_check.py -forward ./forward_info_0.json - ``` - - 反向过程溢出的API暂不支持精度预检功能。 - - 当重复执行溢出检测dump操作时,需要删除上一次dump目录下的溢出检测dump数据,否则将因重名而报错。 - -**注意事项** - -* dump_mode="acl"场景下,会增加npu的内存消耗,请谨慎开启。 -* 部分API存在调用嵌套关系,比如functional.batch_norm实际调用torch.batch_norm,该场景会影响acl init初始化多次,导致功能异常。 - -## debugger方式dump和溢出检测(推荐) - -### PrecisionDebugger模块 - -**功能说明** - -PrecisionDebugger模块包含dump和溢出检测功能的总体配置项。可以指定dump目录,设置dump或溢出检测功能,指定dump的卡和迭代。 - -可以在from ptdbg_ascend import *和模型初始化之间的任意位置添加该模块。 - -**原型** - -```python -PrecisionDebugger(dump_path=None, hook_name=None, rank=None, step=[], enable_dataloader=False): -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ----------------- | ------------------------------------------------------------ | -------- | -| dump_path | 设置dump数据目录路径,参数示例:"./dump_path"。
默认在dump_path目录下生成`ptdbg_dump_{version}`目录,并在该目录下生成`dump.pkl`文件以及`dump`数据文件保存目录。
当**configure_hook**函数配置了mode参数时,`dump.pkl`文件以及`dump`数据文件保存目录名称添加mode参数值为前缀,详情请参见“**dump数据存盘说明**”。
未配置dump_path时,也可以通过环境变量ASCEND_WORK_PATH配置dump路径,此时dump数据将落盘在${ASCEND_WORK_PATH}/dump_data下,自定义配置dump_path优先级高于环境变量,dump_path和环境变量需要二选一。 | 否 | -| hook_name | dump模式,可取值dump和overflow_check,表示dump和溢出检测功能,二选一。 | 是 | -| rank | 指定对某张卡上的数据进行dump或溢出检测,默认未配置(表示dump所有卡的数据),须根据实际卡的Rank ID配置。应配置为大于0的正整数,且须根据实际卡的Rank ID配置,若所配置的值大于实际训练所运行的卡的Rank ID,则dump数据为空,比如当前环境Rank ID为0~7,实际训练运行0~3卡,此时若配置Rank ID为4或不存在的10等其他值,此时dump数据为空。 | 否 | -| step | 指定dump某个step的数据。 | 否 | -| enable_dataloader | 自动控制开关,可取值True(开启)或False(关闭),默认为False。配置为True后自动识别dump step参数指定的迭代,并在该迭代执行完成后退出训练,此时start和stop函数可不配置,开启该开关要求训练脚本是通过torch.utils.data.dataloader方式加载数据;配置为False则需要配置start和stop函数,并在最后一个stop函数后或一个step结束的位置添加debugger.step()。 | 否 | - -### configure_hook函数(可选) - -**功能说明** - -设置dump范围。 - -建议在**PrecisionDebugger**模块与模型初始化之间的任意位置添加,不添加此函数时默认使用mode="api_stack" dump整网数据。 - -**原型** - -dump: - -```python -debugger.configure_hook(mode="api_stack", scope=[], api_list=[], filter_switch="OFF", acl_config=None, backward_input=[], input_output_mode=["all"], summary_only=False) -``` - -溢出检测: - -```python -debugger.configure_hook(mode=None, acl_config=None, overflow_nums=1, need_replicate=False) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ----------------- | ------------------------------------------------------------ | -------- | -| mode | dump模式。可取值"all"、"list"、"range"、"stack"、"acl"、"api_list"、"api_stack",各参数含义请参见本节的“**函数示例**”。参数示例:mode="list"。默认为api_stack。该参数配置值将作为dump数据文件名的前缀,详情请参见“**dump数据存盘说明**”。 | 否 | -| scope或api_list | dump范围。根据model配置的模式选择dump的API范围,mode="api_list"时,需要配置api_list=[],其他模式有需要时配置scope=[]。参数示例:scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward"]、api_list=["relu"]。默认为空。 | 否 | -| filter_switch | dump bool和整型的tensor以及浮点、bool和整型的标量的过滤开关。可取值"ON"(表示开启过滤,即不dump)或"OFF"(表示关闭过滤)。参数示例:filter_switch="ON"。默认不配置,即filter_switch="OFF",表示dump上述数据。 | 否 | -| acl_config | acl dump的配置文件。mode="acl"时,该参数必选;mode为其他值时,该参数不选。参数示例:acl_config='./dump.json'。dump.json配置文件详细介绍请参见“**dump.json配置文件说明**”。 | 否 | -| 
backward_input | 该输入文件为首次运行训练dump得到反向API输入的.npy文件。例如若需要dump Functional_conv2d_1 API的反向过程的输入输出,则需要在dump目录下查找命名包含Functional_conv2d_1、backward和input字段的.npy文件。 | 否 | -| input_output_mode | dump数据过滤。可取值"all"、"forward"、"backward"、"input"和"output",表示仅保存dump的数据中文件名包含"forward"、"backward"、"input"和"output"的前向、反向、输入或输出的.npy文件。参数示例input_output_mode=["backward"]或input_output_mode=["forward", "backward"]。默认为all,即保存所有dump的数据。除了all参数只能单独配置外,其他参数可以自由组合。 | 否 | -| summary_only | dump npy文件过滤,可取值True或False,配置为True后仅dump保存API统计信息的pkl文件,参数示例:summary_only=False,默认为False。 | 否 | -| overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | -| need_replicate | 过程dump数据生成开关,执行溢出检测时,dump目录下会生成forward_real_data和backward_real_data的过程dump数据目录,可取值True(生成)或False(不生成),默认不生成。 | 否 | - -**函数示例** - -configure_hook可配置多种dump模式,示例如下: - -说明:以下均以dump部分API数据为例,API名可以从首次dump整网数据的结果csv文件中的NPU Name或Bench Name列获取。 - -- 示例1:dump指定API列表 - - ```python - debugger.configure_hook(mode="list", scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward", "Torch_relu_3_backward"]) - ``` - -- 示例2:dump指定范围 - - ```python - debugger.configure_hook(mode="range", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例3:STACK模式,只dump堆栈信息 - - ```python - debugger.configure_hook(mode="stack", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例4:dump指定前向API的ACL级别数据 - - ```python - debugger.configure_hook(mode="acl", scope=["Tensor_permute_1_forward"], acl_config="./dump.json") - ``` - -- 示例4:dump指定反向API的ACL级别数据 - - ```python - debugger.configure_hook(mode="acl", scope=["Functional_conv2d_1_backward"], acl_config="./dump.json", backward_input=["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) - ``` - -- 示例5:dump指定某一类API的API级别输入输出数据 - - ```python - debugger.configure_hook(mode="api_list", api_list=["relu"]) - ``` - - mode="api_list"时不配置scope。 - -- 
示例6:dump全部API级别输入输出数据以及相应堆栈信息 - - ```python - debugger.configure_hook(mode="api_stack") - ``` - - mode="api_stack"时不配置scope。 - -- 示例7: dump全部API级别输入输出数据并包含bool和整型的tensor以及浮点、bool和整型的标量,配置为OFF,会dump bool和整型数据 - - ```python - debugger.configure_hook(filter_switch="OFF") - ``` - - 配置filter_switch="OFF"同时也可以配置mode、scope和api_list,除dump ACL级别数据。 - -- 示例8:仅保存dump的数据文件名包含“backward”的反向.npy文件 - - ```python - debugger.configure_hook(input_output_mode=["backward"]) - ``` - -- 示例9:仅dump pkl文件 - - ```python - debugger.configure_hook(summary_only=True) - ``` - -- 示例10:溢出检测dump - - ```python - debugger.configure_hook(overflow_nums=1) - ``` - - dump执行时会在**PrecisionDebugger**模块的dump_path参数指定的目录下生成ptdbg_dump_{version}目录,保存溢出数据。 - - 多卡场景时,需要检测到至少有一张卡溢出次数达到overflow_nums时,训练结束。 - - 仅支持NPU环境。 - -- 示例11:dump溢出API的ACL级别数据 - - ```python - debugger.configure_hook(mode="acl", acl_config="./dump.json") - ``` - - 该场景会在原有数据基础上,额外在dump.json文件配置的dump_path目录下生成一份ACL算子数据,该数据可通过“**ptdbg_ascend.parse**”工具进行解析。 - - 仅支持NPU环境。 - -### start函数(可选) - -**功能说明** - -dump或溢出检测启动函数。 - -在模型初始化之后的任意位置添加。 - -**原型** - -```python -debugger.start() -``` - -该函数为类函数,可以使用debugger.start()也可以使用PrecisionDebugger.start()。 - -### stop函数(可选) - -**功能说明** - -dump或溢出检测停止函数。 - -在**start**函数之后的任意位置添加。 - -**原型** - -```python -debugger.stop() -``` - -该函数为类函数,可以使用debugger.stop()也可以使用PrecisionDebugger.stop()。 - -### 示例代码(自动模式) - -- 示例1:开启dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0,2], enable_dataloader=True) - # 请勿将以上初始化流程插入到循环代码中 - ``` - -- 示例2:开启溢出检测dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="overflow_check", step=[0,2], enable_dataloader=True) - # 请勿将以上初始化流程插入到循环代码中 - ``` - -### 示例代码(手动模式) - -一般情况下使用自动模式可以快速方便进行dump操作,但个别大模型可能在部分卡的训练操作中没有调用dataloader,这会导致自动模式无法dump指定迭代的数据,此时需要关闭自动模式手动在迭代前后插入start()和stop()函数,并在最后一个stop函数后或一个step结束的位置添加debugger.step()以标识dump结束。 - -- 示例1:开启dump - - 
```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) - # 请勿将以上初始化流程插入到循环代码中 - - # 模型初始化 - # 下面代码也可以用PrecisionDebugger.start()和PrecisionDebugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.step() - ``` - -- 示例2:开启溢出检测dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="overflow_check", step=[0]) - # 请勿将以上初始化流程插入到循环代码中 - - # 模型初始化 - # 下面代码也可以用PrecisionDebugger.start()和PrecisionDebugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.step() - ``` - -## CPU或GPU及NPU精度数据dump - -### 总体说明 - -- 本节主要介绍CPU或GPU及NPU精度数据dump所需要的函数以及示例。 - -- ptdbg_ascend工具默认情况下仅dump PyTorch模型的API输入输出数据进行精度比对,若在比对结果中发现某个API下可能存在ACL的精度问题,那么可以选择dump该API的ACL级别数据进行精度分析。 - -- 某些torch api的输出不是Tensor类型的数据。对于此类API的反向过程进行ACL dump,工具会在运行日志中给出对应的Warning(is not of tensor type and cannot be automatically derived)提示。如若想要进行该类API反向ACL dump,可以通过手动构建单API用例的方式进行ACL dump,具体用例可参见“**[反向ACL dump用例说明](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/%E5%8F%8D%E5%90%91ACL%20dump%E7%94%A8%E4%BE%8B%E8%AF%B4%E6%98%8E.md)**”。 - -- 工具性能:dump数据量较小时(小于5G),参考dump速度0.1GB/s;dump数据量较大时,参考dump速度0.2GB/s。 - 推荐环境配置:独占环境,CPU核心数192,固态硬盘(IO速度参考:固态硬盘 > 500MB/s,机械硬盘60 ~ 170MB/s)。 - - 用户环境性能弱于标准约束或非独占使用的比对速度酌情向下浮动。Dump速度的计算方式:Dump数据量/(单个step添加Dump耗时-原始单个step耗时)。 - -### 约束 -- 进行CPU或GPU数据dump时,请安装torch包而非torch_npu包,避免工具无法识别使用场景,导致失败。 - -- TASK_QUEUE_ENABLE环境变量会导致API下发和执行异步进行,因此在ACL dump前需要将TASK_QUEUE_ENABLE关闭,即export TASK_QUEUE_ENABLE=0。 - -- 不建议在PyTorch训练脚本中同时添加dump接口和性能数据采集(如Ascend PyThon Profiler)接口,二者可能相互影响导致数据不准确。 - -### seed_all - -**功能说明** - -固定随机数。通过固定随机数保证模型的输入或输出一致。在训练主函数开始前调用,避免随机数固定不全。 - -dump操作必选。 - -**函数原型** - -```python -seed_all(seed=1234, mode=False) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| 
------ | ------------------------------------------------------------ | -------- | -| seed | 随机数种子。参数示例:seed=1000。默认值为:1234。 | 否 | -| mode | 确定性计算模式。可配置True或False。参数示例:mode=True。默认为False。
即使在相同的硬件和输入下,API多次执行的结果也可能不同,开启确定性计算是为了保证在相同的硬件和输入下,API多次执行的结果相同。
确定性计算会导致API执行性能降低,建议在发现模型多次执行结果不同的情况下开启。
rnn类算子、ReduceSum、ReduceMean等算子可能与确定性计算存在冲突,若开启确定性计算后多次执行的结果不相同,则考虑存在这些算子。 | 否 | - -**函数示例** - -seed_all函数的随机数种子,取默认值即可,无须配置;第二个参数默认关闭,不开启确定性计算时也无须配置。 - -- 示例1:仅固定随机数,不开启确定性计算 - - ```python - seed_all() - ``` - -- 示例2:固定随机数,开启确定性计算 - - ```python - seed_all(mode=True) - ``` - -**固定随机数范围** - -seed_all函数可固定随机数的范围如下表。 - -| API | 固定随机数 | -| ---------------------------------------- | --------------------------- | -| os.environ['PYTHONHASHSEED'] = str(seed) | 禁止Python中的hash随机化 | -| random.seed(seed) | 设置random随机生成器的种子 | -| np.random.seed(seed) | 设置numpy中随机生成器的种子 | -| torch.manual_seed(seed) | 设置当前CPU的随机种子 | -| torch.cuda.manual_seed(seed) | 设置当前GPU的随机种子 | -| torch.cuda.manual_seed_all(seed) | 设置所有GPU的随机种子 | -| torch_npu.npu.manual_seed(seed) | 设置当前NPU的随机种子 | -| torch_npu.npu.manual_seed_all(seed) | 设置所有NPU的随机种子 | -| torch.backends.cudnn.enable=False | 关闭cuDNN | -| torch.backends.cudnn.benchmark=False | cuDNN确定性地选择算法 | -| torch.backends.cudnn.deterministic=True | cuDNN仅使用确定性的卷积算法 | - -需要保证CPU或GPU以及NPU的模型输入完全一致,dump数据的比对才有意义,seed_all并不能保证模型输入完全一致,如下表所示场景需要保证输入的一致性。 - -| 场景 | 固定方法 | -| --------------- | ------------- | -| 数据集的shuffle | 关闭shuffle。 | -| dropout | 关闭dropout。 | - -关闭shuffle示例: - -```python -train_loader = torch.utils.data.DataLoader( - train_dataset, - batch_size = batch_size, - shuffle = False, - num_workers = num_workers -) -``` - -关闭dropout: - -在使用from ptdbg import *后,工具会自动将torch.nn.functional.dropout、torch.nn.functional.dropout2d、torch.nn.functional.dropout3d、torch.nn.Dropout、torch.nn.Dropout2d、torch.nn.Dropout3d的接口参数p置为0。 - -### set_dump_path - -**功能说明** - -设置数据保存目录。建议在seed_all函数之后调用且需要保证训练进程能够调用该函数;多卡时须保证每个进程都能调用该函数。 - -**函数原型** - -```python -set_dump_path(fpath=None, dump_tag='ptdbg_dump') -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| -------- | ------------------------------------------------------------ | -------- | -| fpath | 设置数据目录路径。参数示例:'./dump_path'。
默认在dump_path目录下生成`ptdbg_dump_{version}`目录,并在该目录下生成`dump.pkl`文件以及`dump`数据文件保存目录。
当set_dump_switch函数配置了mode参数时,`dump.pkl`文件以及`dump`数据文件保存目录名称添加mode参数值为前缀,详情请参见“**dump数据存盘说明**”。
未配置fpath时,也可以通过环境变量ASCEND_WORK_PATH配置dump路径,此时数据将落盘在${ASCEND_WORK_PATH}/dump_data下,自定义配置fpath优先级高于环境变量,fpath和环境变量需要二选一。 | 否 | -| dump_tag | 设置数据目录名称。参数示例:dump_tag='dump_conv2d'。默认数据目录命名为ptdbg_dump_{version}。
{version}为当前安装ptdbg_ascend工具版本。目录结构参见“**dump数据存盘说明**”。
配置该参数会将生成的`ptdbg_dump_{version}`目录名称变更为dump_tag配置的值,如`dump_conv2d_{version}`。 | 否 | - -**函数示例** - -- 示例1:设置数据目录路径 - - ```python - set_dump_path('./dump_path') - ``` - -- 示例2:设置数据目录名称 - - ```python - set_dump_path('./dump_path', dump_tag='dump_conv2d') - ``` - - -若以相同的数据目录多次dump,则会因同名导致覆盖;多次dump建议配置不同的dump_tag。 - -### register_hook - -**功能说明** - -注册工具钩子函数。在set_dump_path之后调用。 - -dump操作必选。 - -**函数原型** - -```python -register_hook(model, hook, overflow_nums=overflow_nums, dump_mode=dump_mode, dump_config=dump_config_file) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------- | ------------------------------------------------------------ | -------- | -| hook | 注册工具的dump和溢出检测钩子。可取值overflow_check(表示溢出检测)和acc_cmp_dump(表示dump数据),二选一。 | 是 | -| overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | -| dump_mode | 控制针对溢出API的dump模式。可取值"api"或"acl",配置acl时表示dump ACL级别的溢出数据,此时set_dump_path参数不生效,dump数据目录由dump_config的.json文件配置,参数示例:dump_mode="acl"。默认不配置,即dump API级别的溢出数据。 | 否 | -| dump_config | acl dump的配置文件。dump_mode="acl"时,该参数必选;dump_mode="api"时,该参数不选。参数示例:dump_config='./dump.json'。 | 否 | - -**函数示例** - -- 示例1:注册工具钩子函数 - - ```python - register_hook(model, acc_cmp_dump) - ``` - -- 示例2:dump指定API的ACL级别数据 - - ```python - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - ``` - - 需要配置set_dump_switch的mode="acl"以及scope指定为前向或反向API,请参见“**set_dump_switch”**的示例。 - - 该场景set_dump_path不生效,由dump_config中的dump.json文件配置dump数据目录。 - -- 示例3:溢出检测dump - - ```python - register_hook(model, overflow_check, overflow_nums=3) - ``` - - dump执行时会在set_dump_path的fpath参数指定的目录下生成ptdbg_dump_{version}目录,保存溢出数据。 - - 多卡场景时,需要检测到至少有一张卡溢出次数达到overflow_nums时,训练结束。 - - 仅支持NPU环境。 - -- 示例4:dump指定API的ACL级别溢出数据 - - ```python - register_hook(model, overflow_check, dump_mode='acl', dump_config='./dump.json') - ``` - - 
该场景会在原有数据基础上,额外在dump.json文件配置的dump_path目录下生成一份ACL算子数据,该数据可通过“**ptdbg_ascend.parse**”工具进行解析。 - - 仅支持NPU环境。 - -### set_dump_switch - -**功能说明** - -设置dump范围。建议在register_hook函数之后的脚本内任意位置插入,但进行精度问题排查建议参照“场景化示例 > 单卡场景精度比对”章节的顺序,先从第一个迭代开始的位置调用并dump整网数据。 - -dump操作必选。 - -**函数原型** - -```python -def set_dump_switch(switch, mode="all", scope=[], api_list=[], filter_switch="OFF", dump_mode=["all"], summary_only=False): -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| --------------- | ------------------------------------------------------------ | -------- | -| switch | dump开关。可取值"ON"或"OFF"。须在选定dump开始的位置配置set_dump_switch("ON");dump结束的位置设置set_dump_switch("OFF")。 | 是 | -| mode | dump模式。可取值"all"、"list"、"range"、"stack"、"acl"、"api_list"、"api_stack",各参数含义请参见本节的“**函数示例**”。参数示例:mode="list"。默认为all。该参数配置值将作为dump数据文件名的前缀,详情请参见“**dump数据存盘说明**”。 | 否 | -| scope或api_list | dump范围。根据model配置的模式选择dump的API范围。参数示例:scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward"]、api_list=["relu"]。默认为空。 | 否 | -| filter_switch | dump bool和整型的tensor以及浮点、bool和整型的标量的过滤开关。可取值"ON"或"OFF"。参数示例:filter_switch="ON"。默认不配置,即filter_switch="OFF",表示dump上述数据。 | 否 | -| dump_mode | dump数据过滤。可取值"all"、"forward"、"backward"、"input"和"output",表示仅保存dump的数据中文件名包含"forward"、"backward"、"input"和"output"的前向、反向、输入或输出的.npy文件。参数示例dump_mode=["backward"]或dump_mode=["forward", "backward"]。默认为all,即保存所有dump的数据。除了all参数只能单独配置外,其他参数可以自由组合。 | 否 | -| summary_only | dump npy文件过滤,可取值True或False,配置为True后仅dump保存API统计信息的pkl文件,参数示例:summary_only=False,默认为False。 | 否 | - -**推荐配置** - -```python -set_dump_switch("ON", mode="api_stack", filter_switch="OFF") -``` - -开启dump数据和堆栈模式,同时为保证数据完整性开启dump bool和整型的tensor以及浮点、bool和整型的标量。 - -**函数示例** - -set_dump_switch可配置多种dump模式,示例如下: - -说明:以下均以dump部分API数据为例,API名可以从首次dump整网数据的结果csv文件中的NPU Name或Bench Name列获取。 - -- 示例1:dump指定API列表 - - ```python - set_dump_switch("ON", mode="list", scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward", "Torch_relu_3_backward"]) - ``` - -- 示例2:dump指定范围 - - ```python - 
set_dump_switch("ON", mode="range", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例3:STACK模式,只dump堆栈信息 - - ```python - set_dump_switch("ON", mode="stack", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例4:dump指定前向API的ACL级别数据 - - ```python - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - set_dump_switch("ON", mode="acl", scope=["Tensor_permute_1_forward"]) - ``` - - 需要配置register_hook的dump_mode='acl'和dump_config配置文件。 - -- 示例4:dump指定反向API的ACL级别数据 - - ```python - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"]) - set_backward_input(["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) - ``` - - 需要配置register_hook的dump_mode='acl'和dump_config配置文件,并通过set_backward_input设置反向API输入的.npy文件。 - -- 示例5:dump指定某一类API的API级别输入输出数据 - - ```python - set_dump_switch("ON", mode="api_list", api_list=["relu"]) - ``` - - mode="api_list"时不配置scope。 - -- 示例6:dump全部API级别输入输出数据以及相应堆栈信息 - - ```python - set_dump_switch("ON", mode="api_stack") - ``` - - mode="api_stack"时不配置scope。 - -- 示例7: dump全部API级别输入输出数据并包含bool和整型的tensor以及浮点、bool和整型的标量,配置为OFF,会dump bool和整型数据 - - ```python - set_dump_switch("ON", filter_switch="OFF") - ``` - - 配置filter_switch="OFF"同时也可以配置mode、scope和api_list,除dump ACL级别数据。 - -- 示例8:仅保存dump的数据文件名包含“backward”的反向.npy文件 - - ```python - set_dump_switch("ON", dump_mode=["backward"]) - ``` - -- 示例9:仅dump pkl文件 - - ```python - set_dump_switch("ON", summary_only=True) - ``` - -以上示例均需要在结束dump的位置插入set_dump_switch("OFF")。 - -set_dump_switch配置mode为all或api_stack时,结束dump后,在dump目录下会自动生成compare_data.py比对脚本模板,示例如下: - -```python -from ptdbg_ascend import compare - -pkl_path = "%s" -dump_data_dir = "%s" - -dump_path_param = { - "npu_pkl_path": , - "bench_pkl_path": , - "npu_dump_data_dir": , - "bench_dump_data_dir": , - "is_print_compare_log": True -} - -compare(dump_path_param, 
output_path="", stack_mode="%s") -``` - -pkl_path和dump_data_dir字段会自动识别pkl和dump目录的路径,用户需要判断当前dump的环境是NPU、CPU或GPU,并将pkl_path和dump_data_dir字段填入下方dump_path_param字典对应的字段中,例如当前设备为NPU,那么填写方式如下: - -```python -from ptdbg_ascend import compare - -pkl_path = "%s" -dump_data_dir = "%s" - -dump_path_param = { - "npu_pkl_path": pkl_path, - "bench_pkl_path": , - "npu_dump_data_dir": dump_data_dir, - "bench_dump_data_dir": , - "is_print_compare_log": True -} - -compare(dump_path_param, output_path="", stack_mode="%s") -``` - -此时,另一侧数据的路径,需要用户另外识别并填入。 - -### set_overflow_check_switch - -**功能说明** - -设置溢出检测范围。默认不配置该函数,全量进行溢出检测。 - -仅支持NPU环境。 - -**函数原型** - -```python -set_overflow_check_switch(switch, filter_switch='OFF') -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------- | ------------------------------------------------------------ | -------- | -| switch | 检测开关。可取值"ON"或"OFF"。如果只在特定的step溢出检测,则在期望溢出检测的step位置开始前插入set_overflow_check_switch("ON"),在step结束的位置插入set_overflow_check_switch("OFF")。 | 是 | -| filter_switch | dump bool和整型的tensor以及浮点、bool和整型的标量的过滤开关。可取值"ON"或"OFF"。参数示例:filter_switch="ON"。默认不配置,即filter_switch="OFF",表示dump上述数据。 | 否 | - -**函数示例** - -- 示例1:指定范围溢出检测 - - ```python - register_hook(model, overflow_check) - set_overflow_check_switch("ON") - - ... - - set_overflow_check_switch("OFF") - ``` - - 该场景set_dump_path不生效,dump执行时会在当前目录自动生成ptdbg_dump_{version}目录,保存溢出数据。 - -- 示例2:前向API的ACL级别范围溢出检测 - - ```python - register_hook(model, overflow_check, dump_mode='acl', dump_config='./dump.json') - set_overflow_check_switch("ON") - - ... 
- - set_overflow_check_switch("OFF") - ``` - - 该场景set_dump_path不生效,由dump_config中的dump.json文件配置溢出数据目录。 - -### set_backward_input - -**功能说明** - -设置反向ACL级别dump时需要的反向输入的.npy文件。 - -**函数原型** - -```python -set_backward_input(backward_input) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| -------------- | ------------------------------------------------------------ | -------- | -| backward_input | 该输入文件为首次运行训练dump得到反向API输入的.npy文件。例如若需要dump Functional_conv2d_1 API的反向过程的输入输出,则需要在dump目录下查找命名包含Functional_conv2d_1、backward和input字段的.npy文件。 | 是 | - -**函数示例** - -```python -register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') -set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"]) -set_backward_input(["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) -``` - -## dump.json配置文件说明 - -**dump.json配置示例** - -```python -{ - "dump": - { - "dump_list":[], - "dump_path":"./dump/output", - "dump_mode":"all", - "dump_op_switch":"on" - } -} -``` - -**dump.json参数说明** - -| 字段名 | 说明 | -| -------------- | ------------------------------------------------------------ | -| dump_list | 待dump数据的API模型。为空,无需配置。 | -| dump_path | dump数据文件存储到运行环境的目录,主要用于指定ACL dump数据路径。支持配置绝对路径或相对路径。dump_path须为已存在目录。 | -| dump_mode | dump数据模式,配置如下:
- output:dump API的输出数据。默认值。
- input:dump API的输入数据。
- all:dump API的输入、输出数据。 | -| dump_op_switch | 单API模型dump数据开关,配置如下: * off:关闭单API模型dump,默认值。 * on:开启单API模型dump。 | - -**dump目录说明** - -配置register_hook的dump_config后,采集的dump数据会在{dump_path}/{time}/{deviceid}/{model_id}目录下生成,例如“/home/HwHiAiUser/output/20200808163566/0/0” - -```bash -├── 20230131172437 -│   └── 1 -│   ├── 0 -│   │   ├── Add.Add.45.0.1675157077183551 -│   │   ├── Cast.trans_Cast_0.31.0.1675157077159449 -│   │   ├── Cast.trans_Cast_5.43.0.1675157077180129 -│   │   ├── MatMul.MatMul.39.0.1675157077172961 -│   │   ├── Mul.Mul.29.0.1675157077155731 -│   │   ├── NPUAllocFloatStatus.NPUAllocFloatStatus.24.0.1675157077145262 -│   │   ├── TransData.trans_TransData_1.33.0.1675157077162791 -│   │   └── TransData.trans_TransData_4.41.0.1675157077176648 -│   ├── 1701737061 -│   │   └── Cast.trans_Cast_2.35.0.1675157077166214 -│   ├── 25 -│   │   └── NPUClearFloatStatus.NPUClearFloatStatus.26.0.1675157077150342 -│   └── 68 -│   └── TransData.trans_TransData_3.37.0.1675157077169473 -``` - -## dump数据存盘说明 - -dump结果目录结构示例如下: - -```bash -├── dump_path -│ └── ptdbg_dump_{version} -│ ├── rank0 -│ │ ├── dump -| | | ├── Tensor_permute_1_forward.npy -| | | ... -| | | └── Fcuntion_linear_5_backward_output.npy -│ │ └── dump.pkl -│ ├── rank1 -| | ├── dump -| | | └── ... -| | └── dump.pkl -│ ├── ... -│ | -| └── rank7 -``` - -其中ptdbg_dump_{version}为未设置set_dump_path的dump_tag参数时的默认命名;rank为设备上各卡的ID,每张卡上dump的数据会生成对应dump目录。 - -当使用debugger方式dump数据时,配置了PrecisionDebugger模块的step=[]参数,dump结果目录则以step为父目录,例如配置step=[0,1,2]时,dump结果目录为: - -``` -├── dump_path -│ └── step0 -│ | └── ptdbg_dump_{version} -│ | | ├── rank0 -│ | | ├── ... -│ | | ├── rank7 -| ├── step1 -| | | ├── ... 
-│ └── step2 -``` - -**精度比对dump场景** - -精度比对dump场景的结果如下: - -* dump.pkl文件:包含dump数据的API名称、dtype、 shape以及各数据的max、min、mean统计信息。 - -* dump目录:目录下为npy格式的dump数据。 - - npy文件保存的前缀和PyTorch对应关系如下 - - | 前缀 | Torch模块 | - | ---------- | ------------------- | - | Tensor | torch.Tensor | - | Torch | torch | - | Functional | torch.nn.functional | - | NPU | NPU亲和算子 | - | VF | torch._VF | - -当set_dump_switch或configure_hook配置mode参数(例如:mode="api_stack" )时,dump结果的文件名会添加api_stack前缀,dump结果如下: - -* api_stack_dump.pkl -* api_stack_dump目录 - -**溢出检测dump场景** - -register_hook设置了overflow_check时,检测API溢出,dump结果的文件名格式为:`{api_type}___{api_name}___{API调用次数}_{前向反向}_{当前溢出次数}`,dump结果示例如下: - -* `Tensor___add___1_forward_1.pkl` -* `Tensor___add___1_forward_1`目录 - -## CPU或GPU与NPU精度数据比对 - -### 总体说明 - -- 本节主要介绍CPU或GPU与NPU精度数据比对的函数以及示例。 - -- 比对函数均通过单独创建精度比对脚本执行,可支持单卡和多卡场景的精度数据比对。 - -- 工具性能:比对数据量较小时(参考值单份文件小于10GB),参考比对速度0.1GB/s;比对数据量较大时,参考比对速度0.3GB/s。 - 推荐环境配置:独占环境,CPU核心数192,固态硬盘(IO速度参考:固态硬盘 > 500MB/s,机械硬盘60 ~ 170MB/s)。 - - 用户环境性能弱于标准约束或非独占使用的比对速度酌情向下浮动。比对速度的计算方式:两份比对文件大小/比对耗时。 - -### 约束 - -- NPU自研API,在CPU或GPU若没有对应的API,该API的dump数据不比对。 - -- NPU与CPU或GPU的计算结果误差可能会随着模型的执行不断累积,最终会出现同一个API因为输入的数据差异较大而无法比对的情况。 - -- CPU或GPU与NPU中两个相同的API会因为调用次数不同导致无法比对或比对到错误的API,不影响整体运行,该API忽略。 - -### compare_distributed - -**功能说明** - -将CPU或GPU与NPU的dump文件进行比对,支持单卡和多卡,可同时比对多卡的dump数据。多机场景需要每个设备单独执行比对操作。可自动检索和匹配对应卡和进程所dump的数据文件,再调用compare进行比对。单机单卡时与compare函数二选一。 - -**函数原型** - -```python -compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| -------------- | ------------------------------------------------------------ | -------- | -| npu_dump_dir | 配置NPU环境下的dump目录,即set_dump_path函数的dump_tag参数对应的目录名称。参数示例:'./npu_dump/dump_conv2d_v2.0'。 | 是 | -| bench_dump_dir | 配置CPU、GPU或NPU环境下的dump目录,即set_dump_path函数的dump_tag参数对应的目录名称。参数示例:'./gpu_dump/dump_conv2d_v2.0'。 | 是 | -| output_path | 
配置比对结果csv文件存盘目录。需要预先创建output_path目录。参数示例:'./output'。文件名称基于时间戳自动生成,格式为:`compare_result_rank{npu_ID}-rank{cpu/gpu/npu_ID}_{timestamp}.csv`。 | 是 | -| **kwargs | 支持compare的所有可选参数。 | 否 | - -**函数示例** - -创建比对脚本,例如compare_distributed.py,拷贝如下代码,具体参数请根据实际环境修改。 - -```python -from ptdbg_ascend import * -compare_distributed('./npu_dump/ptdbg_dump_v2.0', './gpu_dump/ptdbg_dump_v2.0', './output') -``` - -### compare - -**功能说明** - -将CPU或GPU与NPU的dump文件进行比对,仅支持单机单卡。 - -**函数原型** - -```python -compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_match=False) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------ | ------------------------------------------------------------ | -------- | -| input_param | 配置dump数据文件及目录。配置参数包括:
- "npu_pkl_path":指定NPU dump目录下的.pkl文件。参数示例:"npu_pkl_path": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl"。必选。
- "bench_pkl_path":指定CPU、GPU或NPU dump目录下的.pkl文件。参数示例:"bench_pkl_path": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl"。必选。
- "npu_dump_data_dir":指定NPU dump目录下的dump数据目录。参数示例:"npu_dump_data_dir": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump"。必选。
- "bench_dump_data_dir":指定CPU、GPU或NPU dump目录下的dump数据目录。参数示例:"bench_dump_data_dir": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump"。必选。
- "is_print_compare_log":配置是否开启日志打屏。可取值True或False。可选。 | 是 | -| output_path | 配置比对结果csv文件存盘目录。参数示例:'./output'。文件名称基于时间戳自动生成,格式为:`compare_result_{timestamp}.csv`。 | 是 | -| stack_mode | 配置stack_mode的开关。仅当dump数据时配置set_dump_switch的mode="api_stack"时需要开启。参数示例:stack_mode=True,默认为False。 | 否 | -| auto_analyze | 自动精度分析,开启后工具自动针对比对结果进行分析,识别到第一个精度不达标节点(在比对结果文件中的“Accuracy Reached or Not”列显示为No),并给出问题可能产生的原因(打屏展示并生成advisor_{timestamp}.txt文件)。可取值True或False,参数示例:auto_analyze=False,默认为True。 | 否 | -| fuzzy_match | 模糊匹配。开启后,对于网络中同一层级且命名仅调用次数不同的API,可匹配并进行比对。可取值True或False,参数示例:fuzzy_match=True,默认为False。 | 否 | - -**函数示例** - -单机单卡场景下创建比对脚本,例如compare.py,拷贝如下代码,具体参数请根据实际环境修改。 - -```python -from ptdbg_ascend import * -dump_result_param={ -"npu_pkl_path": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl", -"bench_pkl_path": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl", -"npu_dump_data_dir": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump", -"bench_dump_data_dir": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump", -"is_print_compare_log": True -} -compare(dump_result_param, "./output", stack_mode=True) -``` - -### parse - -**功能说明** - -解析并提取dump信息中的堆栈信息及数据统计信息。 - -**函数原型** - -```python -parse(pkl_file, moudule_name_prefix) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------------- | ------------------------------------------------------------ | -------- | -| pkl_file | 指定dump数据文件中的pkl文件名。参数示例:"./npu_dump/ptdbg_dump_v2.0/rank0/dump.pkl"。 | 是 | -| moudule_name_prefix | 指定待提取的API接口前缀。参数示例:"Torch_norm_1_forward"。 | 是 | - -**函数示例** - -创建堆栈信息及数据统计信息提取脚本,例如parse.py,拷贝如下代码,具体参数请根据实际环境修改。 - -```python -from ptdbg_ascend import * -parse("./npu_dump/ptdbg_dump_v2.0/rank0/dump.pkl", "Torch_batch_normal_1_forward") -``` - -### 计算精度评价指标 - -PyTorch精度比对是以CPU或GPU的计算结果为标杆,计算Cosine(余弦相似度)、MaxAbsErr(最大绝对误差)和MaxRelativeErr(最大相对误差),根据这两个结果判断API在运行时是否存在精度问题。 - -计算精度评价指标: - -1. Cosine:通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0。 - -2. 
MaxAbsErr:当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001。 - -3. MaxRelativeErr:当最大相对误差越接近0表示其计算的误差越小。 - - 当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象。 - -精度比对结果csv文件中只需要通过Accuracy Reached or Not来判断计算精度是否达标,判断标准如下: - -1. Cosine < 0.99 且 MaxAbsErr > 0.001时,精度不达标,标记为“No”。 -2. Cosine < 0.9,精度不达标,标记为“No”。 -3. MaxAbsErr > 1,精度不达标,标记为“No”。 -4. 其余情况下记为精度达标,标记为“Yes”。 - -## ptdbg_ascend.parse数据解析功能 - -ptdbg_ascend.parse为命令行交互式界面解析工具,提供更多的数据解析功能并且展示结果。 - -主要的使用场景包括: - -- 支持指定ACL层级算子数据比对。 -- 支持指定ACL层级算子数据转换及展示。 -- 支持交互式指定pkl文件中API对应dump数据查看。 -- 支持API进行可选层级比对和打印(统计级和像素级)。 - -安装ptdbg_ascend工具后,可以通过使用命令 **python -m ptdbg_ascend.parse** 进入交互式界面,可在parse的界面中执行Shell命令,以及上述场景的相关解析命令。Ctrl+C可以退出该界面。 - -### ACL层级算子数据比对 - -- 依赖:CANN包中的msaccucmp工具。 - -- 输入以下比对命令进行数据比对。 - - ```bash - vc -m my_dump_path -g golden_dump_path [-out output_path] - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ------------------------------------------------------------ | -------- | - | -m | 待比对dump数据目录。 | 是 | - | -g | 标杆dump数据目录。 | 是 | - | -out | 结果输出目录。 | 否 | - | -asc | 指定msaccucmp路径,默认路径为:/usr/local/Ascend/ascend-toolkit/latest/tools/operator_cmp/compare/msaccucmp.py。 | 否 | - - - 输出结果:result_{timestamp}.csv文件。 - - 若指定-out参数需要用户传入输出路径,并且路径需要已存在。 - - 若未指定输出目录, 则比对结束后将结果保存在默认目录 “./parse_data/compare_result”中,比对结束后会打印log提示输出结果存放路径。 - -**示例** - -```bash -# 传入待比对数据目录以及标杆数据目录 -Parse >>> vc -m ./my_dump_path -g ./golden_data_path -...... -# 比对结果打印 -[INFO] The comparison result have been written to "./parse_data/compare_result/result_20230818104735.csv". -[INFO] The command was completed and took 6 seconds. -[INFO] Compare finished!! 
-``` - -### ACL算子数据的npy转换 - -- 依赖:CANN包中的msaccucmp工具。 - -- 输入以下转换命令进行数据转换, 将ACL级别dump数据转为npy文件。 - - ```bash - dc -n file_name/file_path [-f format] [-out output_path] - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ------------------------------------------------------------ | -------- | - | -n | 需转换的dump数据文件或dump数据文件目录。 | 是 | - | -f | 开启format转换,指定该参数时需要配置format格式,若未指定该参数,则直接转换为npy格式。 | 否 | - | -out | 结果输出目录。 | 否 | - | -asc | 指定msaccucmp路径,默认路径为:/usr/local/Ascend/ascend-toolkit/latest/tools/operator_cmp/compare/msaccucmp.py | 否 | - - [^]: 若传入单个dump文件,则转换单个文件,若传入dump文件目录则转换目录下所有dump文件。 - - - 输出结果:npy文件。 - - 若指定-out参数需要用户传入输出路径,并且路径需要已存在。 - - 若未指定输出目录, 则比对结束后将结果保存在默认目录 “./parse_data/convert_result”中,比对结束后会打印log提示输出结果存放路径及转换结果。 - -- 输入以下命令,展示npy数据统计信息。 - - ```bash - pt -n file_path - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ------------- | -------- | - | -n | npy文件路径。 | 是 | - - 打印统计信息:shape, dtype, max, min和mean。 - -**示例1** - -```bash -# 传入需转换的dump文件目录 -Parse >>> dc -n ./dump_data/ -...... 
-# 转换结果 -╭──────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ SrcFile: ./dump_data/ -│ - Add.fp32_vars_add_2fp32_vars_Relu_9.31.5.1636595794731103.input.0.npy │ -│ - Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.output.0.npy │ -│ - Add.fp32_vars_add_2fp32_vars_Relu_9.31.5.1636595794731103.input.1.npy │ -│ - Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.1.npy │ -│ - Add.fp32_vars_add_3fp32_vars_Relu_12.40.5.1636595794846124.input.1.npy │ -│ - Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.0.npy │ -│ - Add.fp32_vars_add_3fp32_vars_Relu_12.40.5.1636595794846124.input.0.npy │ -│ - Add.fp32_vars_add_2fp32_vars_Relu_9.31.5.1636595794731103.output.0.npy │ -│ - Add.fp32_vars_add_3fp32_vars_Relu_12.40.5.1636595794846124.output.0.npy │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` - -**示例2** - -```bash -# 查看某个dump数据块的数据信息 -# 默认会将数据中的tensor保存成 txt -Parse >>> pt -n ./parse_data/dump_convert/Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.output.0.npy -...... -# 打印统计信息 -[Shape: (1, 16, 56, 56, 16)] [Dtype: float16] [Max: 452.0] [Min: -408.5] [Mean: -3.809] -Path: ./parse_data/dump_convert/Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.0.npy -TextFile:./parse_data/dump_convert/Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.0.npy.txt -``` - -### pkl文件中指定API的dump数据信息查看 - -- 输入以下命令,解析并输出pkl文件中指定api的统计信息。 - - ```bash - pk -f pkl_path -n api_name - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ----------------- | -------- | - | -f | 指定pkl文件路径。 | 是 | - | -n | 指定API名称。 | 是 | - - - 输出结果:打印统计信息(shape, dtype, max和min mean)。 - - 若pkl文件中存在相应的堆栈信息,则会打印堆栈信息。 - -**示例** - -```bash -# 传入pkl文件及api名称 -Parse >>> pk -f ./torch_dump/ptdbg_v3.2/rank0/api_stack_dump.pkl -n Functional_conv2d_0_forward -...... 
-# 打印统计信息及堆栈(pkl文件不包含堆栈则不会打印堆栈) - -Statistic Info: - [Functional_conv2d_0_forward_input.0][dtype: torch.float32][shape: [2, 1, 2, 2]][max: 1.576936960220337][min: -0.9757485389709473][mean: 0.4961632490158081] - [Functional_conv2d_0_forward_input.1][dtype: torch.float32][shape: [2, 1, 2, 2]][max: 0.20064473152160645][min: -0.47102075815200806][mean: -0.20796933770179749] - [Functional_conv2d_0_forward_input.2][dtype: torch.float32][shape: [2]][max: 0.17380613088607788][min: -0.16853803396224976][mean: 0.0026340484619140625] - [Functional_conv2d_0_forward_output][dtype: torch.float32][shape: [2, 2, 1, 1]][max: 0.02364911139011383][min: -1.762906551361084][mean: -0.6710853576660156] -``` - -### API可选层级比对 - -- 输入以下命令, 进行统计级和像素级比对。 - - ```bash - cn -m my_data*.npy -g gloden*.npy [-p num] [-al atol] [-rl rtol] - ``` - - - 统计级比对:对tensor整体进行余弦值及相对误差的计算。 - - 像素级比对:对输入的两个npy文件进行逐元素比对。若两个tensor对应元素的相对误差或绝对误差大于**误差阈值**(-al和-rl配置)则被标记为错误数据。 - - | 参数名称 | 说明 | 是否必选 | - | -------- | ----------------------------------------------- | -------- | - | -m | 待比对数据。 | 是 | - | -g | 标杆数据。 | 是 | - | -p | 设置比对结束后打印错误元素的个数,默认值20。 | 否 | - | -al | 判定数据存在精度问题的绝对误差阈值,默认0.001。 | 否 | - | -rl | 判定数据存在精度问题的相对误差阈值,默认0.001。 | 否 | - | -s | 将npy文件保存成txt文件,用于查看,默认开启。 | 否 | - - 输出结果: - - - 统计级比对结果。 - - 两个文件的统计信息(shape, dtype, max, min和mean)。 - - 错误数据打印表格。 - -**示例** - -```bash -# 对比两个tensor的数据 -Parse >>> cn -m Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy -g InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy -p 10 -s -al 0.002 -rl 0.005 - Error Item Table Top Item Table -┏━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ -┃ Index ┃ Left ┃ Right ┃ Diff ┃ ┃ Index ┃ Left ┃ Right ┃ Diff ┃ -┡━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ ┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ -│ 155 │ 0.024600908 │ 0.022271132 │ 0.002329776 │ │ 0 │ -0.9206961 │ -0.9222216 │ 0.0015255213 │ 
-│ 247 │ 0.015752593 │ 0.017937578 │ 0.0021849852 │ │ 1 │ -0.6416973 │ -0.64051837 │ 0.0011789203 │ -│ 282 │ -0.0101207765 │ -0.007852031 │ 0.0022687456 │ │ 2 │ -0.35383835 │ -0.35433492 │ 0.0004965663 │ -│ 292 │ 0.019581757 │ 0.02240482 │ 0.0028230622 │ │ 3 │ -0.18851271 │ -0.18883198 │ 0.00031927228 │ -│ 640 │ -0.06593232 │ -0.06874806 │ 0.0028157383 │ │ 4 │ -0.43508735 │ -0.43534422 │ 0.00025686622 │ -│ 1420 │ 0.09293677 │ 0.09586689 │ 0.0029301196 │ │ 5 │ 1.4447614 │ 1.4466647 │ 0.0019032955 │ -│ 1462 │ -0.085207745 │ -0.088047795 │ 0.0028400496 │ │ 6 │ -0.3455438 │ -0.3444429 │ 0.0011008978 │ -│ 1891 │ -0.03433288 │ -0.036525503 │ 0.002192624 │ │ 7 │ -0.6560242 │ -0.6564579 │ 0.0004336834 │ -│ 2033 │ 0.06828873 │ 0.07139922 │ 0.0031104907 │ │ 8 │ -2.6964858 │ -2.6975214 │ 0.0010356903 │ -│ 2246 │ -0.06376442 │ -0.06121233 │ 0.002552092 │ │ 9 │ -0.73746175 │ -0.73650354 │ 0.00095820427 │ -└───────┴───────────────┴──────────────┴──────────────┘ └───────┴─────────────┴─────────────┴───────────────┘ -╭───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ Left: | -│ |- NpyFile: ./dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy | -│ |- TxtFile: ./dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy.txt | -│ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556] | -│ DstFile: │ -│ |- NpyFile: ./dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy | -│ |- TxtFile: ./dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy.txt | -│ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.8425903] [Min: -8.374472] [Mean: -0.7256237] │ -│ NumCnt: 655360 │ -│ AllClose: False │ -│ CosSim: 0.99999493 │ -│ ErrorPer: 0.023504638671875 (rl= 0.005, al= 0.002) │ 
-╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` - -## FAQ - -[FAQ](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/FAQ.md) diff --git "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T2.md" "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T2.md" deleted file mode 100644 index a41c34834623468e023dc215f244f312d6312227..0000000000000000000000000000000000000000 --- "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T2.md" +++ /dev/null @@ -1,1665 +0,0 @@ -# **PyTorch精度工具使用指南** - -本文主要介绍PyTorch精度工具精度工具ptdbg_ascend的使用以及精度比对场景示例。 - -ptdbg_ascend工具的原理及安装请参见《[PyTorch精度工具](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/README.md)》。 - -## PyTorch精度比对总体流程 - -1. 准备CPU或GPU训练工程。 - -2. 在环境下安装ptdbg_ascend工具。 - -3. 在训练脚本内插入ptdbg_ascend工具dump接口。 - -4. 执行训练dump数据。 - -5. 将CPU或GPU训练工程迁移为NPU训练工程。 - - 请参见《[PyTorch模型迁移和训练指南](https://www.hiascend.com/document/detail/zh/canncommercial/63RC1/modeldevpt/ptmigr/ptmigr_0001.html)》。 - -6. 在NPU环境下安装ptdbg_ascend工具。 - -7. 在NPU训练脚本内插入ptdbg_ascend工具dump接口。 - -8. NPU环境下执行训练dump数据。 - -9. 创建并配置精度比对脚本,例如compare.py。 - -10. 执行CPU或GPU dump与NPU dump数据的精度比对。 - -11. 比对结果分析。 - -## 场景化示例 - -本章节主要介绍通过ptdbg_ascend工具进行精度比对和分析,主要使用“**CPU或GPU及NPU精度数据dump**”和“**CPU或GPU与NPU精度数据比对**”章节中介绍的ptdbg_ascend工具接口。 - -### 单卡场景精度比对 - -**精度分析建议** - -PyTorch训练场景的精度问题分析建议参考以下思路进行精度比对和比对结果分析: - -1. 整网比对:dump整网数据并进行精度比对,初步定位异常范围。 -2. 缩小范围:根据Accuracy Reached or Not找出不符合精度标准的API。 -3. 范围比对:对不符合精度标准的API重新dump。 -4. 分析原因并优化:分析API精度不符合标准的原因并进行优化调整。 -5. 整网比对:重新进行整网比对,判断优化后的API是否已符合精度标准以及是否出现新的精度问题。 -6. 
重复1~5步,直到不存在精度问题为止。 - -**精度分析示例** - -1. dump整网数据。 - - 分别dump CPU或GPU以及NPU数据,在PyTorch训练脚本插入dump接口,示例代码如下(下面以NPU为例,CPU或GPU dump基本相同): - - ```python - from ptdbg_ascend import * - - # 在main函数开始前固定随机数 - seed_all() - - # 配置dump数据目录路径和名称 - set_dump_path("./npu_dump", dump_tag='all') - - # 注册dump回调函数 - register_hook(model, acc_cmp_dump) - - ... - - # 在第一个迭代开始的位置开启dump和堆栈模式,同时为保证数据完整性开启dump bool和整型的tensor以及浮点、bool和整型的标量 - set_dump_switch("ON", mode="api_stack", filter_switch="OFF") - - ... - - # 在第一个迭代结束的位置关闭dump - set_dump_switch("OFF") - ``` - -2. 比对整网数据。 - - 第1步中的NPU dump数据文件为npu_dump.pkl,假设NPU dump npy数据目录为npu_dump,GPU dump数据文件为gpu_dump.pkl,GPU dump npy数据目录为gpu_dump。 - - 创建并配置精度比对脚本,以创建compare.py为例,示例代码如下: - - ```python - from ptdbg_ascend import * - dump_result_param={ - "npu_pkl_path": "./npu_dump/all_v2.0/rank0/api_stack_dump.pkl", - "bench_pkl_path": "./gpu_dump/all_v2.0/rank0/api_stack_dump.pkl", - "npu_dump_data_dir": "./npu_dump/all_v2.0/rank0/api_stack_dump", - "bench_dump_data_dir": "./gpu_dump/all_v2.0/rank0/api_stack_dump", - "is_print_compare_log": True - } - compare(dump_result_param, "./output") - ``` - - 执行比对: - - ```bash - python3 compare.py - ``` - - 在output目录下生成结果文件,包括:`compare_result_{timestamp}.csv`和`advisor_{timestamp}.txt` - -3. 找出存在问题的API。 - - 1. 根据`advisor_{timestamp}.txt`或打屏信息的提示,可找到存在精度问题的算子(Suspect Nodes)和专家建议(Expert Advice) - - ![auto_analyze_log](img/auto_analyze_log.png) - - 2. 根据第2步结果文件`compare_result_{timestamp}.csv`中的Accuracy Reached or No字段显示为NO的API,针对该API执行后续比对操作,分析该API存在的精度问题。 - -4. (可选)提取指定API的堆栈信息和dump数据统计信息。 - - 通过parse接口可以清晰的显示特定API的堆栈信息和dump数据统计信息,结合堆栈信息分析代码中可能存在的精度问题。 - - 创建并配置提取脚本,以创建parse.py为例,示例代码如下: - - ```python - from ptdbg_ascend import * - - # 提取dump信息中第1次调用的API:Torch_batch_normal的堆栈信息及数据统计信息 - parse("./npu_dump/all_v2.0/rank0/api_stack_dump.pkl", "Torch_batch_normal_1_forward") - ``` - - 执行提取: - - ```bash - python3 parse.py - ``` - - - -5. 
(可选)指定API dump数据。 - - - dump指定前向API的ACL级别数据 - - ```python - from ptdbg_ascend import * - - # 固定随机数,开启确定性计算 - seed_all(mode=True) - set_dump_path("./dump_path", dump_tag='forward') - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - - # dump指定前向API的ACL级别数据、bool和整型的tensor以及浮点、bool和整型的标量 - set_dump_switch("ON", mode="acl", scope=["Tensor_permute_1_forward"], filter_switch="OFF") - - ... - - set_dump_switch("OFF") - ``` - - - dump指定反向API的ACL级别数据 - - ```python - from ptdbg_ascend import * - - # 固定随机数,开启确定性计算 - seed_all(mode=True) - set_dump_path("./dump_path", dump_tag='backward') - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - - # dump指定反向API的ACL级别数据、bool和整型的tensor以及浮点、bool和整型的标量 - set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"], filter_switch="OFF") - set_backward_input(["./npu_dump/all_v2.0/rank0/api_stack_dump/Functional_conv2d_1_backward_input.0.npy"]) - - ... - - set_dump_switch("OFF") - ``` - -6. (可选)重新比对。 - - 根据第4或5步的dump数据重新配置compare.py并执行比对,可以对单API模型进行问题复现。 - -**注意事项** - -* dump_mode="acl"场景下,会增加npu的内存消耗,请谨慎开启。 -* 部分API存在调用嵌套关系,比如functional.batch_norm实际调用torch.batch_norm,该场景会影响acl init初始化多次,导致功能异常。 - -### 多卡场景精度比对 - -精度工具支持多卡场景的精度比对,多卡场景的dump步骤与单卡场景完全一致,请参见“**单卡场景精度比对**”章节,不同的是多卡数据精度比对时需要使用“compare_distributed”函数进行比对。如下示例: - -说明:多机多卡场景需要每个设备单独执行比对操作。 - -假设NPU dump npy数据目录为npu_dump/dump_conv2d_v1.0,GPU dump npy数据目录为gpu_dump/dump_conv2d_v1.0。 - -1. 创建比对脚本,例如compare_distributed.py,拷贝如下代码。 - - ```python - from ptdbg_ascend import * - compare_distributed('./npu_dump/ptdbg_dump_v2.0', './gpu_dump/ptdbg_dump_v2.0', './output') - ``` - -2. 
执行比对: - - ```bash - python3 compare_distributed.py - ``` - -两次运行须用相同数量的卡,传入`compare_distributed`的两个文件夹下须有相同个数的rank文件夹,且不包含其他无关文件,否则将无法比对。 - -**多卡set_dump_path注意事项** - -多卡一般为多进程,须保证每个进程都正确调用set_dump_path,或把set_dump_path插入到import语句后,如: - -```python -from ptdbg_ascend import * -seed_all() -set_dump_path('./dump_resnet') -``` - -如此可保证set_dump_path在每个进程都被调用。 - -**多卡register_hook注意事项** - -register_hook需要在set_dump_path之后调用,也需要在每个进程上被调用,建议在搬运模型数据到卡之后调用。识别方法如下: - -- 找到训练代码中遍历epoch的for循环或遍历数据集的for循环,把register_hook放到循环开始前即可。 -- 找到训练代码中调用DDP或者DistributedDataParallel的代码行,把register_hook放到该代码行所在的代码块之后。 -- 若代码中均无以上两种情况,需要保证register_hook在模型定义之后插入,并配置rank参数。rank参数获取rank_id请参见“**[rank_id获取方法](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/rank_id获取方法.md)**”。 - -### NPU vs NPU精度比对 - -对于NPU vs NPU场景,是针对同一模型,进行迭代(模型、API版本升级或设备硬件升级)时存在的精度下降问题,对比相同模型在迭代前后版本的API计算数值,进行问题定位。 - -一般情况下迭代涉及NPU自定义算子,因此,可以仅dump NPU自定义算子进行比对。比对精度问题分析请参见“**单卡场景精度比对**”章节。 - -工具当前支持dump NPU自定义算子如下: - -| 序号 | NPU自定义算子 | -| :--- | ----------------------------------- | -| 1 | torch_npu.one_ | -| 2 | torch_npu.npu_sort_v2 | -| 3 | torch_npu.npu_transpose | -| 4 | torch_npu.npu_broadcast | -| 5 | torch_npu.npu_dtype_cast | -| 6 | torch_npu.empty_with_format | -| 7 | torch_npu.npu_one_hot | -| 8 | torch_npu.npu_stride_add | -| 9 | torch_npu.npu_ps_roi_pooling | -| 10 | torch_npu.npu_roi_align | -| 11 | torch_npu.npu_nms_v4 | -| 12 | torch_npu.npu_iou | -| 13 | torch_npu.npu_nms_with_mask | -| 14 | torch_npu.npu_pad | -| 15 | torch_npu.npu_bounding_box_encode | -| 16 | torch_npu.npu_bounding_box_decode | -| 17 | torch_npu.npu_batch_nms | -| 18 | torch_npu.npu_slice | -| 19 | torch_npu._npu_dropout | -| 20 | torch_npu.npu_indexing | -| 21 | torch_npu.npu_ifmr | -| 22 | torch_npu.npu_max | -| 23 | torch_npu.npu_scatter | -| 24 | torch_npu.npu_layer_norm_eval | -| 25 | torch_npu.npu_alloc_float_status | -| 26 | torch_npu.npu_get_float_status | -| 27 | torch_npu.npu_clear_float_status | -| 28 | 
torch_npu.npu_confusion_transpose | -| 29 | torch_npu.npu_bmmV2 | -| 30 | torch_npu.fast_gelu | -| 31 | torch_npu.npu_sub_sample | -| 32 | torch_npu.npu_deformable_conv2d | -| 33 | torch_npu.npu_mish | -| 34 | torch_npu.npu_anchor_response_flags | -| 35 | torch_npu.npu_yolo_boxes_encode | -| 36 | torch_npu.npu_grid_assign_positive | -| 37 | torch_npu.npu_normalize_batch | -| 38 | torch_npu.npu_masked_fill_range | -| 39 | torch_npu.npu_linear | -| 40 | torch_npu.npu_bert_apply_adam | -| 41 | torch_npu.npu_giou | -| 42 | torch_npu.npu_ciou | -| 43 | torch_npu.npu_diou | -| 44 | torch_npu.npu_sign_bits_pack | -| 45 | torch_npu.npu_sign_bits_unpack | -| 46 | torch_npu.npu_flash_attention | -| 47 | torch_npu.npu_scaled_masked_softmax | -| 48 | torch_npu.npu_rotary_mul | -| 49 | torch_npu.npu_roi_align | -| 50 | torch_npu.npu_roi_alignbk | -| 51 | torch_npu.npu_ptiou | -| 52 | torch_npu.npu_fusion_attention | - -### 通信API的数据dump - -通信类API数据可以使用全量dump方式获取,若只dump通信类API数据,可以使用如下示例: - -```python -debugger.configure_hook(mode="api_list", api_list=["distributed"]) -``` - -或 - -```python -set_dump_switch("ON", mode="api_list", api_list=["distributed"]) -``` - -通信类API支持列表: - -| 序号 | Distributed | -| :--- | -------------------- | -| 1 | send | -| 2 | recv | -| 3 | broadcast | -| 4 | all_reduce | -| 5 | reduce | -| 6 | all_gather | -| 7 | gather | -| 8 | isend | -| 9 | irecv | -| 10 | scatter | -| 11 | reduce_scatter | -| 12 | _reduce_scatter_base | -| 13 | _all_gather_base | - -### 溢出检测场景 - -溢出检测是针对NPU的PyTorch API,检测是否存在溢出的情况。当前仅支持识别aicore浮点溢出。 - -溢出检测原理:针对溢出阶段,开启acl dump模式,重新对溢出阶段执行,落盘数据。 - -建议按照如下步骤操作: - -1. 在NPU环境下安装ptdbg_ascend工具。 - -2. 在NPU训练脚本内插入ptdbg_ascend工具溢出检测接口。 - - - 示例1:全量溢出检测 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # 设置检测到3次溢出后退出训练 - register_hook(model, overflow_check, overflow_nums=3) - - ... 
- ``` - - 多卡使用时各卡单独计算溢出次数。 - - - 示例2:dump指定API的ACL级别溢出数据 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # dump指定API的ACL级别溢出数据 - register_hook(model, overflow_check, dump_mode='acl', dump_config='./dump.json') - - # 在期望溢出检测的step位置开始前打开溢出检测开关 - set_overflow_check_switch("ON") - - ... - - # 在step结束的位置关闭溢出检测开关 - set_overflow_check_switch("OFF") - - ... - ``` - - - 示例3:dump指定反向API的ACL级别的溢出数据 - - 1. 进行全量溢出检测 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # 设置检测到3次溢出后退出训练 - register_hook(model, overflow_check) - - ... - ``` - - 2. dump指定反向API的ACL级别的溢出数据 - - ```python - from ptdbg_ascend import * - seed_all() - # 配置溢出数据目录路径和名称 - set_dump_path("./overflow_dump") - ... - # dump指定反向API的ACL级别溢出数据 - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"]) - set_backward_input(["./npu_dump/ptdbg_dump_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) - ``` - - 针对前向溢出API,可以通过overflow_nums,配置允许的溢出次数,并将每次溢出API的全部ACL数据dump下来,到达指定溢出次数后停止,停止后会看到堆栈打印包含如下字段。 - - ```bash - ValueError: [overflow xxx times]: dump file is saved in 'xxxxx.pkl'. - ``` - - 其中xxx times为用户设置的次数,xxxxx.pkl为文件生成路径。 - -3. 
NPU环境下执行训练dump溢出数据。 - - 针对输入正常但输出存在溢出的API,会在训练执行目录下将溢出的API信息dump并保存为`forward_info_{pid}.json`和`backward_info_{pid}.json`,通过 [Ascend模型精度预检工具](https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker)对json文件进行解析,输出溢出API为正常溢出还是非正常溢出,从而帮助用户快速判断。 - - 精度预检工具执行命令如下: - - ```bash - # 下载att代码仓后执行如下命令 - export PYTHONPATH=$PYTHONPATH:$ATT_HOME/debug/accuracy_tools/ - cd $ATT_HOME/debug/accuracy_tools/api_accuracy_checker/run_ut - python run_overflow_check.py -forward ./forward_info_0.json - ``` - - 反向过程溢出的API暂不支持精度预检功能。 - - 当重复执行溢出检测dump操作时,需要删除上一次dump目录下的溢出检测dump数据,否则将因重名而报错。 - -**注意事项** - -* dump_mode="acl"场景下,会增加npu的内存消耗,请谨慎开启。 -* 部分API存在调用嵌套关系,比如functional.batch_norm实际调用torch.batch_norm,该场景会导致acl init多次初始化,导致功能异常。 -* 混合精度动态loss scale场景下,正常训练会有"Gradient overflow. Skipping step"日志,添加溢出检测后日志消失,可以通过设置环境变量export OVERFLOW_DEBUG_MODE_ENABLE=1,并将register_hook位置调整到amp.initialize之前解决。此功能需要cann包配套支持,不支持的版本执行时会报错EZ3003。 - -## debugger方式dump和溢出检测(推荐) - -### PrecisionDebugger模块 - -**功能说明** - -PrecisionDebugger模块包含dump和溢出检测功能的总体配置项。可以指定dump目录,设置dump或溢出检测功能,指定dump的卡和迭代。 - -可以在from ptdbg_ascend import *和模型初始化之间的任意位置添加该模块。 - -**原型** - -```python -PrecisionDebugger(dump_path=None, hook_name=None, rank=None, step=[], enable_dataloader=False): -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ----------------- | ------------------------------------------------------------ | -------- | -| dump_path | 设置dump数据目录路径,参数示例:"./dump_path"。
默认在dump_path目录下生成`ptdbg_dump_{version}`目录,并在该目录下生成`dump.pkl`文件以及`dump`数据文件保存目录。
当**configure_hook**函数配置了mode参数时,`dump.pkl`文件以及`dump`数据文件保存目录名称添加mode参数值为前缀,详情请参见“**dump数据存盘说明**”。
未配置dump_path时,也可以通过环境变量ASCEND_WORK_PATH配置dump路径,此时dump数据将落盘在${ASCEND_WORK_PATH}/dump_data下,自定义配置dump_path优先级高于环境变量,dump_path和环境变量需要二选一。 | 否 | -| hook_name | dump模式,可取值dump和overflow_check,表示dump和溢出检测功能,二选一。 | 是 | -| rank | 指定对某张卡上的数据进行dump或溢出检测,默认未配置(表示dump所有卡的数据),须根据实际卡的Rank ID配置。应配置为大于0的正整数,且须根据实际卡的Rank ID配置,若所配置的值大于实际训练所运行的卡的Rank ID,则dump数据为空,比如当前环境Rank ID为0~7,实际训练运行0~3卡,此时若配置Rank ID为4或不存在的10等其他值,此时dump数据为空。 | 否 | -| step | 指定dump某个step的数据,默认未配置,须指定为训练脚本中存在的step。step为list格式,可配置逐个step,例如:step=[0,1,2];也可以配置step范围,例如:step=list(range(0,9)),表示dump第0到第8个step。 | 否 | -| enable_dataloader | 自动控制开关,可取值True(开启)或False(关闭),默认为False。配置为True后自动识别dump step参数指定的迭代,并在该迭代执行完成后退出训练,此时start和stop函数可不配置,开启该开关要求训练脚本是通过torch.utils.data.dataloader方式加载数据;配置为False则需要配置start和stop函数,并在最后一个stop函数后或一个step结束的位置添加debugger.step()。 | 否 | - -### configure_hook函数(可选) - -**功能说明** - -设置dump范围。 - -建议在**PrecisionDebugger**模块与模型初始化之间的任意位置添加,不添加此函数时默认使用mode="api_stack" dump整网数据。 - -**原型** - -dump: - -```python -debugger.configure_hook(mode="api_stack", scope=[], api_list=[], filter_switch="OFF", acl_config=None, backward_input=[], input_output_mode=["all"], summary_only=False) -``` - -溢出检测: - -```python -debugger.configure_hook(mode=None, acl_config=None, overflow_nums=1, need_replicate=False) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ----------------- | ------------------------------------------------------------ | -------- | -| mode | dump模式。可取值"all"、"list"、"range"、"stack"、"acl"、"api_list"、"api_stack",各参数含义请参见本节的“**函数示例**”。参数示例:mode="list"。默认为api_stack。该参数配置值将作为dump数据文件名的前缀,详情请参见“**dump数据存盘说明**”。 | 否 | -| scope或api_list | dump范围。根据model配置的模式选择dump的API范围,mode="api_list"时,需要配置api_list=[],其他模式有需要时配置scope=[]。参数示例:scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward"]、api_list=["relu"]。默认为空。 | 否 | -| filter_switch | dump bool和整型的tensor以及浮点、bool和整型的标量的过滤开关。可取值"ON"(表示开启过滤,即不dump)或"OFF"(表示关闭过滤)。参数示例:filter_switch="ON"。默认不配置,即filter_switch="OFF",表示dump上述数据。 | 否 | -| acl_config | acl 
dump的配置文件。mode="acl"时,该参数必选;mode为其他值时,该参数不选。参数示例:acl_config='./dump.json'。dump.json配置文件详细介绍请参见“**dump.json配置文件说明**”。 | 否 | -| backward_input | 该输入文件为首次运行训练dump得到反向API输入的.npy文件。例如若需要dump Functional_conv2d_1 API的反向过程的输入输出,则需要在dump目录下查找命名包含Functional_conv2d_1、backward和input字段的.npy文件。 | 否 | -| input_output_mode | dump数据过滤。可取值"all"、"forward"、"backward"、"input"和"output",表示仅保存dump的数据中文件名包含"forward"、"backward"、"input"和"output"的前向、反向、输入或输出的.npy文件。参数示例input_output_mode=["backward"]或input_output_mode=["forward", "backward"]。默认为all,即保存所有dump的数据。除了all参数只能单独配置外,其他参数可以自由组合。 | 否 | -| summary_only | dump npy文件过滤,可取值True或False,配置为True后仅dump保存API统计信息的pkl文件,参数示例:summary_only=False,默认为False。 | 否 | -| overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | -| need_replicate | 过程dump数据生成开关,执行溢出检测时,dump目录下会生成forward_real_data和backward_real_data的过程dump数据目录,可取值True(生成)或False(不生成),默认不生成。 | 否 | - -**函数示例** - -configure_hook可配置多种dump模式,示例如下: - -说明:以下均以dump部分API数据为例,API名可以从首次dump整网数据的结果csv文件中的NPU Name或Bench Name列获取。 - -- 示例1:dump指定API列表 - - ```python - debugger.configure_hook(mode="list", scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward", "Torch_relu_3_backward"]) - ``` - -- 示例2:dump指定范围 - - ```python - debugger.configure_hook(mode="range", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例3:STACK模式,只dump堆栈信息 - - ```python - debugger.configure_hook(mode="stack", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例4:dump指定前向API的ACL级别数据 - - ```python - debugger.configure_hook(mode="acl", scope=["Tensor_permute_1_forward"], acl_config="./dump.json") - ``` - -- 示例4:dump指定反向API的ACL级别数据 - - ```python - debugger.configure_hook(mode="acl", scope=["Functional_conv2d_1_backward"], acl_config="./dump.json", backward_input=["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) - ``` - -- 示例5:dump指定某一类API的API级别输入输出数据 - - 
```python - debugger.configure_hook(mode="api_list", api_list=["relu"]) - ``` - - mode="api_list"时不配置scope。 - -- 示例6:dump全部API级别输入输出数据以及相应堆栈信息 - - ```python - debugger.configure_hook(mode="api_stack") - ``` - - mode="api_stack"时不配置scope。 - -- 示例7: dump全部API级别输入输出数据并包含bool和整型的tensor以及浮点、bool和整型的标量,配置为OFF,会dump bool和整型数据 - - ```python - debugger.configure_hook(filter_switch="OFF") - ``` - - 配置filter_switch="OFF"同时也可以配置mode、scope和api_list,除dump ACL级别数据。 - -- 示例8:仅保存dump的数据文件名包含“backward”的反向.npy文件 - - ```python - debugger.configure_hook(input_output_mode=["backward"]) - ``` - -- 示例9:仅dump pkl文件 - - ```python - debugger.configure_hook(summary_only=True) - ``` - -- 示例10:溢出检测dump - - ```python - debugger.configure_hook(overflow_nums=1) - ``` - - dump执行时会在**PrecisionDebugger**模块的dump_path参数指定的目录下生成ptdbg_dump_{version}目录,保存溢出数据。 - - 多卡场景时,需要检测到至少有一张卡溢出次数达到overflow_nums时,训练结束。 - - 仅支持NPU环境。 - -- 示例11:dump溢出API的ACL级别数据 - - ```python - debugger.configure_hook(mode="acl", acl_config="./dump.json") - ``` - - 该场景会在原有数据基础上,额外在dump.json文件配置的dump_path目录下生成一份ACL算子数据,该数据可通过“**ptdbg_ascend.parse**”工具进行解析。 - - 仅支持NPU环境。 - -### start函数(可选) - -**功能说明** - -dump或溢出检测启动函数。 - -在模型初始化之后的任意位置添加。 - -**原型** - -```python -debugger.start() -``` - -该函数为类函数,可以使用debugger.start()也可以使用PrecisionDebugger.start()。 - -### stop函数(可选) - -**功能说明** - -dump或溢出检测停止函数。 - -在**start**函数之后的任意位置添加。 - -**原型** - -```python -debugger.stop() -``` - -该函数为类函数,可以使用debugger.stop()也可以使用PrecisionDebugger.stop()。 - -### 示例代码(自动模式) - -- 示例1:开启dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0,2], enable_dataloader=True) - # 请勿将以上初始化流程插入到循环代码中 - ``` - -- 示例2:开启溢出检测dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="overflow_check", step=[0,2], enable_dataloader=True) - # 请勿将以上初始化流程插入到循环代码中 - ``` - -### 示例代码(手动模式) - 
-一般情况下使用自动模式可以快速方便进行dump操作,但个别大模型可能在部分卡的训练操作中没有调用dataloader,这会导致自动模式无法dump指定迭代的数据,此时需要关闭自动模式手动在迭代前后插入start()和stop()函数,并在最后一个stop函数后或一个step结束的位置添加debugger.step()以标识dump结束。 - -- 示例1:开启dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) - # 请勿将以上初始化流程插入到循环代码中 - - # 模型初始化 - # 下面代码也可以用PrecisionDebugger.start()和PrecisionDebugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.step() - ``` - -- 示例2:开启溢出检测dump - - ```python - from ptdbg_ascend import * - debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="overflow_check", step=[0]) - # 请勿将以上初始化流程插入到循环代码中 - - # 模型初始化 - # 下面代码也可以用PrecisionDebugger.start()和PrecisionDebugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.start() - - # 需要dump的代码片段1 - - debugger.stop() - debugger.step() - ``` - -## CPU或GPU及NPU精度数据dump - -### 总体说明 - -- 本节主要介绍CPU或GPU及NPU精度数据dump所需要的函数以及示例。 - -- ptdbg_ascend工具默认情况下仅dump PyTorch模型的API输入输出数据进行精度比对,若在比对结果中发现某个API下可能存在ACL的精度问题,那么可以选择dump该API的ACL级别数据进行精度分析。 - -- 某些torch api的输出不是Tensor类型的数据。对于此类API的反向过程进行ACL dump,工具会在运行日志中给出对应的Warning(is not of tensor type and cannot be automatically derived)提示。如若想要进行该类API反向ACL dump,可以通过手动构建单API用例的方式进行ACL dump,具体用例可参见“**[反向ACL dump用例说明](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/%E5%8F%8D%E5%90%91ACL%20dump%E7%94%A8%E4%BE%8B%E8%AF%B4%E6%98%8E.md)**”。 - -- 工具性能:dump数据量较小时(小于5G),参考dump速度0.1GB/s;dump数据量较大时,参考dump速度0.2GB/s。 - 推荐环境配置:独占环境,CPU核心数192,固态硬盘(IO速度参考:固态硬盘 > 500MB/s,机械硬盘60 ~ 170MB/s)。 - - 用户环境性能弱于标准约束或非独占使用的比对速度酌情向下浮动。Dump速度的计算方式:Dump数据量/(单个step添加Dump耗时-原始单个step耗时)。 - -### 约束 -- 进行CPU或GPU数据dump时,请安装torch包而非torch_npu包,避免工具无法识别使用场景,导致失败。 - -- TASK_QUEUE_ENABLE环境变量会导致API下发和执行异步进行,因此在ACL dump前需要将TASK_QUEUE_ENABLE关闭,即export TASK_QUEUE_ENABLE=0。 - -- 不建议在PyTorch训练脚本中同时添加dump接口和性能数据采集(如Ascend PyThon Profiler)接口,二者可能相互影响导致数据不准确。 - -### 
seed_all - -**功能说明** - -固定随机数。通过固定随机数保证模型的输入或输出一致。在训练主函数开始前调用,避免随机数固定不全。 - -dump操作必选。 - -**函数原型** - -```python -seed_all(seed=1234, mode=False) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------ | ------------------------------------------------------------ | -------- | -| seed | 随机数种子。参数示例:seed=1000。默认值为:1234。 | 否 | -| mode | 确定性计算模式。可配置True或False。参数示例:mode=True。默认为False。
即使在相同的硬件和输入下,API多次执行的结果也可能不同,开启确定性计算是为了保证在相同的硬件和输入下,API多次执行的结果相同。
确定性计算会导致API执行性能降低,建议在发现模型多次执行结果不同的情况下开启。
rnn类算子、ReduceSum、ReduceMean等算子可能与确定性计算存在冲突,若开启确定性计算后多次执行的结果不相同,则考虑存在这些算子。 | 否 | - -**函数示例** - -seed_all函数的随机数种子,取默认值即可,无须配置;第二个参数默认关闭,不开启确定性计算时也无须配置。 - -- 示例1:仅固定随机数,不开启确定性计算 - - ```python - seed_all() - ``` - -- 示例2:固定随机数,开启确定性计算 - - ```python - seed_all(mode=True) - ``` - -**固定随机数范围** - -seed_all函数可固定随机数的范围如下表。 - -| API | 固定随机数 | -| ---------------------------------------- | --------------------------- | -| os.environ['PYTHONHASHSEED'] = str(seed) | 禁止Python中的hash随机化 | -| random.seed(seed) | 设置random随机生成器的种子 | -| np.random.seed(seed) | 设置numpy中随机生成器的种子 | -| torch.manual_seed(seed) | 设置当前CPU的随机种子 | -| torch.cuda.manual_seed(seed) | 设置当前GPU的随机种子 | -| torch.cuda.manual_seed_all(seed) | 设置所有GPU的随机种子 | -| torch_npu.npu.manual_seed(seed) | 设置当前NPU的随机种子 | -| torch_npu.npu.manual_seed_all(seed) | 设置所有NPU的随机种子 | -| torch.backends.cudnn.enable=False | 关闭cuDNN | -| torch.backends.cudnn.benchmark=False | cuDNN确定性地选择算法 | -| torch.backends.cudnn.deterministic=True | cuDNN仅使用确定性的卷积算法 | - -需要保证CPU或GPU以及NPU的模型输入完全一致,dump数据的比对才有意义,seed_all并不能保证模型输入完全一致,如下表所示场景需要保证输入的一致性。 - -| 场景 | 固定方法 | -| --------------- | ------------- | -| 数据集的shuffle | 关闭shuffle。 | -| dropout | 关闭dropout。 | - -关闭shuffle示例: - -```python -train_loader = torch.utils.data.DataLoader( - train_dataset, - batch_size = batch_size, - shuffle = False, - num_workers = num_workers -) -``` - -关闭dropout: - -在使用from ptdbg import *后,工具会自动将torch.nn.functional.dropout、torch.nn.functional.dropout2d、torch.nn.functional.dropout3d、torch.nn.Dropout、torch.nn.Dropout2d、torch.nn.Dropout3d的接口参数p置为0。 - -### set_dump_path - -**功能说明** - -设置数据保存目录。建议在seed_all函数之后调用且需要保证训练进程能够调用该函数;多卡时须保证每个进程都能调用该函数。 - -**函数原型** - -```python -set_dump_path(fpath=None, dump_tag='ptdbg_dump') -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| -------- | ------------------------------------------------------------ | -------- | -| fpath | 设置数据目录路径。参数示例:'./dump_path'。
默认在dump_path目录下生成`ptdbg_dump_{version}`目录,并在该目录下生成`dump.pkl`文件以及`dump`数据文件保存目录。
当set_dump_switch函数配置了mode参数时,`dump.pkl`文件以及`dump`数据文件保存目录名称添加mode参数值为前缀,详情请参见“**dump数据存盘说明**”。
未配置fpath时,也可以通过环境变量ASCEND_WORK_PATH配置dump路径,此时数据将落盘在${ASCEND_WORK_PATH}/dump_data下,自定义配置fpath优先级高于环境变量,fpath和环境变量需要二选一。 | 否 | -| dump_tag | 设置数据目录名称。参数示例:dump_tag='dump_conv2d'。默认数据目录命名为ptdbg_dump_{version}。
{version}为当前安装ptdbg_ascend工具版本。目录结构参见“**dump数据存盘说明**”。
配置该参数会将生成的`ptdbg_dump_{version}`目录名称变更为dump_tag配置的值,如`dump_conv2d_{version}`。 | 否 | - -**函数示例** - -- 示例1:设置数据目录路径 - - ```python - set_dump_path('./dump_path') - ``` - -- 示例2:设置数据目录名称 - - ```python - set_dump_path('./dump_path', dump_tag='dump_conv2d') - ``` - - -若以相同的数据目录多次dump,则会因同名导致覆盖;多次dump建议配置不同的dump_tag。 - -### register_hook - -**功能说明** - -注册工具钩子函数。在set_dump_path之后调用。 - -dump操作必选。 - -**函数原型** - -```python -register_hook(model, hook, overflow_nums=overflow_nums, dump_mode=dump_mode, dump_config=dump_config_file) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------- | ------------------------------------------------------------ | -------- | -| hook | 注册工具的dump和溢出检测钩子。可取值overflow_check(表示溢出检测)和acc_cmp_dump(表示dump数据),二选一。 | 是 | -| overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | -| dump_mode | 控制针对溢出API的dump模式。可取值"api"或"acl",配置acl时表示dump ACL级别的溢出数据,此时set_dump_path参数不生效,dump数据目录由dump_config的.json文件配置,参数示例:dump_mode="acl"。默认不配置,即dump API级别的溢出数据。 | 否 | -| dump_config | acl dump的配置文件。dump_mode="acl"时,该参数必选;dump_mode="api"时,该参数不选。参数示例:dump_config='./dump.json'。 | 否 | - -**函数示例** - -- 示例1:注册工具钩子函数 - - ```python - register_hook(model, acc_cmp_dump) - ``` - -- 示例2:dump指定API的ACL级别数据 - - ```python - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - ``` - - 需要配置set_dump_switch的mode="acl"以及scope指定为前向或反向API,请参见“**set_dump_switch”**的示例。 - - 该场景set_dump_path不生效,由dump_config中的dump.json文件配置dump数据目录。 - -- 示例3:溢出检测dump - - ```python - register_hook(model, overflow_check, overflow_nums=3) - ``` - - dump执行时会在set_dump_path的fpath参数指定的目录下生成ptdbg_dump_{version}目录,保存溢出数据。 - - 多卡场景时,需要检测到至少有一张卡溢出次数达到overflow_nums时,训练结束。 - - 仅支持NPU环境。 - -- 示例4:dump指定API的ACL级别溢出数据 - - ```python - register_hook(model, overflow_check, dump_mode='acl', dump_config='./dump.json') - ``` - - 
该场景会在原有数据基础上,额外在dump.json文件配置的dump_path目录下生成一份ACL算子数据,该数据可通过“**ptdbg_ascend.parse**”工具进行解析。 - - 仅支持NPU环境。 - -### set_dump_switch - -**功能说明** - -设置dump范围。建议在register_hook函数之后的脚本内任意位置插入,但进行精度问题排查建议参照“场景化示例 > 单卡场景精度比对”章节的顺序,先从第一个迭代开始的位置调用并dump整网数据。 - -dump操作必选。 - -**函数原型** - -```python -def set_dump_switch(switch, mode="all", scope=[], api_list=[], filter_switch="OFF", dump_mode=["all"], summary_only=False): -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| --------------- | ------------------------------------------------------------ | -------- | -| switch | dump开关。可取值"ON"或"OFF"。须在选定dump开始的位置配置set_dump_switch("ON");dump结束的位置设置set_dump_switch("OFF")。 | 是 | -| mode | dump模式。可取值"all"、"list"、"range"、"stack"、"acl"、"api_list"、"api_stack",各参数含义请参见本节的“**函数示例**”。参数示例:mode="list"。默认为all。该参数配置值将作为dump数据文件名的前缀,详情请参见“**dump数据存盘说明**”。 | 否 | -| scope或api_list | dump范围。根据model配置的模式选择dump的API范围。参数示例:scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward"]、api_list=["relu"]。默认为空。 | 否 | -| filter_switch | dump bool和整型的tensor以及浮点、bool和整型的标量的过滤开关。可取值"ON"或"OFF"。参数示例:filter_switch="ON"。默认不配置,即filter_switch="OFF",表示dump上述数据。 | 否 | -| dump_mode | dump数据过滤。可取值"all"、"forward"、"backward"、"input"和"output",表示仅保存dump的数据中文件名包含"forward"、"backward"、"input"和"output"的前向、反向、输入或输出的.npy文件。参数示例dump_mode=["backward"]或dump_mode=["forward", "backward"]。默认为all,即保存所有dump的数据。除了all参数只能单独配置外,其他参数可以自由组合。 | 否 | -| summary_only | dump npy文件过滤,可取值True或False,配置为True后仅dump保存API统计信息的pkl文件,参数示例:summary_only=False,默认为False。 | 否 | - -**推荐配置** - -```python -set_dump_switch("ON", mode="api_stack", filter_switch="OFF") -``` - -开启dump数据和堆栈模式,同时为保证数据完整性开启dump bool和整型的tensor以及浮点、bool和整型的标量。 - -**函数示例** - -set_dump_switch可配置多种dump模式,示例如下: - -说明:以下均以dump部分API数据为例,API名可以从首次dump整网数据的结果csv文件中的NPU Name或Bench Name列获取。 - -- 示例1:dump指定API列表 - - ```python - set_dump_switch("ON", mode="list", scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward", "Torch_relu_3_backward"]) - ``` - -- 示例2:dump指定范围 - - ```python - 
set_dump_switch("ON", mode="range", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例3:STACK模式,只dump堆栈信息 - - ```python - set_dump_switch("ON", mode="stack", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) - ``` - -- 示例4:dump指定前向API的ACL级别数据 - - ```python - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - set_dump_switch("ON", mode="acl", scope=["Tensor_permute_1_forward"]) - ``` - - 需要配置register_hook的dump_mode='acl'和dump_config配置文件。 - -- 示例4:dump指定反向API的ACL级别数据 - - ```python - register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"]) - set_backward_input(["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) - ``` - - 需要配置register_hook的dump_mode='acl'和dump_config配置文件,并通过set_backward_input设置反向API输入的.npy文件。 - -- 示例5:dump指定某一类API的API级别输入输出数据 - - ```python - set_dump_switch("ON", mode="api_list", api_list=["relu"]) - ``` - - mode="api_list"时不配置scope。 - -- 示例6:dump全部API级别输入输出数据以及相应堆栈信息 - - ```python - set_dump_switch("ON", mode="api_stack") - ``` - - mode="api_stack"时不配置scope。 - -- 示例7: dump全部API级别输入输出数据并包含bool和整型的tensor以及浮点、bool和整型的标量,配置为OFF,会dump bool和整型数据 - - ```python - set_dump_switch("ON", filter_switch="OFF") - ``` - - 配置filter_switch="OFF"同时也可以配置mode、scope和api_list,除dump ACL级别数据。 - -- 示例8:仅保存dump的数据文件名包含“backward”的反向.npy文件 - - ```python - set_dump_switch("ON", dump_mode=["backward"]) - ``` - -- 示例9:仅dump pkl文件 - - ```python - set_dump_switch("ON", summary_only=True) - ``` - -以上示例均需要在结束dump的位置插入set_dump_switch("OFF")。 - -set_dump_switch配置mode为all或api_stack时,结束dump后,在dump目录下会自动生成compare_data.py比对脚本模板,示例如下: - -```python -from ptdbg_ascend import compare - -pkl_path = "%s" -dump_data_dir = "%s" - -dump_path_param = { - "npu_pkl_path": , - "bench_pkl_path": , - "npu_dump_data_dir": , - "bench_dump_data_dir": , - "is_print_compare_log": True -} - -compare(dump_path_param, 
output_path="", stack_mode="%s") -``` - -pkl_path和dump_data_dir字段会自动识别pkl和dump目录的路径,用户需要判断当前dump的环境是NPU、CPU或GPU,并将pkl_path和dump_data_dir字段填入下方dump_path_param函数对应的字段中,例如当前设备为NPU,那么填写方式如下: - -```python -from ptdbg_ascend import compare - -pkl_path = "%s" -dump_data_dir = "%s" - -dump_path_param = { - "npu_pkl_path": pkl_path, - "bench_pkl_path": , - "npu_dump_data_dir": dump_data_dir, - "bench_dump_data_dir": , - "is_print_compare_log": True -} - -compare(dump_path_param, output_path="", stack_mode="%s") -``` - -此时,另一侧数据的路径,需要用户另外识别并填入。 - -### set_overflow_check_switch - -**功能说明** - -置溢出检测范围。默认不配置该函数,全量进行溢出检测。 - -仅支持NPU环境。 - -**函数原型** - -```python -set_overflow_check_switch(switch, filter_switch='OFF') -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------- | ------------------------------------------------------------ | -------- | -| switch, | 检测开关。可取值"ON"或"OFF"。如果只在特定的step溢出检测,则在期望溢出检测的step位置开始前插入set_overflow_check_switch("ON"),在step结束的位置插入set_overflow_check_switch("OFF")。 | 是 | -| filter_switch | dump bool和整型的tensor以及浮点、bool和整型的标量的过滤开关。可取值"ON"或"OFF"。参数示例:filter_switch="ON"。默认不配置,即filter_switch="OFF",表示dump上述数据。 | 否 | - -**函数示例** - -- 示例1:指定范围溢出检测 - - ```python - register_hook(model, overflow_check) - set_overflow_check_switch("ON") - - ... - - set_overflow_check_switch("OFF") - ``` - - 该场景set_dump_path不生效,dump执行时会在当前目录自动生成ptdbg_dump_{version}目录,保存溢出数据。 - -- 示例2:前向API的ACL级别范围溢出检测 - - ```python - register_hook(model, overflow_check, dump_mode='acl', dump_config='./dump.json') - set_overflow_check_switch("ON") - - ... 
- - set_overflow_check_switch("OFF") - ``` - - 该场景set_dump_path不生效,由dump_config中的dump.json文件配置溢出数据目录。 - -### set_backward_input - -**功能说明** - -设置反向ACL级别dump时需要的反向输入的.npy文件。 - -**函数原型** - -```python -set_backward_input(backward_input) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| -------------- | ------------------------------------------------------------ | -------- | -| backward_input | 该输入文件为首次运行训练dump得到反向API输入的.npy文件。例如若需要dump Functional_conv2d_1 API的反向过程的输入输出,则需要在dump目录下查找命名包含Functional_conv2d_1、backward和input字段的.npy文件。 | 是 | - -**函数示例** - -```python -register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') -set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"]) -set_backward_input(["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) -``` - -## dump.json配置文件说明 - -**dump.json配置示例** - -```python -{ - "dump": - { - "dump_list":[], - "dump_path":"./dump/output", - "dump_mode":"all", - "dump_op_switch":"on" - } -} -``` - -**dump.json参数说明** - -| 字段名 | 说明 | -| -------------- | ------------------------------------------------------------ | -| dump_list | 待dump数据的API模型。为空,无需配置。 | -| dump_path | dump数据文件存储到运行环境的目录,主要用于指定ACL dump数据路径。支持配置绝对路径或相对路径。dump_path须为已存在目录。 | -| dump_mode | dump数据模式,配置如下:
- output:dump API的输出数据。默认值。
- input:dump API的输入数据。
- all:dump API的输入、输出数据。 | -| dump_op_switch | 单API模型dump数据开关,配置如下: * off:关闭单API模型dump,默认值。 * on:开启单API模型dump。 | - -**dump目录说明** - -配置register_hook的dump_config后,采集的dump数据会在{dump_path}/{time}/{deviceid}/{model_id}目录下生成,例如“/home/HwHiAiUser/output/20200808163566/0/0” - -```bash -├── 20230131172437 -│   └── 1 -│   ├── 0 -│   │   ├── Add.Add.45.0.1675157077183551 -│   │   ├── Cast.trans_Cast_0.31.0.1675157077159449 -│   │   ├── Cast.trans_Cast_5.43.0.1675157077180129 -│   │   ├── MatMul.MatMul.39.0.1675157077172961 -│   │   ├── Mul.Mul.29.0.1675157077155731 -│   │   ├── NPUAllocFloatStatus.NPUAllocFloatStatus.24.0.1675157077145262 -│   │   ├── TransData.trans_TransData_1.33.0.1675157077162791 -│   │   └── TransData.trans_TransData_4.41.0.1675157077176648 -│   ├── 1701737061 -│   │   └── Cast.trans_Cast_2.35.0.1675157077166214 -│   ├── 25 -│   │   └── NPUClearFloatStatus.NPUClearFloatStatus.26.0.1675157077150342 -│   └── 68 -│   └── TransData.trans_TransData_3.37.0.1675157077169473 -``` - -## 模块级精度数据dump - -### 总体说明 - -大模型场景下,通常不是简单的利用自动迁移能力实现GPU到NPU的训练脚本迁移,而是会对NPU网络进行一系列针对性的适配,因此,常常会造成迁移后的NPU模型存在部分子结构不能与GPU原始模型完全对应。模型结构不一致导致API调用类型及数量不一致,若直接按照API粒度进行精度数据dump和比对,则无法完全比对所有的API。 - -本节介绍的功能是对模型中的大粒度模块进行数据dump,使其比对时,对于无法以API粒度比对的模块可以直接以模块粒度进行比对。 - -模块指的是继承自nn.Module类模块,通常情况下这类模块就是一个小模型,可以被视为一个整体,dump数据时以模块为粒度进行dump。 - -### module_dump - -**功能说明** - -开启模块级精度数据dump。 - -模块级精度数据dump时必选。 - -**函数原型** - -```python -module_dump(module, module_name) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ----------- | ------------------------------------------------------------ | -------- | -| module | 网络中实例化好的nn.Module类模块的model对象。 | 是 | -| module_name | 用户自定义的该model名称。主要用于dump数据文件的命名,便于在比对时识别模块级数据。 | 是 | - -### module_dump_end - -**功能说明** - -结束模块级精度数据dump。 - -模块级精度数据dump时必选。 - -**函数原型** - -```python -module_dump_end() -``` - -### 示例代码 - -```python -# 根据需要import包 -import os -import torch -import torch.nn as nn -import torch_npu -import torch.nn.functional as F -from ptdbg_ascend import * - 
-torch.npu.set_device("npu:0") -# 定义一个简单的网络 -class ModuleOP(nn.Module): - def __init__(self) -> None: - super().__init__() - self.linear_1 = nn.Linear(in_features=2, out_features=2) - self.linear_2 = nn.Linear(in_features=2, out_features=1) - def forward(self, x): - x1 = self.linear_1(x) - x2 = self.linear_2(x1) - r1 = F.relu(x2) - return r1 - -if __name__ == "__main__": - module = ModuleOP() - - # 注册工具 - set_dump_path("./dump_data/npu") - set_dump_switch("ON") - register_hook(module, acc_cmp_dump) - - x = torch.randn(2, 2) - - module_dump(module, "MyModule") # 开启模块级精度数据dump - out = module(x) - module_dump_end() # 结束模块级精度数据dump - loss = out.sum() - loss.backward() - set_dump_switch("OFF") -``` - -## dump数据存盘说明 - -dump结果目录结构示例如下: - -```bash -├── dump_path -│ └── ptdbg_dump_{version} -│ ├── rank0 -│ │ ├── dump -| | | ├── Tensor_permute_1_forward.npy -| | | ├── MyModule_0_forward_input.npy # 开启模块级精度数据dump时存在模块级的dump数据文件 -| | | ... -| | | └── Fcuntion_linear_5_backward_output.npy -│ │ └── dump.pkl -│ ├── rank1 -| | ├── dump -| | | └── ... -| | └── dump.pkl -│ ├── ... -│ | -| └── rank7 -``` - -其中ptdbg_dump_{version}为未设置set_dump_path的dump_tag参数时的默认命名;rank为设备上各卡的ID,每张卡上dump的数据会生成对应dump目录。 - -当使用debugger方式dump数据时,配置了PrecisionDebugger模块的step=[]参数,dump结果目录则以step为父目录,例如配置step=[0,1,2]时,dump结果目录为: - -``` -├── dump_path -│ └── step0 -│ | └── ptdbg_dump_{version} -│ | | ├── rank0 -│ | | ├── ... -│ | | ├── rank7 -| ├── step1 -| | | ├── ... 
-│ └── step2 -``` - -**精度比对dump场景** - -精度比对dump场景的结果如下: - -* dump.pkl文件:包含dump数据的API名称、dtype、 shape以及各数据的max、min、mean统计信息。 - -* dump目录:目录下为npy格式的dump数据。 - - npy文件保存的前缀和PyTorch对应关系如下 - - | 前缀 | Torch模块 | - | ---------- | ------------------- | - | Tensor | torch.Tensor | - | Torch | torch | - | Functional | torch.nn.functional | - | NPU | NPU亲和算子 | - | VF | torch._VF | - -当set_dump_switch或configure_hook配置mode参数(例如:mode="api_stack" )时,dump结果的文件名会添加api_stack前缀,dump结果如下: - -* api_stack_dump.pkl -* api_stack_dump目录 - -**溢出检测dump场景** - -register_hook设置了overflow_check时,检测API溢出,dump结果的文件名格式为:`{api_type}___{api_name}___{API调用次数}_{前向反向}_{当前溢出次数}`,dump结果示例如下: - -* `Tensor___add___1_forward_1.pkl` -* `Tensor___add___1_forward_1`目录 - -## CPU或GPU与NPU精度数据比对 - -### 总体说明 - -- 本节主要介绍CPU或GPU与NPU精度数据比对的函数以及示例。 - -- 比对函数均通过单独创建精度比对脚本执行,可支持单卡和多卡场景的精度数据比对。 - -- 工具性能:比对数据量较小时(参考值单份文件小于10GB),参考比对速度0.1GB/s;比对数据量较大时,参考比对速度0.3GB/s。 - 推荐环境配置:独占环境,CPU核心数192,固态硬盘(IO速度参考:固态硬盘 > 500MB/s,机械硬盘60 ~ 170MB/s)。 - - 用户环境性能弱于标准约束或非独占使用的比对速度酌情向下浮动。比对速度的计算方式:两份比对文件大小/比对耗时。 - -### 约束 - -- NPU自研API,在CPU或GPU若没有对应的API,该API的dump数据不比对。 - -- NPU与CPU或GPU的计算结果误差可能会随着模型的执行不断累积,最终会出现同一个API因为输入的数据差异较大而无法比对的情况。 - -- CPU或GPU与NPU中两个相同的API会因为调用次数不同导致无法比对或比对到错误的API,不影响整体运行,该API忽略。 - -### compare_distributed - -**功能说明** - -将CPU或GPU与NPU的dump文件进行比对,支持单卡和多卡,可同时比对多卡的dump数据。多机场景需要每个设备单独执行比对操作。可自动检索和匹配对应卡和进程所dump的数据文件,再调用compare进行比对。单机单卡时与compare函数二选一。 - -**函数原型** - -```python -compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| -------------- | ------------------------------------------------------------ | -------- | -| npu_dump_dir | 配置NPU环境下的dump目录,即set_dump_path函数的dump_tag参数对应的目录名称。参数示例:'./npu_dump/dump_conv2d_v2.0'。 | 是 | -| bench_dump_dir | 配置CPU、GPU或NPU环境下的dump目录,即set_dump_path函数的dump_tag参数对应的目录名称。参数示例:'./gpu_dump/dump_conv2d_v2.0'。 | 是 | -| output_path | 
配置比对结果csv文件存盘目录。需要预先创建output_path目录。参数示例:'./output'。文件名称基于时间戳自动生成,格式为:`compare_result_rank{npu_ID}-rank{cpu/gpu/npu_ID}_{timestamp}.csv`。 | 是 | -| **kwargs | 支持compare的所有可选参数。 | 否 | - -**函数示例** - -创建比对脚本,例如compare_distributed.py,拷贝如下代码,具体参数请根据实际环境修改。 - -```python -from ptdbg_ascend import * -compare_distributed('./npu_dump/ptdbg_dump_v2.0', './gpu_dump/ptdbg_dump_v2.0', './output') -``` - -### compare - -**功能说明** - -将CPU或GPU与NPU的dump文件进行比对,仅支持单机单卡。 - -**函数原型** - -```python -compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_match=False) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------ | ------------------------------------------------------------ | -------- | -| input_param | 配置dump数据文件及目录。配置参数包括:
- "npu_pkl_path":指定NPU dump目录下的.pkl文件。参数示例:"npu_pkl_path": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl"。必选。
- "bench_pkl_path":指定CPU、GPU或NPU dump目录下的.pkl文件。参数示例:"bench_pkl_path": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl"。必选。
- "npu_dump_data_dir":指定NPU dump目录下的dump数据目录。参数示例:"npu_dump_data_dir": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump"。必选。
- "bench_dump_data_dir":指定CPU、GPU或NPU dump目录下的dump数据目录。参数示例:"bench_dump_data_dir": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump"。必选。
- "is_print_compare_log":配置是否开启日志打屏。可取值True或False。可选。 | 是 | -| output_path | 配置比对结果csv文件存盘目录。参数示例:'./output'。文件名称基于时间戳自动生成,格式为:`compare_result_{timestamp}.csv`。 | 是 | -| stack_mode | 配置stack_mode的开关。仅当dump数据时配置set_dump_switch的mode="api_stack"时需要开启。参数示例:stack_mode=True,默认为False。 | 否 | -| auto_analyze | 自动精度分析,开启后工具自动针对比对结果进行分析,识别到第一个精度不达标节点(在比对结果文件中的“Accuracy Reached or Not”列显示为No),并给出问题可能产生的原因(打屏展示并生成advisor_{timestamp}.txt文件)。可取值True或False,参数示例:auto_analyze=False,默认为True。 | 否 | -| fuzzy_match | 模糊匹配。开启后,对于网络中同一层级且命名仅调用次数不同的API,可匹配并进行比对。可取值True或False,参数示例:fuzzy_match=True,默认为False。 | 否 | - -**函数示例** - -单机单卡场景下创建比对脚本,例如compare.py,拷贝如下代码,具体参数请根据实际环境修改。 - -```python -from ptdbg_ascend import * -dump_result_param={ -"npu_pkl_path": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl", -"bench_pkl_path": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump.pkl", -"npu_dump_data_dir": "./npu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump", -"bench_dump_data_dir": "./gpu_dump/ptdbg_dump_v2.0/rank0/api_stack_dump", -"is_print_compare_log": True -} -compare(dump_result_param, "./output", stack_mode=True) -``` - -### parse - -**功能说明** - -解析并提取dump信息中的堆栈信息及数据统计信息。 - -**函数原型** - -```python -parse(pkl_file, moudule_name_prefix) -``` - -**参数说明** - -| 参数名 | 说明 | 是否必选 | -| ------------------- | ------------------------------------------------------------ | -------- | -| pkl_file | 指定dump数据文件中的pkl文件名。参数示例:"./npu_dump/ptdbg_dump_v2.0/rank0/dump.pkl"。 | 是 | -| moudule_name_prefix | 指定待提取的API接口前缀。参数示例:"Torch_norm_1_forward"。 | 是 | - -**函数示例** - -创建堆栈信息及数据统计信息提取脚本,例如parse.py,拷贝如下代码,具体参数请根据实际环境修改。 - -```python -from ptdbg_ascend import * -parse("./npu_dump/ptdbg_dump_v2.0/rank0/dump.pkl", "Torch_batch_normal_1_forward") -``` - -### 计算精度评价指标 - -PyTorch精度比对是以CPU或GPU的计算结果为标杆,计算Cosine(余弦相似度)、MaxAbsErr(最大绝对误差)和MaxRelativeErr(最大相对误差),根据这两个结果判断API在运行时是否存在精度问题。 - -计算精度评价指标: - -1. Cosine:通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0。 - -2. 
MaxAbsErr:当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001。 - -3. MaxRelativeErr:当最大相对误差越接近0表示其计算的误差越小。 - - 当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象。 - -精度比对结果csv文件中只需要通过Accuracy Reached or Not来判断计算精度是否达标,判断标准如下: - -1. Cosine < 0.99 且 MaxAbsError > 0.001时,精度不达标,标记为“No”。 -2. Cosine < 0.9,精度不达标,标记为“No”。 -3. MaxAbsError > 1,精度不达标,标记为“No”。 -4. 其余情况下记为精度达标,标记为“Yes”。 - -## ptdbg_ascend.parse数据解析功能 - -ptdbg_ascend.parse为命令行交互式界面解析工具,提供更多的数据解析功能并且展示结果。 - -主要的使用场景包括: - -- 支持指定ACL层级算子数据比对。 -- 支持指定ACL层级算子数据转换及展示。 -- 支持交互式指定pkl文件中API对应dump数据查看。 -- 支持API进行可选层级比对和打印(统计级和像素级)。 - -安装ptdbg_ascend工具后,可以通过使用命令 **python -m ptdbg_ascend.parse** 进入交互式界面,可在parse的界面中执行Shell命令,以及上述场景的相关解析命令。Ctrl+C可以退出该界面。 - -### ACL层级算子数据比对 - -- 依赖:CANN包中的msaccucmp工具。 - -- 输入以下比对命令进行数据比对。 - - ```bash - vc -m my_dump_path -g golden_dump_path [-out output_path] - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ------------------------------------------------------------ | -------- | - | -m | 待比对dump数据目录。 | 是 | - | -g | dump数据目录。 | 是 | - | -out | 结果输出目录。 | 否 | - | -asc | 指定msaccucmp路径,默认路径为:/usr/local/Ascend/ascend-toolkit/latest/tools/operator_cmp/compare/msaccucmp.py。 | 否 | - - - 输出结果:result_{timestamp}.csv文件。 - - 若指定-out参数需要用户传入输出路径,并且路径需要已存在。 - - 若未指定输出目录, 则比对结束后将结果保存在默认目录 “./parse_data/comapre_result”中,比对结束后会打印log提示输出结果存放路径。 - -**示例** - -```bash -# 传入待比对数据目录以及标杆数据目录 -Parse >>> vc -m ./my_dump_path -g ./golden_data_path -...... -# 比对结果打印 -[INFO] The comparison result have been written to "./parse_data/compare_result/result_20230818104735.csv". -[INFO] The command was completed and took 6 seconds. -[INFO] Compare finished!! 
-``` - -### ACL算子数据的npy转换 - -- 依赖:CANN包中的msaccucmp工具。 - -- 输入以下转换命令进行数据转换, 将ACL级别dump数据转为npy文件。 - - ```bash - dc -n file_name/file_path [-f format] [-out output_path] - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ------------------------------------------------------------ | -------- | - | -n | 需转换的dump数据文件或dump数据文件目录。 | 是 | - | -f | 开启format转换,指定该参数时需要配置format格式,若未指定该参数,则直接转换为npy格式。 | 否 | - | -out | 结果输出目录。 | 否 | - | -asc | 指定msaccucmp路径,默认路径为:/usr/local/Ascend/ascend-toolkit/latest/tools/operator_cmp/compare/msaccucmp.py | 否 | - - [^]: 若传入单个dump文件,则转换单个文件,若传入dump文件目录则转换目录下所有dump文件。 - - - 输出结果:npy文件。 - - 若指定-out参数需要用户传入输出路径,并且路径需要已存在。 - - 若未指定输出目录, 则比对结束后将结果保存在默认目录 “./parse_data/convert_result”中,比对结束后会打印log提示输出结果存放路径及转换结果。 - -- 输入以下命令,展示npy数据统计信息。 - - ```bash - pt -n file_path - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ------------- | -------- | - | -n | npy文件路径。 | 是 | - - 打印统计信息:shape, dtype, max, min和mean。 - -**示例1** - -```bash -# 传入需转换的dump文件目录 -Parse >>> dc -n ./dump_data/ -...... 
-# 转换结果 -╭──────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ SrcFile: ./dump_data/ -│ - Add.fp32_vars_add_2fp32_vars_Relu_9.31.5.1636595794731103.input.0.npy │ -│ - Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.output.0.npy │ -│ - Add.fp32_vars_add_2fp32_vars_Relu_9.31.5.1636595794731103.input.1.npy │ -│ - Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.1.npy │ -│ - Add.fp32_vars_add_3fp32_vars_Relu_12.40.5.1636595794846124.input.1.npy │ -│ - Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.0.npy │ -│ - Add.fp32_vars_add_3fp32_vars_Relu_12.40.5.1636595794846124.input.0.npy │ -│ - Add.fp32_vars_add_2fp32_vars_Relu_9.31.5.1636595794731103.output.0.npy │ -│ - Add.fp32_vars_add_3fp32_vars_Relu_12.40.5.1636595794846124.output.0.npy │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` - -**示例2** - -```bash -# 查看某个dump数据块的数据信息 -# 默认会将数据中的tensor保存成 txt -Parse >>> pt -n ./parse_data/dump_convert/Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.output.0.npy -...... -# 打印统计信息 -[Shape: (1, 16, 56, 56, 16)] [Dtype: float16] [Max: 452.0] [Min: -408.5] [Mean: -3.809] -Path: ./parse_data/dump_convert/Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.0.npy -TextFile:./parse_data/dump_convert/Add.fp32_vars_add_1fp32_vars_Relu_6.24.5.1636595794631347.input.0.npy.txt -``` - -### pkl文件中指定API的dump数据信息查看 - -- 输入以下命令,解析并输出pkl文件中指定api的统计信息。 - - ```bash - pk -f pkl_path -n api_name - ``` - - | 参数名称 | 说明 | 是否必选 | - | -------- | ----------------- | -------- | - | -f | 指定pkl文件路径。 | 是 | - | -n | 指定API名称。 | 是 | - - - 输出结果:打印统计信息(shape, dtype, max和min mean)。 - - 若pkl文件中存在相应的堆栈信息,则会打印堆栈信息。 - -**示例** - -```bash -# 传入pkl文件及api名称 -Parse >>> pk -f ./torch_dump/ptdbg_v3.2/rank0/api_stack_dump.pkl -n Functional_conv2d_0_forward -...... 
-# 打印统计信息及堆栈(pkl文件不包含堆栈则不会打印堆栈) - -Statistic Info: - [Functional_conv2d_0_forward_input.0][dtype: torch.float32][shape: [2, 1, 2, 2]][max: 1.576936960220337][min: -0.9757485389709473][mean: 0.4961632490158081] - [Functional_conv2d_0_forward_input.1][dtype: torch.float32][shape: [2, 1, 2, 2]][max: 0.20064473152160645][min: -0.47102075815200806][mean: -0.20796933770179749] - [Functional_conv2d_0_forward_input.2][dtype: torch.float32][shape: [2]][max: 0.17380613088607788][min: -0.16853803396224976][mean: 0.0026340484619140625] - [Functional_conv2d_0_forward_output][dtype: torch.float32][shape: [2, 2, 1, 1]][max: 0.02364911139011383][min: -1.762906551361084][mean: -0.6710853576660156] -``` - -### API可选层级比对 - -- 输入以下命令, 进行统计级和像素级比对。 - - ```bash - cn -m my_data*.npy -g gloden*.npy [-p num] [-al atol] [-rl rtol] - ``` - - - 统计级比对:对tensor整体进行余弦值及相对误差的计算。 - - 像素级比对:对输入的两个npy文件进行逐元素比对。若两个tensor对应元素的相对误差或绝对误差大于**误差阈值**(-al和-rl配置)则被标记为错误数据。 - - | 参数名称 | 说明 | 是否必选 | - | -------- | ----------------------------------------------- | -------- | - | -m | 待比对数据。 | 是 | - | -g | 标杆数据。 | 是 | - | -p | 设置比对结束后打印错误元素的个数,默认值20。 | 否 | - | -al | 判定数据存在精度问题的绝对误差阈值,默认0.001。 | 否 | - | -rl | 判定数据存在精度问题的相对误差阈值,默认0.001。 | 否 | - | -s | 将npy文件保存成txt文件,用于查看,默认开启。 | 否 | - - 输出结果: - - - 统计级比对结果。 - - 两个文件的统计信息(shape, dtype, max, min和mean)。 - - 错误数据打印表格。 - -**示例** - -```bash -# 对比两个tensor的数据 -Parse >>> cn -m Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy -g InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy -p 10 -s -al 0.002 -rl 0.005 - Error Item Table Top Item Table -┏━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┏━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ -┃ Index ┃ Left ┃ Right ┃ Diff ┃ ┃ Index ┃ Left ┃ Right ┃ Diff ┃ -┡━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ ┡━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ -│ 155 │ 0.024600908 │ 0.022271132 │ 0.002329776 │ │ 0 │ -0.9206961 │ -0.9222216 │ 0.0015255213 │ 
-│ 247 │ 0.015752593 │ 0.017937578 │ 0.0021849852 │ │ 1 │ -0.6416973 │ -0.64051837 │ 0.0011789203 │ -│ 282 │ -0.0101207765 │ -0.007852031 │ 0.0022687456 │ │ 2 │ -0.35383835 │ -0.35433492 │ 0.0004965663 │ -│ 292 │ 0.019581757 │ 0.02240482 │ 0.0028230622 │ │ 3 │ -0.18851271 │ -0.18883198 │ 0.00031927228 │ -│ 640 │ -0.06593232 │ -0.06874806 │ 0.0028157383 │ │ 4 │ -0.43508735 │ -0.43534422 │ 0.00025686622 │ -│ 1420 │ 0.09293677 │ 0.09586689 │ 0.0029301196 │ │ 5 │ 1.4447614 │ 1.4466647 │ 0.0019032955 │ -│ 1462 │ -0.085207745 │ -0.088047795 │ 0.0028400496 │ │ 6 │ -0.3455438 │ -0.3444429 │ 0.0011008978 │ -│ 1891 │ -0.03433288 │ -0.036525503 │ 0.002192624 │ │ 7 │ -0.6560242 │ -0.6564579 │ 0.0004336834 │ -│ 2033 │ 0.06828873 │ 0.07139922 │ 0.0031104907 │ │ 8 │ -2.6964858 │ -2.6975214 │ 0.0010356903 │ -│ 2246 │ -0.06376442 │ -0.06121233 │ 0.002552092 │ │ 9 │ -0.73746175 │ -0.73650354 │ 0.00095820427 │ -└───────┴───────────────┴──────────────┴──────────────┘ └───────┴─────────────┴─────────────┴───────────────┘ -╭───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ Left: | -│ |- NpyFile: ./dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy | -│ |- TxtFile: ./dump/temp/decode/Add.InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.323.1619494134703053.output.0.npy.txt | -│ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.846897] [Min: -8.368301] [Mean: -0.72565556] | -│ DstFile: │ -│ |- NpyFile: ./dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy | -│ |- TxtFile: ./dump/cpu/InceptionV3_InceptionV3_Mixed_7a_Branch_0_add_3.0.1619492699305998.npy.txt | -│ |- NpySpec: [Shape: (32, 8, 8, 320)] [Dtype: float32] [Max: 5.8425903] [Min: -8.374472] [Mean: -0.7256237] │ -│ NumCnt: 655360 │ -│ AllClose: False │ -│ CosSim: 0.99999493 │ -│ ErrorPer: 0.023504638671875 (rl= 0.005, al= 0.002) │ 
-╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` - -## FAQ - -[FAQ](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/doc/FAQ.md) diff --git "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T3.md" "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.md" similarity index 92% rename from "debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T3.md" rename to "debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.md" index 04a221afee9db88d031214c64ec54c94c951ac75..23ee300d25593173f67910440ecfe9da854af05b 100644 --- "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.T3.md" +++ "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v4.0.md" @@ -1,6 +1,6 @@ # **PyTorch精度工具使用指南** -本文主要介绍PyTorch精度工具精度工具ptdbg_ascend的使用以及精度比对场景示例。 +本文主要介绍PyTorch精度工具ptdbg_ascend的使用以及精度比对场景示例。 ptdbg_ascend工具的原理及安装请参见《[PyTorch精度工具](https://gitee.com/ascend/att/blob/master/debug/accuracy_tools/ptdbg_ascend/README.md)》。 @@ -55,23 +55,23 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 ```python from ptdbg_ascend import * - + # 在main函数开始前固定随机数 seed_all() - + # 配置dump数据目录路径和名称 set_dump_path("./npu_dump", dump_tag='all') - + # 注册dump回调函数 register_hook(model, acc_cmp_dump) - + ... 
- + # 在第一个迭代开始的位置开启dump和堆栈模式,同时为保证数据完整性开启dump bool和整型的tensor以及浮点、bool和整型的标量 set_dump_switch("ON", mode="api_stack", filter_switch="OFF") - + ... - + # 在第一个迭代结束的位置关闭dump set_dump_switch("OFF") ``` @@ -91,7 +91,7 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 "bench_dump_data_dir": "./gpu_dump/all_v2.0/rank0/api_stack_dump", "is_print_compare_log": True } - compare(dump_result_param, "./output") + compare(dump_result_param, "./output", stack_mode=True) ``` 执行比对: @@ -118,7 +118,7 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 ```python from ptdbg_ascend import * - + # 提取dump信息中第1次调用的API:Torch_batch_normal的堆栈信息及数据统计信息 parse("./npu_dump/all_v2.0/rank0/api_stack_dump.pkl", "Torch_batch_normal_1_forward") ``` @@ -131,23 +131,23 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 -5. (可选)指定API dump数据。 +5. (可选)指定API对其底层ACL数据进行dump。 - dump指定前向API的ACL级别数据 ```python from ptdbg_ascend import * - + # 固定随机数,开启确定性计算 seed_all(mode=True) set_dump_path("./dump_path", dump_tag='forward') register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - + # dump指定前向API的ACL级别数据、bool和整型的tensor以及浮点、bool和整型的标量 set_dump_switch("ON", mode="acl", scope=["Tensor_permute_1_forward"], filter_switch="OFF") - + ... - + set_dump_switch("OFF") ``` @@ -155,18 +155,18 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 ```python from ptdbg_ascend import * - + # 固定随机数,开启确定性计算 seed_all(mode=True) set_dump_path("./dump_path", dump_tag='backward') register_hook(model, acc_cmp_dump, dump_mode='acl', dump_config='./dump.json') - + # dump指定反向API的ACL级别数据、bool和整型的tensor以及浮点、bool和整型的标量 set_dump_switch("ON", mode="acl", scope=["Functional_conv2d_1_backward"], filter_switch="OFF") set_backward_input(["./npu_dump/all_v2.0/rank0/api_stack_dump/Functional_conv2d_1_backward_input.0.npy"]) - + ... 
- + set_dump_switch("OFF") ``` @@ -296,12 +296,6 @@ register_hook需要在set_dump_path之后调用,也需要在每个进程上被 通信类API数据可以使用全量dump方式获取,若只dump通信类API数据,可以使用如下示例: -```python -debugger.configure_hook(mode="api_list", api_list=["distributed"]) -``` - -或 - ```python set_dump_switch("ON", mode="api_list", api_list=["distributed"]) ``` @@ -460,7 +454,7 @@ PrecisionDebugger(dump_path=None, hook_name=None, rank=None, step=[], enable_dat | rank | 指定对某张卡上的数据进行dump或溢出检测,默认未配置(表示dump所有卡的数据),须根据实际卡的Rank ID配置。应配置为大于0的正整数,且须根据实际卡的Rank ID配置,若所配置的值大于实际训练所运行的卡的Rank ID,则dump数据为空,比如当前环境Rank ID为0~7,实际训练运行0~3卡,此时若配置Rank ID为4或不存在的10等其他值,此时dump数据为空。 | 否 | | step | 指定dump某个step的数据,默认未配置,须指定为训练脚本中存在的step。step为list格式,可配置逐个step,例如:step=[0,1,2];也可以配置step范围,例如:step=list(range(0,9)),表示dump第0到第8个step。 | 否 | | enable_dataloader | 自动控制开关,可取值True(开启)或False(关闭),默认为False。配置为True后自动识别dump step参数指定的迭代,并在该迭代执行完成后退出训练,此时start和stop函数可不配置,开启该开关要求训练脚本是通过torch.utils.data.dataloader方式加载数据;配置为False则需要配置start和stop函数,并在最后一个stop函数后或一个step结束的位置添加debugger.step()。 | 否 | -| model | 开启model模式,传入网络模型实例化的对象,配置该参数后,dump操作仅dump网络中init方法里调用的方法(nn.model类),不会对所有API进行dump。参数示例: model=net,net为网络模型实例化的对象名称。默认未配置。
配置该参数时,PrecisionDebugger模块请在模型实例化之后调用。
该模式不支持“溢出检测”和“模块级精度数据dump”。 | 否 | +| model | 开启model模式,传入网络模型实例化的对象,配置该参数后,dump操作仅dump网络中init方法里调用的方法(nn.Module类),不会对所有API进行dump。参数示例: model=net,net为网络模型实例化的对象名称。默认未配置。
配置该参数时,PrecisionDebugger模块请在模型实例化之后调用。
该模式不支持“溢出检测”和“模块级精度数据dump”。 | 否 | ### configure_hook函数(可选) @@ -507,72 +501,83 @@ configure_hook可配置多种dump模式,示例如下: - 示例1:dump指定API列表 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="list", scope=["Tensor_permute_1_forward", "Tensor_transpose_2_forward", "Torch_relu_3_backward"]) ``` - 示例2:dump指定范围 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="range", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) ``` - 示例3:STACK模式,只dump堆栈信息 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="stack", scope=["Tensor_abs_1_forward", "Tensor_transpose_3_forward"]) ``` - 示例4:dump指定前向API的ACL级别数据 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="acl", scope=["Tensor_permute_1_forward"], acl_config="./dump.json") ``` -- 示例4:dump指定反向API的ACL级别数据 +- 示例5:dump指定反向API的ACL级别数据 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="acl", scope=["Functional_conv2d_1_backward"], acl_config="./dump.json", backward_input=["./npu_dump/dump_conv2d_v2.0/rank0/dump/Functional_conv2d_1_backward_input.0.npy"]) ``` -- 示例5:dump指定某一类API的API级别输入输出数据 +- 示例6:dump指定某一类API的API级别输入输出数据 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="api_list", api_list=["relu"]) ``` mode="api_list"时不配置scope。 -- 示例6:dump全部API级别输入输出数据以及相应堆栈信息 +- 示例7:dump全部API级别输入输出数据以及相应堆栈信息 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(mode="api_stack") ``` mode="api_stack"时不配置scope。 -- 示例7: dump全部API级别输入输出数据并包含bool和整型的tensor以及浮点、bool和整型的标量,配置为OFF,会dump bool和整型数据 +- 示例8: 
dump全部API级别输入输出数据并包含bool和整型的tensor以及浮点、bool和整型的标量,配置为OFF,会dump bool和整型数据 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(filter_switch="OFF") ``` 配置filter_switch="OFF"同时也可以配置mode、scope和api_list,除dump ACL级别数据。 -- 示例8:仅保存dump的数据文件名包含“backward”的反向.npy文件 +- 示例9:仅保存dump的数据文件名包含“backward”的反向.npy文件 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(input_output_mode=["backward"]) ``` -- 示例9:仅dump pkl文件 +- 示例10:仅dump pkl文件 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="dump", step=[0]) debugger.configure_hook(summary_only=True) ``` -- 示例10:溢出检测dump +- 示例11:溢出检测dump ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="overflow_check", step=[0]) debugger.configure_hook(overflow_nums=1) ``` @@ -585,11 +590,12 @@ configure_hook可配置多种dump模式,示例如下: - 示例11:dump溢出API的ACL级别数据 ```python + debugger = PrecisionDebugger(dump_path="./dump_path", hook_name="overflow_check", step=[0]) debugger.configure_hook(mode="acl", acl_config="./dump.json") ``` - + 该场景会在原有数据基础上,额外在dump.json文件配置的dump_path目录下生成一份ACL算子数据,该数据可通过“**ptdbg_ascend.parse**”工具进行解析。 - + 仅支持NPU环境。 ### start函数(可选) @@ -626,6 +632,8 @@ debugger.stop() ### 示例代码(自动模式) +需要保证用户训练代码是通过torch.utils.data.dataloader方式加载数据。 + - 示例1:开启dump ```python @@ -690,11 +698,11 @@ debugger.stop() debugger.step() ``` -## CPU或GPU及NPU精度数据dump +## register_hook方式dump和溢出检测 ### 总体说明 -- 本节主要介绍CPU或GPU及NPU精度数据dump所需要的函数以及示例。 +- 本节主要介绍CPU或GPU及NPU精度数据dump和溢出检测所需要的函数以及示例。 - ptdbg_ascend工具默认情况下仅dump PyTorch模型的API输入输出数据进行精度比对,若在比对结果中发现某个API下可能存在ACL的精度问题,那么可以选择dump该API的ACL级别数据进行精度分析。 @@ -718,7 +726,7 @@ debugger.stop() 固定随机数。通过固定随机数保证模型的输入或输出一致。在训练主函数开始前调用,避免随机数固定不全。 -dump操作必选。 +使用from ptdbg_ascend import *后自动导入该函数,代码无需再次添加,若需要修改随机数种子和确定性计算模式,则需要通过添加该函数修改。 **函数原型** @@ -846,8 +854,8 @@ register_hook(model, hook, overflow_nums=overflow_nums, dump_mode=dump_mode, dum | model | 
传入网络模型实例化的对象。参数示例: model=net,net为网络模型实例化的对象名称。 | 是 | | hook | 注册工具的dump和溢出检测钩子。可取值overflow_check(表示溢出检测)和acc_cmp_dump(表示dump数据),二选一。 | 是 | | overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | -| dump_mode | 控制针对溢出API的dump模式,可取值"model"、"acl"或"api"。配置为"model"时,表示开启model模式,dump操作仅dump网络中init方法里调用的方法(nn.model类),不会对所有API进行dump,不支持“溢出检测”和“模块级精度数据dump”;配置acl时,表示dump ACL级别的溢出数据,此时set_dump_path参数不生效,dump数据目录由dump_config的.json文件配置。参数示例:dump_mode="acl"。默认不配置,即dump API级别的溢出数据。 | 否 | -| dump_config | acl dump的配置文件。dump_mode="acl"时,该参数必选;dump_mode="api"时,该参数不选。参数示例:dump_config='./dump.json'。 | 否 | +| dump_mode | 控制针对溢出API的dump模式,可取值"model"、"acl"或"api"。配置为"model"时,表示开启model模式,dump操作仅dump网络中init方法里调用的方法(nn.Module类),不会对所有API进行dump,不支持“溢出检测”和“模块级精度数据dump”;配置acl时,表示dump ACL级别的溢出数据,此时set_dump_path参数不生效,dump数据目录由dump_config的.json文件配置。参数示例:dump_mode="acl"。默认不配置,即dump API级别的溢出数据。 | 否 | +| dump_config | acl dump的配置文件。dump_mode="acl"时,该参数必选;dump_mode="api"或"model"时,该参数不选。参数示例:dump_config='./dump.json'。 | 否 | **函数示例** @@ -1228,8 +1236,8 @@ torch.npu.set_device("npu:0") class ModuleOP(nn.Module): def __init__(self) -> None: super().__init__() - self.linear_1 = nn.Linear(in_features=2, out_features=2) - self.linear_2 = nn.Linear(in_features=2, out_features=1) + self.linear_1 = nn.Linear(in_features=8, out_features=4) + self.linear_2 = nn.Linear(in_features=4, out_features=2) def forward(self, x): x1 = self.linear_1(x) x2 = self.linear_2(x1) @@ -1240,18 +1248,16 @@ if __name__ == "__main__": module = ModuleOP() # 注册工具 - set_dump_path("./dump_data/npu") - set_dump_switch("ON") - register_hook(module, acc_cmp_dump) - - x = torch.randn(2, 2) + pdbg = PrecisionDebugger("./dump_data/npu", hook_name="dump") + pdbg.start() - module_dump(module, "MyModule") # 开启模块级精度数据dump + x = torch.randn(10, 8) + module_dump(module, "MyModuleOP") # 开启模块级精度数据dump out = module(x) module_dump_end() # 结束模块级精度数据dump loss 
= out.sum() loss.backward() - set_dump_switch("OFF") + pdbg.stop() ``` ## dump数据存盘说明 @@ -1261,66 +1267,60 @@ dump结果目录结构示例如下: ```bash ├── dump_path │ └── ptdbg_dump_{version} -│ ├── rank0 -│ │ ├── dump -| | | ├── Tensor_permute_1_forward.npy -| | | ├── MyModule_0_forward_input.npy # 开启模块级精度数据dump时存在模块级的dump数据文件 -| | | ... -| | | └── Fcuntion_linear_5_backward_output.npy -│ │ └── dump.pkl -│ ├── rank1 -| | ├── dump -| | | └── ... -| | └── dump.pkl -│ ├── ... -│ | -| └── rank7 +│ ├── step0 +│ | ├── rank0 +│ | │ ├── dump +| | | | ├── Tensor_permute_1_forward.npy +| | | | ├── MyModule_0_forward_input.npy # 开启模块级精度数据dump时存在模块级的dump数据文件 +| | | | ... +| | | | └── Fcuntion_linear_5_backward_output.npy +│ | │ └── dump.pkl +│ | ├── rank1 +| | | ├── dump +| | | | └── ... +| | | └── dump.pkl +│ | ├── ... +│ | | +| | └── rank7 +│ ├── step1 +│ | ├── ... +│ ├── step2 ``` dump过程中,npy文件在对应算子或者模块被执行后就会落盘,而pkl文件则需要在正常执行PrecisionDebugger.stop()或set_dump_switch("OFF")后才会被落盘保存,异常的程序终止会保存终止前被执行算子的相关npy文件,但是不会生成pkl文件。 -其中ptdbg_dump_{version}为未设置set_dump_path的dump_tag参数时的默认命名;rank为设备上各卡的ID,每张卡上dump的数据会生成对应dump目录。 - -当使用debugger方式dump数据时,配置了PrecisionDebugger模块的step=[]参数,dump结果目录则以step为父目录,例如配置step=[0,1,2]时,dump结果目录为: - -``` -├── dump_path -│ └── step0 -│ | └── ptdbg_dump_{version} -│ | | ├── rank0 -│ | | ├── ... -│ | | ├── rank7 -| ├── step1 -| | | ├── ... 
-│ └── step2 -``` +其中`ptdbg_dump_{version}`为默认命名,debugger方式dump不支持修改该文件夹名称,使用set_dump_path函数则支持通过dump_tag参数修改文件夹名称;rank为设备上各卡的ID,每张卡上dump的数据会生成对应dump目录。 **精度比对dump场景** 精度比对dump场景的结果如下: -* dump.pkl文件:包含dump数据的API名称、dtype、 shape以及各数据的max、min、mean统计信息。 +* dump.pkl文件:包含dump数据的API名称(命名格式为:`{api_type}_{api_name}_{API调用次数}_{前向反向}_{input/output}.{参数序号}`)、dtype、 shape以及各数据的max、min、mean统计信息。 + + 其中,“参数序号”表示该API下的第n个参数,例如1,则为第一个参数,若该参数为list格式,则根据list继续排序,例如1.1,表示该API的第1个参数的第1个子参数。 * dump目录:目录下为npy格式的dump数据。 npy文件保存的前缀和PyTorch对应关系如下 - | 前缀 | Torch模块 | - | ---------- | ------------------- | - | Tensor | torch.Tensor | - | Torch | torch | - | Functional | torch.nn.functional | - | NPU | NPU亲和算子 | - | VF | torch._VF | + | 前缀 | Torch模块 | + | ----------- | ------------------- | + | Tensor | torch.Tensor | + | Torch | torch | + | Functional | torch.nn.functional | + | NPU | NPU亲和算子 | + | VF | torch._VF | + | Aten | torch.ops.aten | + | Distributed | torch.distributed | -当set_dump_switch或configure_hook配置mode参数(例如:mode="api_stack" )时,dump结果的文件名会添加api_stack前缀,dump结果如下: +当configure_hook或set_dump_switch配置mode参数(例如:mode="api_stack" )时,dump结果的文件名会添加api_stack前缀,dump结果如下: * api_stack_dump.pkl * api_stack_dump目录 **溢出检测dump场景** -register_hook设置了overflow_check时,检测API溢出,dump结果的文件名格式为:`{api_type}___{api_name}___{API调用次数}_{前向反向}_{当前溢出次数}`,dump结果示例如下: +PrecisionDebugger模块的hook_name参数或register_hook函数设置了overflow_check时,检测API溢出,dump结果的文件名格式为:`{api_type}___{api_name}___{API调用次数}_{前向反向}_{当前溢出次数}`,dump结果示例如下: * `Tensor___add___1_forward_1.pkl` * `Tensor___add___1_forward_1`目录 @@ -1543,7 +1543,7 @@ Parse >>> vc -m ./my_dump_path -g ./golden_data_path | 参数名称 | 说明 | 是否必选 | | --------- | ------------------------------------------------------------ | -------- | | -n | 需转换的dump数据文件或dump数据文件目录。 | 是 | - | -f | 开启format转换,指定该参数时需要配置format格式,若未指定该参数,则直接转换为npy格式。 | 否 | + | -f | 开启format转换,指定该参数时需要配置format格式。当前内置的Format转换支持如下类型:
FRACTAL_NZ转换成NCHW
FRACTAL_NZ转换成NHWC
FRACTAL_NZ转换成ND
HWCN转换成FRACTAL_Z
HWCN转换成NCHW
HWCN转换成NHWC
NC1HWC0转换成HWCN
NC1HWC0转换成NCHW
NC1HWC0转换成NHWC
NCHW转换成FRACTAL_Z
NCHW转换成NHWC
NHWC转换成FRACTAL_Z
NHWC转换成HWCN
NHWC转换成NCHW
NDC1HWC0转换成NCDHW | 否 | | -out | 结果输出目录。 | 否 | | -cmp_path | 指定msaccucmp路径,默认路径为:/usr/local/Ascend/ascend-toolkit/latest/tools/operator_cmp/compare/msaccucmp.py | 否 | diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py index d8ead53e7a3a61595e503bc0f9163dfb1f32018e..cf96045a2778473426eaa3d9695e6bf0fb99dd36 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py @@ -68,7 +68,7 @@ class Const: DUMP_RATIO_MAX = 100 SUMMERY_DATA_NUMS = 256 FLOAT_EPSILON = np.finfo(float).eps - SUPPORT_DUMP_MODE = ['api', 'acl', 'model'] + SUPPORT_DUMP_MODE = ['api', 'acl'] ON = 'ON' OFF = 'OFF' BACKWARD = 'backward' @@ -87,7 +87,6 @@ class Const: AUTO = "auto" ONLINE_DUMP_MODE = [ALL, LIST, AUTO, OFF] - API_PATTERN = r"^[A-Za-z0-9]+[_]+([A-Za-z0-9]+[_]*[A-Za-z0-9]+)[_]+[0-9]+[_]+[A-Za-z0-9]+" WRITE_FLAGS = os.O_WRONLY | os.O_CREAT WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR @@ -413,7 +412,7 @@ def is_starts_with(string, prefix_list): def check_stack_mode(pkl_fp): api_prefix = "" - api_pattern = r'\[\"([0-9a-zA-Z_]+_(for|back)ward)_(in|out)put(\.[0-9]+)?' + api_pattern = r'\[\"([0-9a-zA-Z_.]+_(for|back)ward)_(in|out)put(\.[0-9]+)?' 
is_stack_mode = False for index, line in enumerate(pkl_fp): if index == 0: @@ -501,12 +500,6 @@ def get_dump_data_path(dump_dir): return dump_data_path, file_is_exist -def get_api_name_from_matcher(name): - api_matcher = re.compile(Const.API_PATTERN) - match = api_matcher.match(name) - return match.group(1) if match else "" - - def modify_dump_path(dump_path, mode): if mode == Const.ALL: return dump_path diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py index e8eb2adb82a95c47e60af2fb4bfbc9e21c9b7b1d..6d846b9c2148bc57f7aaa9c056a3d141ce32f4b7 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py @@ -1,7 +1,7 @@ import os import torch from ..common.utils import Const, check_switch_valid, generate_compare_script, check_is_npu, print_error_log, \ - CompareException + CompareException, print_warn_log from ..dump.dump import DumpUtil, acc_cmp_dump, write_to_disk, get_pkl_file_path from ..dump.utils import set_dump_path, set_dump_switch_print_info, generate_dump_path_str, \ set_dump_switch_config, set_backward_input @@ -13,31 +13,41 @@ from .debugger_config import DebuggerConfig class PrecisionDebugger: - first_start = True - hook_func = None - config = None - model = None + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super(PrecisionDebugger, cls).__new__(cls) + cls._instance.first_start = True + cls._instance.hook_func = None + cls._instance.config = None + cls._instance.model = None + cls._instance.enable_dataloader = False + return cls._instance def __init__(self, dump_path=None, hook_name=None, rank=None, step=None, enable_dataloader=False, model=None): - if hook_name is None: - err_msg = "You must provide hook_name argument to 
PrecisionDebugger\ - when config is not provided." - raise Exception(err_msg) - step = step or [] - self.config = DebuggerConfig(dump_path, hook_name, rank, step) - self.configure_hook = self.get_configure_hook(self.config.hook_name) - self.configure_hook() - DumpUtil.target_iter = self.config.step - DumpUtil.target_rank = self.config.rank - set_dump_path(self.config.dump_path) - PrecisionDebugger.hook_func = overflow_check if self.config.hook_name == "overflow_check" else acc_cmp_dump - PrecisionDebugger.model = model - if not isinstance(enable_dataloader, bool): - print_error_log("Params enable_dataloader only support True or False.") - raise CompareException(CompareException.INVALID_PARAM_ERROR) - if enable_dataloader: - DumpUtil.iter_num -= 1 - torch.utils.data.dataloader._BaseDataLoaderIter.__next__ = iter_tracer(torch.utils.data.dataloader._BaseDataLoaderIter.__next__) + if not hasattr(self, 'initialized'): + self.initialized = True + if hook_name is None: + err_msg = "You must provide hook_name argument to PrecisionDebugger\ + when config is not provided." 
+ raise Exception(err_msg) + step = step or [] + self.config = DebuggerConfig(dump_path, hook_name, rank, step) + self.configure_hook = self.get_configure_hook(self.config.hook_name) + self.configure_hook() + DumpUtil.target_iter = self.config.step + DumpUtil.target_rank = self.config.rank + set_dump_path(self.config.dump_path) + self.hook_func = overflow_check if self.config.hook_name == "overflow_check" else acc_cmp_dump + self.model = model + self.enable_dataloader = enable_dataloader + if not isinstance(enable_dataloader, bool): + print_error_log("Params enable_dataloader only support True or False.") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + if enable_dataloader: + DumpUtil.iter_num -= 1 + torch.utils.data.dataloader._BaseDataLoaderIter.__next__ = iter_tracer(torch.utils.data.dataloader._BaseDataLoaderIter.__next__) def get_configure_hook(self, hook_name): hook_dict = {"dump": self.configure_full_dump, "overflow_check": self.configure_overflow_dump} @@ -74,36 +84,53 @@ class PrecisionDebugger: @classmethod def start(cls): - if DumpUtil.iter_num in DumpUtil.target_iter or len(DumpUtil.target_iter) == 0: - if cls.first_start: - register_hook_core(cls.hook_func, cls.model) - cls.first_start = False - DumpUtil.dump_switch = "ON" - OverFlowUtil.overflow_check_switch = "ON" - dump_path_str = generate_dump_path_str() - set_dump_switch_print_info("ON", DumpUtil.dump_switch_mode, dump_path_str) - elif len(DumpUtil.target_iter) != 0: - if DumpUtil.iter_num > max(DumpUtil.target_iter): - PrecisionDebugger.stop() - raise Exception("ptdbg: exit after iteration {}".format(DumpUtil.target_iter)) + instance = cls._instance + if not instance: + raise Exception("No instance of PrecisionDebugger found.") + if instance.enable_dataloader: + print_warn_log("DataLoader is enabled, start() skipped.") else: - cls.stop() + if DumpUtil.iter_num in DumpUtil.target_iter or not DumpUtil.target_iter: + if instance.first_start: + register_hook_core(instance.hook_func, 
instance.model) + instance.first_start = False + DumpUtil.dump_switch = "ON" + OverFlowUtil.overflow_check_switch = "ON" + dump_path_str = generate_dump_path_str() + set_dump_switch_print_info("ON", DumpUtil.dump_switch_mode, dump_path_str) + elif DumpUtil.target_iter and DumpUtil.iter_num > max(DumpUtil.target_iter): + cls.stop() + raise Exception("ptdbg: exit after iteration {}".format(max(DumpUtil.target_iter))) + else: + cls.stop() @classmethod def stop(cls): - DumpUtil.dump_switch = "OFF" - OverFlowUtil.overflow_check_switch = "OFF" - dump_path_str = generate_dump_path_str() - set_dump_switch_print_info("OFF", DumpUtil.dump_switch_mode, dump_path_str) - write_to_disk() - if check_is_npu() and DumpUtil.dump_switch_mode in [Const.ALL, Const.API_STACK, Const.LIST, Const.RANGE, Const.API_LIST]: - generate_compare_script(DumpUtil.dump_data_dir, get_pkl_file_path(), DumpUtil.dump_switch_mode) + instance = cls._instance + if not instance: + raise Exception("PrecisionDebugger instance is not created.") + if instance.enable_dataloader: + print_warn_log("DataLoader is enabled, stop() skipped.") + else: + DumpUtil.dump_switch = "OFF" + OverFlowUtil.overflow_check_switch = "OFF" + dump_path_str = generate_dump_path_str() + set_dump_switch_print_info("OFF", DumpUtil.dump_switch_mode, dump_path_str) + write_to_disk() + if check_is_npu() and DumpUtil.dump_switch_mode in [Const.ALL, Const.API_STACK, Const.LIST, Const.RANGE, Const.API_LIST]: + generate_compare_script(DumpUtil.dump_data_dir, get_pkl_file_path(), DumpUtil.dump_switch_mode) @classmethod def step(cls): - DumpUtil.dump_init_enable = True - DumpUtil.iter_num += 1 - HOOKModule.module_count = {} + instance = cls._instance + if not instance: + raise Exception("PrecisionDebugger instance is not created.") + if not instance.enable_dataloader: + DumpUtil.iter_num += 1 + DumpUtil.dump_init_enable = True + HOOKModule.module_count = {} + else: + print_warn_log("DataLoader is enabled, step() skipped.") @staticmethod def 
incr_iter_num_maybe_exit(): diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py index 4d330e26fa760f36e5b40e35145b1d8a67ec4d7d..72da133e8667394abc98dd15046af7c6e4cfa27c 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py @@ -20,7 +20,6 @@ import json import os import threading from pathlib import Path -from collections import defaultdict import numpy as np import torch @@ -40,12 +39,45 @@ from ..common.file_check_util import FileOpen, change_mode, FileCheckConst, chec forward_init_status = False backward_init_status = False -api_list = [] thread_lock = threading.Lock() pkl_name = "" rank = os.getpid() multi_output_apis = ["_sort_", "npu_flash_attention"] -module_count = defaultdict(int) +module_count = {} + + +class APIList(list): + threshold = 1000 + + def __init__(self, *args): + self.dump_count = 0 + self.pkl_mode_changed = False + super().__init__(*args) + + def flush(self): + pkl_path = get_pkl_file_path() + if len(self) == 0 or pkl_path == "": + return + with FileOpen(pkl_path, 'a') as f: + try: + f.write('\n'.join(json.dumps(item) for item in self)) + f.write('\n') + except IOError as ex: + raise Exception("write to disk failed") from ex + self.dump_count += 1 + print_info_log(f"write {len(self)} items to {pkl_path} the {self.dump_count} time") + if not self.pkl_mode_changed: + change_mode(pkl_path, FileCheckConst.DATA_FILE_AUTHORITY) + self.pkl_mode_changed = True + self.clear() + + def append(self, data): + list.append(self, data) + if len(self) >= APIList.threshold: + self.flush() + + +api_list = APIList() class DataInfo(object): @@ -195,12 +227,8 @@ def rename_(): global rank global pkl_name if rank is not None and pkl_name is not None: - if DumpUtil.target_iter: - dir_name = os.path.join(DumpUtil.dump_root, "step{}".format(DumpUtil.iter_num), 
"rank{}".format(os.getpid())) - new_name = os.path.join(DumpUtil.dump_root, "step{}".format(DumpUtil.iter_num), "rank{}".format(rank)) - else: - dir_name = os.path.join(DumpUtil.dump_root, "rank{}".format(os.getpid())) - new_name = os.path.join(DumpUtil.dump_root, "rank{}".format(rank)) + dir_name = os.path.join(DumpUtil.dump_root, "step{}".format(DumpUtil.iter_num), "rank{}".format(os.getpid())) + new_name = os.path.join(DumpUtil.dump_root, "step{}".format(DumpUtil.iter_num), "rank{}".format(rank)) if not os.path.exists(new_name) and os.path.exists(dir_name): _, file_name = os.path.split(pkl_name) os.rename(dir_name, new_name) @@ -218,11 +246,10 @@ def dump_acc_cmp(name, in_feat, out_feat, dump_step, module): dump_file = modify_dump_path(dump_file, DumpUtil.dump_switch_mode) global rank dump_dir, dump_filename = os.path.split(dump_file) - if DumpUtil.target_iter: - dump_dir = os.path.join(dump_dir, "step{}".format(DumpUtil.iter_num)) - if not os.path.exists(dump_dir): - Path(dump_dir).mkdir(mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True) - dump_file = os.path.join(dump_dir, dump_filename) + dump_dir = os.path.join(dump_dir, "step{}".format(DumpUtil.iter_num)) + if not os.path.exists(dump_dir): + Path(dump_dir).mkdir(mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True) + dump_file = os.path.join(dump_dir, dump_filename) rank_this = get_tensor_rank(in_feat, out_feat) DumpUtil.dump_root = os.path.dirname(DumpUtil.dump_path) if rank_this is not None and rank != rank_this: @@ -233,10 +260,7 @@ def dump_acc_cmp(name, in_feat, out_feat, dump_step, module): npy_dir = dump_filename[:-4] else: npy_dir = dump_filename - if DumpUtil.target_iter: - DumpUtil.dump_data_dir = os.path.join(DumpUtil.dump_root, "step{}".format(DumpUtil.iter_num), "rank{}".format(rank), npy_dir) - else: - DumpUtil.dump_data_dir = os.path.join(DumpUtil.dump_root, "rank{}".format(rank), npy_dir) + DumpUtil.dump_data_dir = os.path.join(DumpUtil.dump_root, "step{}".format(DumpUtil.iter_num), 
"rank{}".format(rank), npy_dir) if DumpUtil.target_rank is not None: if rank != DumpUtil.target_rank: return @@ -342,23 +366,35 @@ def dump_mode_backward_acl_dump(module, module_name, grad_path): print_info_log("Dump %s op file." % module_name) +def module_count_func(name, name_template): + module_name = name.split("_")[-3] + if Const.FORWARD in name_template: + if module_name not in module_count: + module_count[module_name] = [0, [0]] + else: + if module_count[module_name][-1] and \ + module_count[module_name][0] != module_count[module_name][-1][-1]: + module_count[module_name][-1].pop() + module_count[module_name][0] += 1 + module_count[module_name][-1].append(module_count[module_name][0]) + index = module_count[module_name][0] + else: + index = module_count[module_name][-1].pop() + return index + + def acc_cmp_dump(name, **kwargs): dump_step = kwargs.get('dump_step', 1) pid = kwargs.get('pid') + name_template = name if not pid: return RuntimeError("Not get the specified process pid.") def acc_cmp_hook(module, in_feat, out_feat=None): - nonlocal name - if "_{}_" in name: - module_name = name.split("_")[1] - if Const.BACKWARD in name: - index = module_count[module_name] - 1 - module_count[module_name] = index - else: - index = module_count[module_name] - module_count[module_name] = index + 1 - name = name.format(index) + nonlocal name, name_template + if "_{}_" in name_template: + index = module_count_func(name, name_template) + name = name_template.format(index) if pid == os.getpid(): dump_acc_cmp(name, in_feat, out_feat, dump_step, module) if hasattr(module, "input_args"): @@ -370,16 +406,7 @@ def acc_cmp_dump(name, **kwargs): def write_to_disk(): - global api_list - if api_list: - with FileOpen(pkl_name, 'a') as f: - try: - f.write('\n'.join(json.dumps(item) for item in api_list)) - f.write('\n') - except: - raise Exception("write to disk failed") - change_mode(pkl_name, FileCheckConst.DATA_FILE_AUTHORITY) - api_list = [] + api_list.flush() def 
get_pkl_file_path(): diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py index 5ab2152479801e99268f67027db375e266178e4c..950294a68fe368de18b2925180ef96177cee37fa 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py @@ -8,7 +8,7 @@ import torch.distributed as dist from ..dump import dump from ..common.utils import print_error_log, CompareException, DumpException, Const, get_time, print_info_log, \ - check_mode_valid, get_api_name_from_matcher, check_switch_valid, check_dump_mode_valid, check_summary_only_valid, generate_compare_script, \ + check_mode_valid, check_switch_valid, check_dump_mode_valid, check_summary_only_valid, generate_compare_script, \ check_is_npu, check_file_valid, make_dump_path_if_not_exists, check_path_before_create from ..common.file_check_util import FileChecker, FileCheckConst, check_path_length, check_path_pattern_vaild @@ -159,7 +159,7 @@ def set_dump_path(fpath=None, dump_tag='ptdbg_dump'): def get_tensor_rank(in_feat, out_feat): if dist.is_initialized(): return dist.get_rank() - + def get_tensor_rank_single(x): if isinstance(x, (list, tuple)): if len(x) > 0: diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py index 55b97208a6d4981889074d8d70a095f7d56b1d23..b2517633da9a58429f8d728f3810ff70900a8a28 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py @@ -15,27 +15,29 @@ # limitations under the License. 
""" - import functools - +import threading import torch import torch.nn as nn import torch.utils.hooks as full_hooks -g_stop_hook = False - class HOOKModule(nn.Module): module_count = {} - + inner_stop_hook = {} + def __init__(self, hook) -> None: super(HOOKModule, self).__init__() self.has_overflow = False self.input_args = tuple() self.input_kwargs = dict() self.prefix = "" + self.current_thread = threading.current_thread().ident + if self.current_thread not in HOOKModule.inner_stop_hook: + HOOKModule.inner_stop_hook[self.current_thread] = False + self.stop_hook = HOOKModule.inner_stop_hook.get(self.current_thread, False) - if not g_stop_hook: + if not self.stop_hook: if hasattr(self, "prefix_op_name_"): self.prefix = self.prefix_op_name_ @@ -45,19 +47,17 @@ class HOOKModule(nn.Module): else: HOOKModule.module_count[self.prefix] += 1 self.prefix = self.prefix + str(HOOKModule.module_count[self.prefix] - 1) + '_' - self.register_forward_hook(hook(self.prefix + "forward")) self.register_backward_hook(hook(self.prefix + "backward")) def __call__(self, *input, **kwargs): changed = False - global g_stop_hook - if not g_stop_hook: - g_stop_hook = True + if not self.stop_hook: + HOOKModule.inner_stop_hook[self.current_thread] = True changed = True result = self._call_func(*input, **kwargs) if changed: - g_stop_hook = False + HOOKModule.inner_stop_hook[self.current_thread] = False return result def _call_func(self, *input, **kwargs): @@ -105,4 +105,4 @@ class HOOKModule(nn.Module): functools.update_wrapper(wrapper, hook) grad_fn.register_hook(wrapper) self._maybe_warn_non_full_backward_hook(input, result, grad_fn) - return result \ No newline at end of file + return result diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py index 5297ed70e21a3c42e24490bf0a178de2e2c27553..bc6b9c4949bc92abd5dac60b944c2e02a03bdfef 100644 --- 
a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py @@ -70,10 +70,7 @@ def register_hook(model, hook, **kwargs): if dump_mode == 'acl': DumpUtil.dump_switch_mode = dump_mode DumpUtil.set_acl_config(dump_config_file) - if dump_mode == 'model': - register_hook_core(hook, model) - else: - register_hook_core(hook) + register_hook_core(hook) def init_overflow_nums(overflow_nums): @@ -127,11 +124,12 @@ def register_hook_core(hook, model=None): if not isinstance(model, torch.nn.Module): print_error_log("The argument model must be an object of torch.nn.Module") raise CompareException(CompareException.INVALID_PARAM_ERROR) - for _, module in model.named_modules(): - if "torch.nn.modules" in str(module.__class__): - prefix = "Module_" + module.__class__.__name__ - module.register_forward_hook(hook(prefix + "_{}_" + "forward")) - module.register_backward_hook(hook(prefix + "_{}_" + "backward")) + for name, module in model.named_modules(): + if module == model: + continue + prefix = name + "_" + module.__class__.__name__ + module.register_forward_hook(hook(prefix + "_{}_" + "forward")) + module.register_backward_hook(hook(prefix + "_{}_" + "backward")) else: api_register.initialize_hook(hook) api_register.api_modularity() diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml index b483092a7cae1c8aed09dfe5dc1a2e1c01bf8408..0f1e2da70a49a5c9ff33e372b5b676b7338cae4c 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml @@ -1074,6 +1074,7 @@ torch_npu: - npu_rotated_iou - npu_conv2d - npu_softmax_cross_entropy_with_logits + - npu_all_gather_base_mm aten: - signbit 
diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py index 10875743c76161f093c08d959d85b941c48b5115..afa0fea202659619c7df8f4f67e63fd819c48228 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py @@ -16,6 +16,7 @@ from .utils import OverFlowUtil, dump_overflow, check_overflow_npu, clear_overfl from ..dump.utils import DumpUtil, Const, get_tensor_rank, create_dirs_if_not_exist from .info_dump import write_api_info_json, ForwardAPIInfo, BackwardAPIInfo from ..dump import dump +from ..common.file_check_util import FileCheckConst backward_init_status = False api_overflow = [] @@ -91,12 +92,11 @@ def overflow_check(name, **kwargs): return dump_file = DumpUtil.get_dump_path() global rank - if DumpUtil.target_iter: - dump_dir, dump_filename = os.path.split(dump_file) - dump_dir = os.path.join(dump_dir, "step{}".format(DumpUtil.iter_num)) - if not os.path.exists(dump_dir): - Path(dump_dir).mkdir(mode=0o750, exist_ok=True) - dump_file = os.path.join(dump_dir, dump_filename) + dump_dir, dump_filename = os.path.split(dump_file) + dump_dir = os.path.join(dump_dir, "step{}".format(DumpUtil.iter_num)) + if not os.path.exists(dump_dir): + Path(dump_dir).mkdir(mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True) + dump_file = os.path.join(dump_dir, dump_filename) rank_this = get_tensor_rank(in_feat, out_feat) DumpUtil.dump_root = os.path.dirname(DumpUtil.dump_path) if rank_this is not None and rank != rank_this: diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py index 8b587eb3999e9c6174a07cd1424ad97d2f5d437e..2dbda307857e54bc01bca1804c3269fc355fa6b4 100644 
--- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_tool.py @@ -94,7 +94,8 @@ class ParseTool: msaccucmp_path = self.util.path_strip(args.msaccucmp_path) if args.msaccucmp_path else Const.MS_ACCU_CMP_PATH self.util.check_path_valid(msaccucmp_path) self.util.check_executable_file(msaccucmp_path) - self.util.check_str_param(args.format) + if args.format: + self.util.check_str_param(args.format) self.compare.convert_dump_to_npy(args.path, args.format, args.output_path, msaccucmp_path) @catch_exception diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py b/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py index 8ce00ca4f7b665e8a2d1e9135fec2e6aaa3d8e2f..a7128e41c4cd215ee2ec4c1c56fd0a46f4a8c5f0 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py @@ -20,7 +20,7 @@ import stat from pathlib import Path import setuptools -VERSION = '4.0.T2' +VERSION = '5.0.T1' def generate_ptdbg_ascend_version(): diff --git a/debug/accuracy_tools/ptdbg_ascend/test/ut/test_utils.py b/debug/accuracy_tools/ptdbg_ascend/test/ut/test_utils.py index 6adbab471fb6939c1a50b0b53a32fe9ce71a226d..fcbd728ec187884a01cf7ffad331f88df89166ef 100644 --- a/debug/accuracy_tools/ptdbg_ascend/test/ut/test_utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/test/ut/test_utils.py @@ -7,17 +7,6 @@ from ptdbg_ascend.common.file_check_util import FileCheckException class TestUtilsMethods(unittest.TestCase): - def test_get_api_name_from_matcher(self): - normal_name = "Functional_relu__1_output" - unusual_name = "Functional_norm_layer_1_output" - error_name = "Tensor_onnx::connect_1_input" - api_name_1 = utils.get_api_name_from_matcher(normal_name) - api_name_2 = utils.get_api_name_from_matcher(unusual_name) - api_name_3 = utils.get_api_name_from_matcher(error_name) - self.assertEqual(api_name_1, "relu") - 
self.assertEqual(api_name_2, "norm_layer") - self.assertEqual(api_name_3, "") - def test_check_file_or_directory_path_1(self): file = "list" with pytest.raises(FileCheckException) as error: diff --git a/plugins/tensorboard-plugins/tb_plugin/README.md b/plugins/tensorboard-plugins/tb_plugin/README.md index 6f2a2af1708600171fb9d35050f13f7f1464a103..514f889cbeafbb7798e020d7a64dc09351991c59 100644 --- a/plugins/tensorboard-plugins/tb_plugin/README.md +++ b/plugins/tensorboard-plugins/tb_plugin/README.md @@ -9,8 +9,10 @@ pandas >= 1.0.0 ,tensorboard >= 2.11.0,protobuf <= 3.20.3 * 安装方式 1. pip安装(推荐) \ - 现本插件已经上传到pypi社区,用户可在python环境下直接通过以下pip指令进行安装:\ + * 现本插件已经上传到pypi社区,用户可在python环境下直接通过以下pip指令进行安装:\ `pip install torch-tb-profiler-ascend` + * 也可在pypi社区上下载离线whl包,传输到无法访问公网的环境上离线安装使用。访问[下载链接](https://pypi.org/project/torch-tb-profiler-ascend/#files)选择whl包进行下载,之后便可使用指令安装(此处{version}为whl包实际版本)\ + `pip install torch-tb-profiler_ascend_{version}_py3_none_any.whl` 2. 从源代码安装 * 从仓库下载源码: @@ -18,12 +20,13 @@ `git clone https://gitee.com/ascend/att.git` * 进入目录 `/plugins/tensorboard_plugins/tb_plugin` 下. 
- + * 编译前端代码 + - `python setup.py build_fe` \ + **注意**: build_fe步骤需要安装yarn和Node.js环境 * 执行安装命令可直接安装: - `pip install .` + - `pip install .` * 或: 构建whl包安装 - - `python setup.py build_fe sdist bdist_wheel` \ - **注意**: build_fe步骤需要安装yarn和Node.js环境 + - `python setup.py build_fe sdist bdist_wheel` 在 `/tb_plugins/profiling/tb_plugin/dist` 目录下取出whl包,使用以下指令安装(此处{version}为whl包实际版本) @@ -263,7 +266,7 @@ ##### 文件配置 ###### 文件导入 - 界面分为左侧边栏和右侧展示界面。点击左侧的Import Files或在左侧未勾选文件时点击右侧界面中心的Import Files字体,将会弹出系统文件资源管理窗,可以上传需要比对的.txt或.log格式的模型网络训练日志文件。 + 界面分为左侧边栏和右侧展示界面。点击左侧的Import Files或在左侧未勾选文件时点击右侧界面中心的Import Files字体,将会弹出系统文件资源管理窗,可以上传需要比对的模型网络训练日志文件。 注:当前最多支持上传6个文件,单个文件大小不能超过10MB。 ![Alt text](./docs/images/accuracy.PNG) diff --git "a/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" "b/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" index 4c6800ec528c8dc1cba44f006c79f0f4f64a027c..b7a8bf1fd0e7eec640e46af76e16c6a228f335ba 100644 Binary files "a/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" and "b/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" differ diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx index 73ffbf7dc8daf2cb40fc40d48eefd9f8e2cdcbf9..ef9b170ec7a3de46039e5345ddf574f6fd620077 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx @@ -185,12 +185,7 @@ export const AccuracyLeftPanel: React.FC = (props) => { } const addFile = (fileName: string, fileContent: string) => { - // 限制文件后缀为.log或.txt const fileLength = fileName.length - if (fileLength <= 4 || !['.txt', '.log'].includes(fileName.slice(fileLength 
- 4).toLowerCase())) { - message.warn('Please select a file with the extension of "txt" or "log"') - return - } const tempList: FileInfo[] = JSON.parse(JSON.stringify(fileList)) // 上传同名文件加上(1~最大文件数减1)标识 if (!!tempList.find(item => item.fileName === fileName)) { @@ -312,7 +307,6 @@ export const AccuracyLeftPanel: React.FC = (props) => { id='accComparisonSelectFile' style={{ display: 'none' }} type='file' - accept='.txt,.log' onChange={uploadFile} /> diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx index a6f6a15d9027d1915cd32d771af4ed5527df76ed..e8071b2c5966d944804b4d8abd780d8389042d38 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx @@ -124,7 +124,8 @@ const DiffColumnChart: React.FC = ( trigger: 'axis', formatter: function (params: any) { const index = params[0].name.indexOf('@') - var res = `${index > -1 ? params[0].name.slice(index + 1) : params[0].name}
` + const safeName = params[0].name.replace(//g, '>') + var res = `${index > -1 ? safeName.slice(index + 1) : safeName}
` for (const item of params) { if (typeof item.value[item.encode.y[0]] === 'number') { res += ` = (props) => { formatter: (data) => { const typedData = data as echarts.DefaultLabelFormatterCallbackParams const index = typedData.name.indexOf('_') - return `${index > -1 ? typedData.name.slice(index + 1) : - typedData.name}
${tooltip_mode === 'both' ? + const safeName = typedData.name.replace(//g, '>') + return `${index > -1 ? safeName.slice(index + 1) : safeName}
${tooltip_mode === 'both' ? typedData.value : ''}(${typedData.percent}%)` }, confine: true, @@ -117,8 +117,9 @@ export const PieChart: React.FC = (props) => { const currentItem = rowsWithUniqueName.find(item => item.name === data.name) const index = data.name.indexOf('_') const percent = ((currentItem?.value || 0) * 100 / totalValue).toFixed(2) - return `${index > -1 ? data.name.slice(index + 1) : data.name}
${tooltip_mode === 'both' ? - (currentItem?.value || 0) : ''}(${percent}%)` + const safeName = data.name.replace(//g, '>') + return `${index > -1 ? safeName.slice(index + 1) : + safeName}
${tooltip_mode === 'both' ? (currentItem?.value || 0) : ''}(${percent}%)` } } }, diff --git a/plugins/tensorboard-plugins/tb_plugin/setup.py b/plugins/tensorboard-plugins/tb_plugin/setup.py index 6a023c8243bdf6e57b6084a0cb9c341f94a991f6..3c09006122c776df8fbe8af5836711613e3f6a9c 100644 --- a/plugins/tensorboard-plugins/tb_plugin/setup.py +++ b/plugins/tensorboard-plugins/tb_plugin/setup.py @@ -43,7 +43,7 @@ def get_version(rel_path): INSTALL_REQUIRED = [ "pandas >= 1.0.0", - "tensorboard >= 1.15, !=2.1.0" + "tensorboard >= 2.11.0" ] TESTS_REQUIRED = INSTALL_REQUIRED + [ diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py index d5ed2b3d4c574a3d21a0ba50dc1f05d3ceb39365..f9c3e88c388718994d9741f01a003f4ecc4e2a2f 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py @@ -4,4 +4,4 @@ # Entry point for Pytorch TensorBoard plugin package. -__version__ = '0.4.0.4' +__version__ = '0.4.0.5' diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py index 14ca1a8b0f6563b08a82ef90fdcf9b147c5008ff..0adf134730c144decb8049c03acaf56136b9a67f 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py @@ -18,6 +18,7 @@ # # Modifications: Add visualization of PyTorch Ascend profiling. # -------------------------------------------------------------------------- +import enum import re from collections import namedtuple @@ -25,15 +26,15 @@ PLUGIN_NAME = 'pytorch_profiler' WORKER_PATTERN = re.compile(r"""^(.*?) # worker name (\.\d+)? 
# optional timestamp like 1619499959628 used as span name - \.pt\.trace\.json # the ending suffix - (?:\.gz)?$""", re.X) # optional .gz extension + \.pt\.trace\.json$""", re.X) -TRACE_PATTERN = re.compile(r"""^trace_view\.json(\.gz)?$""") +TRACE_PATTERN = re.compile(r"""^trace_view\.json$""") WORKER_SPAN_PATTERN = re.compile(r"""([^\\]*)_(\d+(\.\d+)?)_ascend_pt$""") NODE_PROCESS_PATTERN = re.compile(r"""^(.*)_(\d+)""") MONITOR_RUN_REFRESH_INTERNAL_IN_SECONDS = 10 MAX_GPU_PER_NODE = 64 +MAX_FILE_SIZE = 500 * 1024 * 1024 View = namedtuple('View', 'id, name, display_name') OVERALL_VIEW = View(1, 'overall', 'Overview') @@ -98,3 +99,16 @@ TOOLTIP_OP_TC_SELF_AICORE = \ 'Time of Device Self Duration With AICore / Device Self Duration.' TOOLTIP_OP_TC_TOTAL_AICORE = \ 'Time of Device Total Duration With AICore / Device Total Duration.' + + +class InputFilesType(enum.Enum): + KERNEL_DETAILS_CSV = 'kernel_details.csv' + MEMORY_RECORD_CSV = 'memory_record.csv' + MEMORY_OPERATOR_CSV = 'operator_memory.csv' + MEMORY_COMPONENT_CSV = 'npu_module_mem.csv' + OPERATOR_DETAILS_CSV = 'operator_details.csv' + DISTRIBUTED_STEP_CSV = 'step_trace_time.csv' + DISTRIBUTED_COMMUNICATION_JSON = 'communication.json' + + +INPUT_FILE_LIST = [e.value for e in InputFilesType] diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py index a3807634fdd0c927a25153a01d700088a14fa68d..6bd764e88d4fecd142e7a953b1adb5c4a72262b9 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py @@ -1,4 +1,4 @@ from .cache import Cache from .file import (BaseFileSystem, StatData, abspath, basename, download_file, exists, get_filesystem, glob, isdir, join, listdir, - makedirs, read, register_filesystem, relpath, walk) + makedirs, read, register_filesystem, relpath, walk, stat) diff --git 
a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py index 6d431fa1a9146b203369ba632bec46c4cbc400e3..6091fdbcd906bf49e4e631afe7d2ba57e65ce711 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py @@ -577,8 +577,11 @@ class TorchProfilerPlugin(base_plugin.TBPlugin): # Currently, assume run data is immutable, so just load once loader = RunLoader(name, run_dir['name'], self._cache, run_dir['device_target']) run = loader.load() - logger.info('Run %s loaded', name) - self._queue.put(run) + if run.profiles: + self._queue.put(run) + logger.info('Run %s loaded', name) + else: + logger.warning(f'Run {name} skipped') except Exception as ex: logger.warning('Failed to load run %s. Exception=%s', ex, name, exc_info=True) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py index ee1ce5b62deebb5d63dca8fc54f43f0cd2e1a6da..9961ae7cf9eb3144da8f1ac78e551a56ca4f27b8 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py @@ -29,6 +29,7 @@ from typing import Dict, List, Optional from .op_tree import OpTreeBuilder from .. import io, utils +from ..consts import InputFilesType, MAX_FILE_SIZE, INPUT_FILE_LIST from ..utils import href from . 
import trace from .communication import analyze_communication_nodes @@ -141,6 +142,8 @@ class RunProfileData(object): @staticmethod def parse_gpu(worker, span, path, cache_dir): trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU') + if not trace_json: + return None profile = RunProfileData.from_json(worker, span, trace_json) profile.trace_file_path = trace_path @@ -156,8 +159,16 @@ class RunProfileData(object): has_memory_operator = False has_communication_overlap = False has_communication_wait_ops = False + + def _check_file_size_valid(filepath): + if io.stat(filepath).length > MAX_FILE_SIZE: + logger.warning( + f'File "{filepath}" exceeds the maximum limit size of 500MB and will be skipped.') + return False + return True + for file in io.listdir(path): - if utils.is_npu_trace_path(file): + if utils.is_npu_trace_path(file) and _check_file_size_valid(io.join(path, file)): has_trace = True trace_file = io.join(path, file) trace_path, trace_json = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend') @@ -179,26 +190,27 @@ class RunProfileData(object): profile.profiler_start_ts = 0 for file in io.listdir(path): - if str(file) == 'kernel_details.csv': - has_kernel = True - profile.kernel_file_path = io.join(path, file) - if str(file) == 'memory_record.csv': - has_memory_record = True - profile.memory_curve_path = io.join(path, file) - if str(file) == 'operator_memory.csv': - has_memory_operator = True - profile.memory_operator_path = io.join(path, file) - if str(file) == 'npu_module_mem.csv': - profile.memory_component_path = io.join(path, file) - if str(file) == 'operator_details.csv': - profile.has_operator_view = True - profile.operator_path = io.join(path, file) - if str(file) == 'step_trace_time.csv': - has_communication_overlap = True - profile.distributed_csv_path = io.join(path, file) - if str(file) == 'communication.json': - has_communication_wait_ops = True - profile.communication_json_path = io.join(path, file) + if 
str(file) in INPUT_FILE_LIST and _check_file_size_valid(io.join(path, file)): + if InputFilesType(file) == InputFilesType.KERNEL_DETAILS_CSV: + has_kernel = True + profile.kernel_file_path = io.join(path, file) + if InputFilesType(file) == InputFilesType.MEMORY_RECORD_CSV: + has_memory_record = True + profile.memory_curve_path = io.join(path, file) + if InputFilesType(file) == InputFilesType.MEMORY_OPERATOR_CSV: + has_memory_operator = True + profile.memory_operator_path = io.join(path, file) + if InputFilesType(file) == InputFilesType.MEMORY_COMPONENT_CSV: + profile.memory_component_path = io.join(path, file) + if InputFilesType(file) == InputFilesType.OPERATOR_DETAILS_CSV: + profile.has_operator_view = True + profile.operator_path = io.join(path, file) + if InputFilesType(file) == InputFilesType.DISTRIBUTED_STEP_CSV: + has_communication_overlap = True + profile.distributed_csv_path = io.join(path, file) + if InputFilesType(file) == InputFilesType.DISTRIBUTED_COMMUNICATION_JSON: + has_communication_wait_ops = True + profile.communication_json_path = io.join(path, file) profile.has_kernel = has_kernel profile.has_memory = has_memory_operator and has_memory_record @@ -243,9 +255,13 @@ class RunProfileData(object): str_data = data.decode('utf-8') # only replace the N/A without surrounding double quote fout.write(re.sub(r'(? 
consts.MAX_FILE_SIZE: + logger.warning( + f'File "{absolute_path}" exceeds the maximum limit size of 500MB and will be skipped.') + continue worker = match.group(1) span = match.group(2) @@ -123,6 +126,10 @@ class RunLoader(object): data = RunProfileData.parse_npu(worker, span, local_file, self.caches.cache_dir) else: data = RunProfileData.parse_gpu(worker, span, local_file, self.caches.cache_dir) + if not data: + self.queue.put((None, None)) + logger.debug('finishing process data') + return if data.trace_file_path != local_file: self.caches.add_file(local_file, data.trace_file_path) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py index 0186139a59d43e54ba462e74063428c74033ea1c..df983cd7ced759f702a0b9510655ff7fb2c7c59a 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py @@ -146,19 +146,22 @@ class RunGenerator(object): title = [x.lower() for x in data[0]] title_name = RunGenerator._check_overlap_data(title) if not title_name: - logger.error("Incomplete content of CSV file.") + logger.error(f"Incomplete content of CSV file {path}.") return overlap_by_steps - for step in data[1:]: - key = step[0] - if key == '': - key = 'all' - overlap = [float(step[int(title_name[0])]), float(step[int(title_name[1])]), - float(step[int(title_name[2])]), float(step[int(title_name[3])])] - if key in overlap_by_steps: - overlap_by_steps[key] = list(np.add(overlap, overlap_by_steps[key])) - else: - overlap_by_steps[key] = list(overlap) + for idx, step in enumerate(data[1:]): + try: + key = step[0] + if key == '': + key = 'all' + overlap = [float(step[int(title_name[0])]), float(step[int(title_name[1])]), + float(step[int(title_name[2])]), float(step[int(title_name[3])])] + if key in overlap_by_steps: + overlap_by_steps[key] = 
list(np.add(overlap, overlap_by_steps[key])) + else: + overlap_by_steps[key] = list(overlap) + except (ValueError, IndexError): + logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') return overlap_by_steps @staticmethod @@ -182,6 +185,8 @@ class RunGenerator(object): if not io.exists(path): raise FileNotFoundError(path) data = io.read(path) + wait_by_step: Dict[str, Dict[str, float]] = OrderedDict() + table_ops: Dict[str, List[float]] = OrderedDict() try: communication_json = json.loads(data, strict=False) except JSONDecodeError as e: @@ -192,11 +197,13 @@ class RunGenerator(object): str_data = data.decode('utf-8') # only replace the N/A without surrounding double quote fout.write(re.sub(r'(?', abs(float(ls[1])), - round((float(ls[2]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[2] else None, - round((float(ls[3]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[3] else None, - round(float(ls[4]) / 1000, 3) if ls[4] else None] - display_datas[device_type].append(nums) + try: + nums = [ls[0] if ls[0] else '', abs(float(ls[1])), + round((float(ls[2]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[2] else None, + round((float(ls[3]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[3] else None, + round(float(ls[4]) / 1000, 3) if ls[4] else None] + display_datas[device_type].append(nums) + except ValueError: + logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') table['rows'] = display_datas for name in display_datas: devices_type.append(name) @@ -566,7 +580,8 @@ class RunGenerator(object): def _handle_memory_data(self): process_data = defaultdict() pta_or_ge_data = defaultdict() - datas = RunGenerator._get_csv_data(self.profile_data.memory_curve_path) + path = self.profile_data.memory_curve_path + datas = RunGenerator._get_csv_data(path) required_column_idxs = { 'Component': -1, 'Device Type': -1, @@ -579,21 +594,26 @@ class RunGenerator(object): if 
column_exist_count < len(required_column_idxs): logger.error('Required column is missing in file "memory_record.csv"') else: - for ls in datas[1:]: - time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) - device_type = ls[device_type_idx] - if ls[tag_type_idx] == 'PTA+GE': - process_data.setdefault(device_type, {}).setdefault('Allocated', []).append( - [time_column, round(float(ls[allocated_idx]), 3)]) - process_data.setdefault(device_type, {}).setdefault('Reserved', []).append( - [time_column, round(float(ls[reserved_idx]), 3)]) - elif ls[tag_type_idx] == 'APP': - line_chart_data = [time_column, None, round(float(ls[reserved_idx]), 3)] - pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append(line_chart_data) - elif ls[tag_type_idx] in ('PTA', 'GE'): - line_chart_data = [time_column, round(float(ls[allocated_idx]), 3), - round(float(ls[reserved_idx]), 3)] - pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append(line_chart_data) + for idx, ls in enumerate(datas[1:]): + try: + time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) + device_type = ls[device_type_idx] + if ls[tag_type_idx] == 'PTA+GE': + process_data.setdefault(device_type, {}).setdefault('Allocated', []).append( + [time_column, round(float(ls[allocated_idx]), 3)]) + process_data.setdefault(device_type, {}).setdefault('Reserved', []).append( + [time_column, round(float(ls[reserved_idx]), 3)]) + elif ls[tag_type_idx] == 'APP': + line_chart_data = [time_column, None, round(float(ls[reserved_idx]), 3)] + pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append( + line_chart_data) + elif ls[tag_type_idx] in ('PTA', 'GE'): + line_chart_data = [time_column, round(float(ls[allocated_idx]), 3), + round(float(ls[reserved_idx]), 3)] + pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append( + line_chart_data) + except ValueError: + 
logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') return process_data, pta_or_ge_data @@ -608,7 +628,8 @@ class RunGenerator(object): {'name': 'Time(ms)', 'type': 'number'}] } peak_memory_rows = defaultdict(list) - component_datas = RunGenerator._get_csv_data(self.profile_data.memory_component_path) + path = self.profile_data.memory_component_path + component_datas = RunGenerator._get_csv_data(path) if component_datas: required_column_idxs = { 'Component': -1, @@ -619,16 +640,19 @@ class RunGenerator(object): (tag_type_idx, time_idx, reserved_idx, device_type_idx), column_exist_count = \ RunGenerator._check_csv_columns(component_datas[0], required_column_idxs) if column_exist_count < len(required_column_idxs): - logger.error('Required column is missing in file "npm_module_mem.csv"') + logger.error(f'Required column is missing in file "{path}"') else: - for ls in component_datas[1:]: + for idx, ls in enumerate(component_datas[1:]): memory_curve_id_dict = { 'device_type_idx': device_type_idx, 'reserved_idx': reserved_idx, 'tag_type_idx': tag_type_idx, 'time_idx': time_idx } - self._handle_peak_memory_rows(memory_curve_id_dict, ls, peak_memory_rows) + try: + self._handle_peak_memory_rows(memory_curve_id_dict, ls, peak_memory_rows) + except (ValueError, TypeError): + logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') peak_memory_events['rows'] = peak_memory_rows return peak_memory_events @@ -1023,7 +1047,7 @@ class RunGenerator(object): else: table['columns'].append({'type': 'string', 'name': column}) - self._handle_kernel_table_rows(name_idx, duration_idx, core_type_idx, datas[1:]) + self._handle_kernel_table_rows(name_idx, duration_idx, core_type_idx, datas[1:], path) table['rows'] = datas[1:] return result @@ -1057,8 +1081,11 @@ class RunGenerator(object): mem = device_prop.get('totalGlobalMem') if mem is not None: - gpu_info['Memory'] = '{} GB'.format(round(float(mem) / 1024 / 1024 / 1024, 
2)) - gpu_info['Memory Raw'] = mem + try: + gpu_info['Memory'] = '{} GB'.format(round(float(mem) / 1024 / 1024 / 1024, 2)) + gpu_info['Memory Raw'] = mem + except ValueError: + logger.warning('The value of "totalGlobalMem" must be number in the JSON, please check it.') major = device_prop.get('computeMajor') minor = device_prop.get('computeMinor') @@ -1067,10 +1094,15 @@ class RunGenerator(object): return gpu_info - def _handle_kernel_table_rows(self, name_idx, duration_idx, core_type_idx, rows): - for row in rows: + def _handle_kernel_table_rows(self, name_idx, duration_idx, core_type_idx, rows, path): + for idx, row in enumerate(rows): call_name = row[name_idx] - call_duration = float(row[duration_idx]) + try: + call_duration = float(row[duration_idx]) + except ValueError: + logger.error( + f'File "{path}" has wrong data format in row {idx + 2} in Duration column and will skip the row.') + continue call_type = row[core_type_idx] if self.accelerator_data.get(call_type) is not None: self.accelerator_data[call_type] += call_duration diff --git a/profiler/README.md b/profiler/README.md index dede53ba00f9bb007464e718c2a70e13fc4e3ace..368ffc71504d578a1a8ba685544f321ec7dba8a1 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -4,7 +4,7 @@ ATT工具针对训练&大模型场景,提供端到端调优工具:用户采 ### NPU Profiling数据采集 -目前ATT工具主要支持Ascend PyTorch Profiler接口的性能数据采集,请参见《[Ascend PyTorch Profiler性能调优工具介绍](https://gitee.com/ascend/att/wikis/%E6%A1%88%E4%BE%8B%E5%88%86%E4%BA%AB/%E6%80%A7%E8%83%BD%E6%A1%88%E4%BE%8B/Ascend%20PyTorch%20Profiler%E6%80%A7%E8%83%BD%E8%B0%83%E4%BC%98%E5%B7%A5%E5%85%B7%E4%BB%8B%E7%BB%8D)》。 +目前ATT工具主要支持Ascend PyTorch Profiler接口的性能数据采集,请参考官方文档:[Ascend PyTorch Profiler数据采集与分析](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/modeldevpt/ptmigr/AImpug_0067.html)。 Ascend PyTorch Profiler接口支持AscendPyTorch 
5.0.RC2或更高版本,支持的PyThon和CANN软件版本配套关系请参见《CANN软件安装指南》中的“[安装PyTorch](https://www.hiascend.com/document/detail/zh/canncommercial/63RC2/envdeployment/instg/instg_000041.html)”。 @@ -79,8 +79,6 @@ ascend pytorch profiler数据目录结构如下: |- * _ascend_pt ``` -Profiler配置接口详细介绍可以参考官方文档:[Ascend PyTorch Profiler数据采集与分析](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/modeldevpt/ptmigr/AImpug_0067.html) - ### 子功能介绍 | 工具名称 | 说明 | | ------------------------------------------------------------ | ------------------------------------------------------------ | diff --git a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py index 2a142dbe011cbad4275b0c5c187099d94717ad5d..84153727ecc7d1b2bab572a4460ad69abf753e29 100644 --- a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py +++ b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py @@ -39,7 +39,7 @@ class ComputeAdviceBase(AdviceBase): if not os.path.exists(self.kernel_details_path): print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format(os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT"))) return False - elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path).endswith(".csv"): + elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path) == "kernel_details.csv": self.kernel_details_path = self.collection_path else: print("[ERROR] Please input ascend_pt or kernel_details.csv") diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py index 7306bcecde6ead400c9ff82f99ad5d8d706fb634..cf483ed70e1ff00ef1489f40f77af168dfbea031 100644 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py +++ b/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py @@ -38,9 +38,9 @@ class 
NpuFusedAdvice(ComputeAdviceBase): filter_data = self.cur_data.get(self.cur_data.get("duration sum(us)", 0) > 0) op_num = len(filter_data.index) op_dur = filter_data["duration sum(us)"].sum() - self.cur_advice = "Advice:\n" if op_num > 0: index = 0 + self.cur_advice = "Advice:\n" self.cur_bottleneck = f"The computing time of fusable op is {round(op_dur, 2)} ms." for _, row in filter_data.iterrows(): cur_op = "[" + ", ".join(row.loc["pattern"]) + "]" diff --git a/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py b/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py index 6adbe07be53dc13ab51b0ed862dfc7121d588892..b108fc77a3f3408d48c79ce6b542f98427d88b0b 100644 --- a/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py +++ b/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py @@ -25,15 +25,6 @@ class ClusterStepTraceTimeBean: def __init__(self, data: dict): self._data = data - @property - def row(self) -> list: - row = [] - for field_name in self._data.keys(): - if field_name == self.STEP: - continue - row.append(float(self._data.get(field_name, ))) - return row - @property def step(self) -> str: return self._data.get(self.STEP, '') diff --git a/profiler/compare_tools/comparator/base_comparator.py b/profiler/compare_tools/comparator/base_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..330fb871ee19b9bac1c0dfff4cae5648ebeedf1c --- /dev/null +++ b/profiler/compare_tools/comparator/base_comparator.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod + + +class BaseComparator(ABC): + def __init__(self, origin_data: any, bean: any): + self._sheet_name = bean.TABLE_NAME + self._headers = bean.HEADERS + self._overhead = bean.OVERHEAD + self._origin_data = origin_data + self._bean = bean + self._rows = [] + + def generate_data(self) -> dict: + ''' + generate one sheet(table) data + type: dict + sheet name as the 
dict key + ''' + self._compare() + return {self._sheet_name: {"headers": self._headers, "rows": self._rows, "overhead": self._overhead}} + + @abstractmethod + def _compare(self): + raise NotImplementedError("Function _compare need to be implemented.") diff --git a/profiler/compare_tools/comparator/communication_comparator.py b/profiler/compare_tools/comparator/communication_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..72ed8576cdd340fd926f577eb630825ead4490a7 --- /dev/null +++ b/profiler/compare_tools/comparator/communication_comparator.py @@ -0,0 +1,20 @@ +from comparator.base_comparator import BaseComparator +from compare_bean.communication_bean import CommunicationBean +from utils.constant import Constant +from utils.common_func import update_order_id + + +class CommunicationComparator(BaseComparator): + def __init__(self, origin_data: dict, bean: any): + super().__init__(origin_data, bean) + + def _compare(self): + base_data = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_data = self._origin_data.get(Constant.COMPARISON_DATA, {}) + for comm_name, comm_data in base_data.items(): + comparison_comm_data = comparison_data.pop(comm_name, {}) + self._rows.extend(CommunicationBean(comm_name, comm_data, comparison_comm_data).rows) + for comm_name, comm_data in comparison_data.items(): + self._rows.extend(CommunicationBean(comm_name, {}, comm_data).rows) + update_order_id(self._rows) + diff --git a/profiler/compare_tools/comparator/index_comparator.py b/profiler/compare_tools/comparator/index_comparator.py deleted file mode 100644 index 91b050548def5fe1ec34353621c46462dbf748de..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/comparator/index_comparator.py +++ /dev/null @@ -1,44 +0,0 @@ -from utils.args_manager import ArgsManager - - -class IndexComparator: - def __init__(self, args: any): - self._args = args - self._args_manager = ArgsManager() - self._base_profiling = 
self._args_manager.base_profiling - self._comparison_profiling = self._args_manager.comparison_profiling - - def compare(self) -> list: - base_data_dict, comparison_data_dict = {}, {} - if not self._base_profiling.communication_data: - print(f"[WARNING] Can't find any communication op in the file: {self._base_profiling.json_path}") - for data in self._base_profiling.communication_data: - name_list = data.get("name", "").split("_") - if len(name_list) >= 2: - base_data_dict.setdefault(name_list[1].lower(), []).append(float(data.get("dur", 0))) - if self._args.base_profiling_path != self._args.comparison_profiling_path: - if not self._comparison_profiling.communication_data: - print(f"[WARNING] Can't find any communication op in the file: {self._comparison_profiling.json_path}") - for data in self._comparison_profiling.communication_data: - name_list = data.get("name", "").split("_") - if len(name_list) >= 2: - comparison_data_dict.setdefault(name_list[1].lower(), []).append(float(data.get("dur", 0))) - result_data = [] - for name, base_dur_list in base_data_dict.items(): - base_row = [name, None, len(base_dur_list), sum(base_dur_list), sum(base_dur_list) / len(base_dur_list), - max(base_dur_list), min(base_dur_list)] - if self._args.base_profiling_path == self._args.comparison_profiling_path: - result_data.append(base_row + [None] * 7) - continue - com_dur_list = comparison_data_dict.pop(name, None) - if not com_dur_list: - com_row = [None, None, None, 0, None, None, None] - else: - com_row = [name, None, len(com_dur_list), sum(com_dur_list), sum(com_dur_list) / len(com_dur_list), - max(com_dur_list), min(com_dur_list)] - result_data.append(base_row + com_row) - for name, com_dur_list in comparison_data_dict.items(): - com_row = [name, None, len(com_dur_list), sum(com_dur_list), sum(com_dur_list) / len(com_dur_list), - max(com_dur_list), min(com_dur_list)] - result_data.append([None, None, None, 0, None, None, None] + com_row) - return result_data diff --git 
a/profiler/compare_tools/comparator/operator_comparator.py b/profiler/compare_tools/comparator/operator_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..d7b22af577571b82cbc71b10005b4fe85b034f5b --- /dev/null +++ b/profiler/compare_tools/comparator/operator_comparator.py @@ -0,0 +1,13 @@ +from comparator.base_comparator import BaseComparator + + +class OperatorComparator(BaseComparator): + def __init__(self, origin_data: any, bean: any): + super().__init__(origin_data, bean) + + def _compare(self): + if not self._origin_data: + return + self._rows = [None] * (len(self._origin_data)) + for index, (base_op, comparison_op) in enumerate(self._origin_data): + self._rows[index] = self._bean(index, base_op, comparison_op).row diff --git a/profiler/compare_tools/comparator/operator_statistic_comparator.py b/profiler/compare_tools/comparator/operator_statistic_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..eaaaccea3797a7c5a67148f9787493f5c46ef50d --- /dev/null +++ b/profiler/compare_tools/comparator/operator_statistic_comparator.py @@ -0,0 +1,28 @@ +from comparator.base_comparator import BaseComparator +from utils.common_func import update_order_id + + +class OperatorStatisticComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + def _compare(self): + if not self._origin_data: + return + base_op_dict, comparison_op_dict = self._group_by_op_name() + for op_name, base_data in base_op_dict.items(): + comparison_data = comparison_op_dict.pop(op_name, []) + self._rows.append(self._bean(op_name, base_data, comparison_data).row) + for op_name, comparison_data in comparison_op_dict.items(): + self._rows.append(self._bean(op_name, [], comparison_data).row) + self._rows.sort(key=lambda x: x[-2], reverse=True) # order by diff column + update_order_id(self._rows) + + def _group_by_op_name(self): + base_op_dict, comparison_op_dict = {}, {} + for 
base_op, comparison_op in self._origin_data: + if base_op: + base_op_dict.setdefault(base_op.name, []).append(base_op) + if comparison_op: + comparison_op_dict.setdefault(comparison_op.name, []).append(comparison_op) + return base_op_dict, comparison_op_dict diff --git a/profiler/compare_tools/comparator/overall_performance_comparator.py b/profiler/compare_tools/comparator/overall_performance_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..161f574ba53e91e3efe0e33d27f363744ee559a1 --- /dev/null +++ b/profiler/compare_tools/comparator/overall_performance_comparator.py @@ -0,0 +1,52 @@ +from comparator.base_comparator import BaseComparator +from utils.constant import Constant + + +class OverallPerformanceComparator(BaseComparator): + def __init__(self, origin_data: dict, bean: any): + super().__init__(origin_data, bean) + + def _compare(self): + base_profiling_info = self._origin_data.get(Constant.BASE_DATA) + comp_profiling_info = self._origin_data.get(Constant.COMPARISON_DATA) + self._headers = [''] + base_col = [f'{base_profiling_info.profiling_type}'] + comp_col = [f'{comp_profiling_info.profiling_type}'] + if not base_profiling_info.hide_op_details and not comp_profiling_info.hide_op_details: + self._headers.extend(['Cube Time(Num)', 'Vector Time(Num)']) + base_col.extend([f'{base_profiling_info.cube_time:.3f}s({base_profiling_info.cube_num})', + f'{base_profiling_info.vec_time:.3f}s({base_profiling_info.vec_num})']) + comp_col.extend([f'{comp_profiling_info.cube_time:.3f}s({comp_profiling_info.cube_num})', + f'{comp_profiling_info.vec_time:.3f}s({comp_profiling_info.vec_num})']) + if base_profiling_info.other_time or comp_profiling_info.other_time: + self._headers.append('Other Time') + base_col.append(f'{base_profiling_info.other_time:.3f}s') + comp_col.append(f'{comp_profiling_info.other_time:.3f}s') + if base_profiling_info.fa_time_fwd or comp_profiling_info.fa_time_fwd: + self._headers.append('Flash Attention 
Time(Forward)(Num)') + base_col.append(f'{base_profiling_info.fa_time_fwd:.3f}s({base_profiling_info.fa_num_fwd})') + comp_col.append(f'{comp_profiling_info.fa_time_fwd:.3f}s({comp_profiling_info.fa_num_fwd})') + if base_profiling_info.fa_time_bwd or comp_profiling_info.fa_time_bwd: + self._headers.append('Flash Attention Time(Backward)(Num)') + base_col.append(f'{base_profiling_info.fa_time_bwd:.3f}s({base_profiling_info.fa_num_bwd})') + comp_col.append(f'{comp_profiling_info.fa_time_bwd:.3f}s({comp_profiling_info.fa_num_bwd})') + self._headers.extend(['Computing Time']) + base_col.extend([f'{base_profiling_info.compute_time:.3f}s']) + comp_col.extend([f'{comp_profiling_info.compute_time:.3f}s']) + if base_profiling_info.memory_used or comp_profiling_info.memory_used: + self._headers.append('Mem Usage') + base_col.append(f'{base_profiling_info.memory_used:.2f}G') + comp_col.append(f'{comp_profiling_info.memory_used:.2f}G') + self._headers.extend(['Uncovered Communication Time']) + base_col.extend([f'{base_profiling_info.communication_not_overlapped: .3f}s']) + comp_col.extend([f'{comp_profiling_info.communication_not_overlapped: .3f}s']) + if base_profiling_info.sdma_time or comp_profiling_info.sdma_time: + self._headers.append('SDMA Time(Num)') + base_col.append(f'{base_profiling_info.sdma_time:.3f}s({base_profiling_info.sdma_num})') + comp_col.append(f'{comp_profiling_info.sdma_time:.3f}s({comp_profiling_info.sdma_num})') + cue = '(Not minimal profiling)' if base_profiling_info.is_not_minimal_profiling() or \ + comp_profiling_info.is_not_minimal_profiling() else '' + self._headers.extend(['Free Time', 'E2E Time' + cue]) + base_col.extend([f'{base_profiling_info.scheduling_time:.3f}s', f'{base_profiling_info.e2e_time:.3f}s']) + comp_col.extend([f'{comp_profiling_info.scheduling_time:.3f}s', f'{comp_profiling_info.e2e_time:.3f}s']) + self._rows = [base_col, comp_col] diff --git a/profiler/compare_tools/compare_bean/communication_bean.py 
b/profiler/compare_tools/compare_bean/communication_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..0af0a7fa9d277f99562b663969e9669b4a68024c --- /dev/null +++ b/profiler/compare_tools/compare_bean/communication_bean.py @@ -0,0 +1,72 @@ +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.common_func import calculate_diff_ratio + + +class CommunicationInfo: + + def __init__(self, name: str, data_list: list, is_task: bool): + self.comm_op_name = None + self.task_name = None + self.calls = None + self.total_duration = 0 + self.avg_duration = None + self.max_duration = None + self.min_duration = None + if data_list: + self.comm_op_name = "|" if is_task else name + self.task_name = name if is_task else None + self.calls = len(data_list) + self.total_duration = sum(data_list) + self.avg_duration = sum(data_list) / len(data_list) + self.max_duration = max(data_list) + self.min_duration = min(data_list) + + +class CommunicationBean: + TABLE_NAME = Constant.COMMUNICATION_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, name: str, base_comm_data: dict, comparison_comm_data: dict): + self._name = name + self._base_comm = base_comm_data + self._comparison_comm = comparison_comm_data + + @property + def rows(self): + rows = [] + base_comm = CommunicationInfo(self._name, self._base_comm.get("comm_list", []), is_task=False) + comparison_comm = CommunicationInfo(self._name, self._comparison_comm.get("comm_list", []), is_task=False) + rows.append(self._get_row(base_comm, comparison_comm, is_task=False)) + + base_task = self._base_comm.get("comm_task", {}) + comparison_task = self._comparison_comm.get("comm_task", {}) + if not base_task and not comparison_task: + return rows + + for task_name, task_list in base_task.items(): + base_task_info = CommunicationInfo(task_name, task_list, is_task=True) + comparison_task_info = 
CommunicationInfo("", [], is_task=True) + for _task_name, _task_list in comparison_task.items(): + comparison_task_info = CommunicationInfo(_task_name, _task_list, is_task=True) + comparison_task.pop(_task_name, None) + break + rows.append(self._get_row(base_task_info, comparison_task_info, is_task=True)) + for task_name, task_list in comparison_task.items(): + base_task_info = CommunicationInfo("", [], is_task=True) + comparison_task_info = CommunicationInfo(task_name, task_list, is_task=True) + rows.append(self._get_row(base_task_info, comparison_task_info, is_task=True)) + + return rows + + @classmethod + def _get_row(cls, base_info: CommunicationInfo, comparison_info: CommunicationInfo, is_task: bool) -> list: + row = [None, base_info.comm_op_name, base_info.task_name, base_info.calls, base_info.total_duration, + base_info.avg_duration, base_info.max_duration, base_info.min_duration, comparison_info.comm_op_name, + comparison_info.task_name, comparison_info.calls, comparison_info.total_duration, + comparison_info.avg_duration, comparison_info.max_duration, comparison_info.min_duration] + diff_fields = [None, None] if is_task else calculate_diff_ratio(base_info.total_duration, + comparison_info.total_duration) + row.extend(diff_fields) + return row diff --git a/profiler/compare_tools/compare_bean/memory_compare_bean.py b/profiler/compare_tools/compare_bean/memory_compare_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..22af09b531e7c9e33f423800abff2f375832b5fc --- /dev/null +++ b/profiler/compare_tools/compare_bean/memory_compare_bean.py @@ -0,0 +1,47 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class MemoryCompareBean: + TABLE_NAME = Constant.MEMORY_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + 
+ def __init__(self, index: int, base_op: TorchOpNode, comparison_op: TorchOpNode): + self._index = index + self._base_op = MemoryInfo(base_op) + self._comparison_op = MemoryInfo(comparison_op) + + @property + def row(self): + row = [self._index + 1, self._base_op.operator_name, self._base_op.input_shape, self._base_op.input_type, + self._base_op.memory_details, self._base_op.size, self._comparison_op.operator_name, + self._comparison_op.input_shape, self._comparison_op.input_type, self._comparison_op.memory_details, + self._comparison_op.size] + diff_fields = calculate_diff_ratio(self._base_op.size, self._comparison_op.size) + row.extend(diff_fields) + return row + + +class MemoryInfo: + def __init__(self, torch_op: TorchOpNode): + self.operator_name = None + self.input_shape = None + self.input_type = None + self.size = 0 + self.memory_details = "" + self._memory_list = [] + if torch_op: + self.operator_name = torch_op.name + self.input_shape = torch_op.input_shape + self.input_type = torch_op.input_type + self._memory_list = TreeBuilder.get_total_memory(torch_op) + self._update_memory_fields() + + def _update_memory_fields(self): + for memory in self._memory_list: + self.size += memory.size + self.memory_details += memory.memory_details diff --git a/profiler/compare_tools/compare_bean/memory_statistic_bean.py b/profiler/compare_tools/compare_bean/memory_statistic_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..827f095704e68ad7f7f58248f67fe512c2cb5a6c --- /dev/null +++ b/profiler/compare_tools/compare_bean/memory_statistic_bean.py @@ -0,0 +1,38 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.tree_builder import TreeBuilder +from utils.excel_config import ExcelConfig + + +class MemoryStatisticBean: + TABLE_NAME = Constant.MEMORY_TOP_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, name: str, base_data: 
list, comparison_data: list): + self._name = name + self._base_info = MemoryStatisticInfo(base_data) + self._comparison_info = MemoryStatisticInfo(comparison_data) + + @property + def row(self): + row = [None, self._name, self._base_info.duration_ms, self._base_info.size_mb, self._base_info.number, + self._comparison_info.duration_ms, self._comparison_info.size_mb, self._comparison_info.number] + diff_fields = calculate_diff_ratio(self._base_info.size_mb, self._comparison_info.size_mb) + row.extend(diff_fields) + return row + + +class MemoryStatisticInfo: + def __init__(self, data_list: list): + self._data_list = data_list + self.duration_ms = 0 + self.size_mb = 0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for op_data in self._data_list: + memory_list = TreeBuilder.get_total_memory(op_data) + self.duration_ms += sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) + self.size_mb += sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) diff --git a/profiler/compare_tools/compare_bean/operator_compare_bean.py b/profiler/compare_tools/compare_bean/operator_compare_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..ee0c71c383203dfc71f429315c7d6565613edd64 --- /dev/null +++ b/profiler/compare_tools/compare_bean/operator_compare_bean.py @@ -0,0 +1,47 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class OperatorCompareBean: + TABLE_NAME = Constant.OPERATOR_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, index: int, base_op: TorchOpNode, comparison_op: TorchOpNode): + self._index = index + self._base_op = OperatorInfo(base_op) + self._comparison_op = OperatorInfo(comparison_op) + + @property + def row(self): + row = [self._index + 
1, self._base_op.operator_name, self._base_op.input_shape, self._base_op.input_type, + self._base_op.kernel_details, self._base_op.device_dur, self._comparison_op.operator_name, + self._comparison_op.input_shape, self._comparison_op.input_type, self._comparison_op.kernel_details, + self._comparison_op.device_dur] + diff_fields = calculate_diff_ratio(self._base_op.device_dur, self._comparison_op.device_dur) + row.extend(diff_fields) + return row + + +class OperatorInfo: + def __init__(self, torch_op: TorchOpNode): + self.operator_name = None + self.input_shape = None + self.input_type = None + self.device_dur = 0 + self.kernel_details = "" + self._kernel_list = [] + if torch_op: + self.operator_name = torch_op.name + self.input_shape = torch_op.input_shape + self.input_type = torch_op.input_type + self._kernel_list = TreeBuilder.get_total_kernels(torch_op) + self._update_kernel_fields() + + def _update_kernel_fields(self): + for kernel in self._kernel_list: + self.device_dur += kernel.device_dur + self.kernel_details += kernel.kernel_details diff --git a/profiler/compare_tools/compare_bean/operator_statistic_bean.py b/profiler/compare_tools/compare_bean/operator_statistic_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..6aab6ecfe96e11b996dd1dedd73cce5d73069320 --- /dev/null +++ b/profiler/compare_tools/compare_bean/operator_statistic_bean.py @@ -0,0 +1,36 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.tree_builder import TreeBuilder + + +class OperatorStatisticBean: + TABLE_NAME = Constant.OPERATOR_TOP_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, name: str, base_data: list, comparison_data: list): + self._name = name + self._base_info = OperatorStatisticInfo(base_data) + self._comparison_info = OperatorStatisticInfo(comparison_data) + + @property + def 
row(self): + row = [None, self._name, self._base_info.device_dur_ms, self._base_info.number, + self._comparison_info.device_dur_ms, self._comparison_info.number] + diff_fields = calculate_diff_ratio(self._base_info.device_dur_ms, self._comparison_info.device_dur_ms) + row.extend(diff_fields) + return row + + +class OperatorStatisticInfo: + def __init__(self, data_list: list): + self._data_list = data_list + self.device_dur_ms = 0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for op_data in self._data_list: + kernel_list = TreeBuilder.get_total_kernels(op_data) + self.device_dur_ms += sum([kernel.device_dur / Constant.US_TO_MS for kernel in kernel_list]) diff --git a/profiler/compare_tools/compare_bean/origin_data_bean/compare_event.py b/profiler/compare_tools/compare_bean/origin_data_bean/compare_event.py index 994f2235eebad7fc4a659a6f9e548159ee5473fe..99284916449268b0e969b27f96965b82876421c5 100644 --- a/profiler/compare_tools/compare_bean/origin_data_bean/compare_event.py +++ b/profiler/compare_tools/compare_bean/origin_data_bean/compare_event.py @@ -1,42 +1,45 @@ +from decimal import Decimal + +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean from utils.constant import Constant class KernelEvent: - def __init__(self, event: dict, device_type: int): + def __init__(self, event: TraceEventBean, device_type: str): self._event = event self._device_type = device_type - self._device_dur = self._event.get("dur", 0.0) @property def kernel_name(self) -> str: - return self._event.get("name", "") + return self._event.name @property def device_dur(self) -> float: - return self._device_dur + return self._event.dur @property def task_id(self) -> int: - return self._event.get("args", {}).get("Task Id") + return self._event.task_id @property def task_type(self) -> str: - return self._event.get("args", {}).get("Task Type") + return self._event.task_type @property def kernel_details(self): if self._device_type == 
Constant.GPU: - return f"{self.kernel_name} [duration: {self.device_dur}]" + return f"{self.kernel_name} [duration: {self.device_dur}]\n" return f"{self.kernel_name}, {self.task_id}, {self.task_type} [duration: {self.device_dur}]\n" class MemoryEvent: - def __init__(self, event: dict, name: str): + def __init__(self, event: dict): self._event = event - self._name = name + self._name = "" self._size = 0.0 - self._release_time = 0 - self._allocation_time = 0 + self._ts = Decimal(0) + self._release_time = Decimal(0) + self._allocation_time = Decimal(0) self._duration = 0.0 self.init() @@ -54,8 +57,20 @@ class MemoryEvent: return f"{name}, ({self._allocation_time}, {self._release_time}), " \ f"[duration: {self._duration}], [size: {self._size}]\n" + @property + def is_torch_op(self) -> bool: + return False + + @property + def start_time(self) -> Decimal: + return self._ts + + def set_name(self, name: str): + self._name = name + def init(self): self._size = self._event.get(Constant.SIZE, 0) + self._ts = self._event.get(Constant.TS, 0) self._release_time = self._event.get(Constant.RELEASE_TIME) self._allocation_time = self._event.get(Constant.ALLOCATION_TIME) if not self._release_time or not self._allocation_time: diff --git a/profiler/compare_tools/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_bean/origin_data_bean/kernel_details_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..fbe2664644f43bf520740381acfd01796a7f9cc7 --- /dev/null +++ b/profiler/compare_tools/compare_bean/origin_data_bean/kernel_details_bean.py @@ -0,0 +1,76 @@ +import math + +import pandas as pd + +from utils.common_func import convert_to_float + + +class KernelDetailsBean: + def __init__(self, data: dict): + self._data = data + self._op_type = "" + self._name = "" + self._aiv_vec_time = 0.0 + self._mac_time = 0.0 + self._duration = 0.0 + self.init() + + @property + def op_type(self) -> str: + return self._op_type + + @property + def 
name(self) -> str: + return self._name + + @property + def aiv_vec_time(self) -> float: + if self._aiv_vec_time == "" or self._aiv_vec_time == "N/A": + return float("nan") + return convert_to_float(self._aiv_vec_time) + + @property + def mac_time(self) -> float: + if self._mac_time == "" or self._mac_time == "N/A": + return float("nan") + return convert_to_float(self._mac_time) + + @property + def duration(self) -> float: + return convert_to_float(self._duration) + + def is_hide_op_pmu(self): + if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys(): + return False + return True + + def is_vector(self): + if not pd.isna(self.aiv_vec_time) and self.aiv_vec_time > 0: + return True + if not pd.isna(self.mac_time) and math.isclose(self.mac_time, 0.0): + return True + return False + + def is_invalid(self): + if pd.isna(self.aiv_vec_time) and pd.isna(self.mac_time): + return True + return False + + def is_fa_bwd(self): + return 'bwd' in self.op_type.lower() or 'grad' in self.op_type.lower() + + def is_sdma(self): + return self.name.lower().startswith("aclnninplacecopy") and "tensormove" in self.name.lower() + + def is_flash_attention(self): + return "flashattention" in self.op_type.lower() + + def is_cube(self): + return "matmul" in self.op_type.lower() + + def init(self): + self._op_type = self._data.get('Type', "") + self._name = self._data.get('Name', "") + self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") + self._mac_time = self._data.get('mac_time(us)', "") + self._duration = self._data.get('Duration(us)', 0) diff --git a/profiler/compare_tools/compare_bean/origin_data_bean/memory_record_bean.py b/profiler/compare_tools/compare_bean/origin_data_bean/memory_record_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..7903cb09e73db98099c24ba32fa334e49bbbfdad --- /dev/null +++ b/profiler/compare_tools/compare_bean/origin_data_bean/memory_record_bean.py @@ -0,0 +1,15 @@ +from utils.common_func import 
convert_to_float + + +class MemoryRecordBean: + def __init__(self, data: dict): + self._data = data + self._total_reserved_mb = 0.0 + self.init() + + @property + def total_reserved_mb(self) -> float: + return convert_to_float(self._total_reserved_mb) + + def init(self): + self._total_reserved_mb = self._data.get("Total Reserved(MB)", 0) diff --git a/profiler/compare_tools/compare_bean/origin_data_bean/operator_memory_bean.py b/profiler/compare_tools/compare_bean/origin_data_bean/operator_memory_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..b6ca88eaba122b2e017a91e1de0a6ec65070f88c --- /dev/null +++ b/profiler/compare_tools/compare_bean/origin_data_bean/operator_memory_bean.py @@ -0,0 +1,43 @@ +from decimal import Decimal + +from utils.common_func import convert_to_float, convert_to_decimal + + +class OperatorMemoryBean: + + def __init__(self, data: dict): + self._data = data + self._name = "" + self._size = 0.0 + self._allocation_time = Decimal(0) + self._release_time = Decimal(0) + self.init() + + @property + def name(self) -> str: + return self._name + + @property + def size(self) -> float: + return convert_to_float(self._size) + + @property + def allocation_time(self) -> Decimal: + if not self._allocation_time: + return Decimal(0) + return convert_to_decimal(self._allocation_time) + + @property + def release_time(self) -> Decimal: + if not self._release_time: + return Decimal(0) + return convert_to_decimal(self._release_time) + + def init(self): + self._name = self._data.get("Name", "") + self._size = self._data.get("Size(KB)", 0) + self._allocation_time = self._data.get("Allocation Time(us)", 0) + self._release_time = self._data.get("Release Time(us)", 0) + + def is_cann_op(self): + return "cann::" in self._name diff --git a/profiler/compare_tools/compare_bean/origin_data_bean/trace_event_bean.py b/profiler/compare_tools/compare_bean/origin_data_bean/trace_event_bean.py index 
2d80d6e76de8a5551be1518d3cfb10e04ae2e3f2..fb7b479dbcb794776e0518f09928ac5231bf8ad1 100644 --- a/profiler/compare_tools/compare_bean/origin_data_bean/trace_event_bean.py +++ b/profiler/compare_tools/compare_bean/origin_data_bean/trace_event_bean.py @@ -1,6 +1,7 @@ from decimal import Decimal -from common_func.constant import Constant +from utils.common_func import convert_to_float, convert_to_decimal +from utils.constant import Constant class TraceEventBean: @@ -15,6 +16,7 @@ class TraceEventBean: self._cat = "" self._name = "" self._args = {} + self._is_torch_op = False self.init() @property @@ -27,15 +29,15 @@ class TraceEventBean: @property def dur(self) -> float: - return self._dur + return convert_to_float(self._dur) @property def start_time(self) -> Decimal: - return self._ts + return convert_to_decimal(self._ts) @property def end_time(self) -> Decimal: - return self._ts + Decimal(self._dur) + return self.start_time + convert_to_decimal(self._dur) @property def name(self) -> str: @@ -66,12 +68,19 @@ class TraceEventBean: return self._args.get("stream") @property - def task_type(self) -> int: + def task_type(self) -> str: return self._args.get('Task Type') + @property + def task_id(self) -> int: + return self._args.get('Task Id') + @property def device_id(self) -> int: - return self._args.get('Device Id', -1) + try: + return int(self._args.get('Device Id', Constant.INVALID_VALUE)) + except Exception: + return Constant.INVALID_VALUE @property def total_reserved(self): @@ -97,6 +106,14 @@ class TraceEventBean: def event(self) -> dict: return self._event + @property + def is_torch_op(self) -> bool: + return self._is_torch_op + + @is_torch_op.setter + def is_torch_op(self, value: bool): + self._is_torch_op = value + def is_m_mode(self) -> bool: return self._ph == "M" @@ -109,6 +126,12 @@ class TraceEventBean: def is_flow_end(self) -> bool: return self._ph == "f" + def is_enqueue(self) -> bool: + return self.lower_cat == "enqueue" + + def is_dequeue(self) -> bool: + 
return self.lower_cat == "dequeue" + def is_process_meta(self) -> bool: return self.is_m_mode() and self._name == "process_name" @@ -121,6 +144,9 @@ class TraceEventBean: def is_hccl_process_name(self) -> bool: return self.process_name == "HCCL" + def is_overlap_process_name(self) -> bool: + return self.process_name == "Overlap Analysis" + def is_npu_process_name(self) -> bool: return self.process_name == "Ascend Hardware" @@ -137,23 +163,37 @@ class TraceEventBean: return self.lower_cat == "kernel" def is_nccl_name(self): - return "nccl" in self.lower_name - - def is_nccl_kernel(self): - return self.is_kernel_cat() and self.is_nccl_name() + return self.lower_name.startswith("nccl") def is_kernel_except_nccl(self): - return self.is_kernel_cat() and not self.is_nccl_kernel() + return self.is_kernel_cat() and not self.is_nccl_name() def is_memory_event(self): return self.lower_name == '[memory]' and self.device_id >= 0 + def is_compute_event(self): + return self.task_type in ('AI_CORE', 'MIX_AIC', 'MIX_AIV', 'AI_CPU', 'AI_VECTOR_CORE', 'FFTS_PLUS') + + def is_sdma_event(self): + return self.task_type in ('SDMA_SQE', 'PCIE_DMA_SQE') + + def is_event_wait(self): + return self.task_type == 'EVENT_WAIT_SQE' + + def is_backward(self): + bwd_list = ["bwd", "backward"] + for bwd in bwd_list: + if bwd in self.lower_name: + return True + return False + def init(self): - self._pid = self._event.get("pid", 0) - self._tid = self._event.get("tid", 0) - self._ts = Decimal(str(self._event.get("ts", 0))) - self._dur = float(self._event.get("dur", 0)) - self._ph = self._event.get("ph", "") - self._cat = self._event.get("cat", "") - self._name = self._event.get("name", "") - self._args = self._event.get("args", {}) + if isinstance(self._event, dict): + self._pid = self._event.get("pid", 0) + self._tid = self._event.get("tid", 0) + self._ts = self._event.get("ts", 0) + self._dur = self._event.get("dur", 0) + self._ph = self._event.get("ph", "") + self._cat = self._event.get("cat", "") 
+ self._name = self._event.get("name", "") + self._args = self._event.get("args", {}) diff --git a/profiler/compare_tools/compare_bean/profiling_info.py b/profiler/compare_tools/compare_bean/profiling_info.py index 9a4a794f1f678fa8bb126ea8202ca8fcbd7c2177..f7711261d12c86ab16ccad01cf5b7ed66f272227 100644 --- a/profiler/compare_tools/compare_bean/profiling_info.py +++ b/profiler/compare_tools/compare_bean/profiling_info.py @@ -22,8 +22,8 @@ class ProfilingInfo: self.e2e_time = 0.0 self.sdma_time = 0.0 self.scheduling_time = 0.0 - self.flash_attention_time_bwd = 0.0 - self.flash_attention_time_fwd = 0.0 + self.fa_time_bwd = 0.0 + self.fa_time_fwd = 0.0 self.minimal_profiling = False self.hide_op_details = False @@ -36,5 +36,56 @@ class ProfilingInfo: self.e2e_time = self.e2e_time / 10 ** 6 self.sdma_time = self.sdma_time / 10 ** 6 self.scheduling_time = self.scheduling_time / 10 ** 6 - self.flash_attention_time_bwd = self.flash_attention_time_bwd / 10 ** 6 - self.flash_attention_time_fwd = self.flash_attention_time_fwd / 10 ** 6 + self.fa_time_bwd = self.fa_time_bwd / 10 ** 6 + self.fa_time_fwd = self.fa_time_fwd / 10 ** 6 + + def calculate_other_time(self): + self.other_time = max( + [0, self.compute_time - self.cube_time - self.fa_time_fwd - self.fa_time_bwd - self.vec_time]) + + def calculate_vec_time(self): + self.vec_time = self.compute_time - self.cube_time - self.fa_time_fwd - self.fa_time_bwd + + def calculate_schedule_time(self): + self.scheduling_time = self.e2e_time - self.compute_time - self.communication_not_overlapped + + def update_fa_fwd_info(self, time: float): + self.fa_time_fwd += time + self.fa_num_fwd += 1 + + def update_fa_bwd_info(self, time: float): + self.fa_time_bwd += time + self.fa_num_bwd += 1 + + def update_sdma_info(self, time: float, num: int = 1): + self.sdma_time += time + self.sdma_num += num + + def update_cube_info(self, time: float): + self.cube_time += time + self.cube_num += 1 + + def update_vec_info(self, time: float): + 
self.vec_time += time + self.vec_num += 1 + + def set_compute_time(self, time: float): + self.compute_time = time + + def update_compute_time(self, time: float): + self.compute_time += time + + def set_e2e_time(self, time: float): + self.e2e_time = time + + def set_comm_not_overlap(self, time: float): + self.communication_not_overlapped = time + + def update_comm_not_overlap(self, time: float): + self.communication_not_overlapped += time + + def set_memory_used(self, memory: float): + self.memory_used = memory + + def is_not_minimal_profiling(self) -> bool: + return self.profiling_type == Constant.NPU and not self.minimal_profiling diff --git a/profiler/compare_tools/generation/base_generator.py b/profiler/compare_tools/generation/base_generator.py deleted file mode 100644 index e65bf337380518497d6a50d75bd32fe58acf5e07..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/base_generator.py +++ /dev/null @@ -1,11 +0,0 @@ -from abc import ABC, abstractmethod - - -class BaseGenerator(ABC): - def __init__(self, sheet_name: str, data: any): - self.sheet_name = sheet_name - self.data = data - - @abstractmethod - def generate_data(self): - raise NotImplementedError("Function generate_data need to be implemented.") diff --git a/profiler/compare_tools/generation/communication_compare_generator.py b/profiler/compare_tools/generation/communication_compare_generator.py deleted file mode 100644 index 243d6a51506f32cc627b4046c0976b7504c4dbf4..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/communication_compare_generator.py +++ /dev/null @@ -1,50 +0,0 @@ -import math - -import pandas as pd - -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant - - -class CommunicationCompareGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.COMMUNICATION_SHEET, data) - 
self._base_task_data = ArgsManager().base_profiling.communication_task_data - self._comparison_task_data = ArgsManager().comparison_profiling.communication_task_data - - def generate_data(self): - result_data = [] - row_headers = ["base_op", "base_task", "base_calls", "base_total_dur", "base_avg_dur", "base_max_dur", - "base_min_dur", "com_op", "com_task", "com_calls", "com_total_dur", "com_avg_dur", "com_max_dur", - "com_min_dur"] - for row in self.data: - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - result_data.append(row + [None, None]) - else: - result_data.append(row + calculate_diff_ratio(row[row_headers.index("base_total_dur")], - row[row_headers.index("com_total_dur")])) - base_data = self._get_task_statistic(row[row_headers.index("base_op")], is_base=True) - comparison_data = self._get_task_statistic(row[row_headers.index("com_op")], is_base=False) - for index in range(max(len(base_data), len(comparison_data))): - if index >= len(base_data): - base_row = ["|"] + [None] * 6 - else: - base_row = ["|"] + base_data[index] - if index >= len(comparison_data): - comparison_row = ["|"] + [None] * 6 - else: - comparison_row = ["|"] + comparison_data[index] - result_data.append(base_row + comparison_row + [None, None]) - return result_data - - def _get_task_statistic(self, name: str, is_base: bool): - if not name: - return [] - task_list = self._base_task_data.get(name) if is_base else self._comparison_task_data.get(name) - if task_list: - data = [[data.get("name", ""), float(data.get("dur", 0))] for data in task_list] - df = pd.DataFrame(data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) - return df.groupby(Constant.OP_KEY).agg(["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() - return [] diff --git a/profiler/compare_tools/generation/comparison_generator.py b/profiler/compare_tools/generation/comparison_generator.py deleted file mode 100644 index 
44798cb95441ba27fe53e2a3bbb19d803162be83..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/comparison_generator.py +++ /dev/null @@ -1,33 +0,0 @@ -from comparator.index_comparator import IndexComparator -from comparator.op_comparator import OpComparator -from generation.communication_compare_generator import CommunicationCompareGenerator -from generation.memory_compare_generator import MemoryCompareGenerator -from generation.memory_statistic_generator import MemoryStatisticGenerator -from generation.operator_compare_generator import OperatorCompareGenerator -from generation.operator_statistic_generator import OperatorStatisticGenerator -from view.excel_view import ExcelViewer -from utils.constant import Constant -from utils.args_manager import ArgsManager -from utils.torch_op_node import TorchOpNode -from utils.tree_builder import TreeBuilder - - -class ComparisonGenerator: - def __init__(self, args: any): - self._args = args - self._args_manager = ArgsManager() - - def run(self, file_path: str): - data_dict = {} - if self._args.enable_operator_compare or self._args.enable_memory_compare: - op_compare_result = OpComparator(self._args).compare() - if self._args.enable_communication_compare: - index_compare_result = IndexComparator(self._args).compare() - data_dict[Constant.COMMUNICATION_SHEET] = CommunicationCompareGenerator(index_compare_result).generate_data() - if self._args.enable_operator_compare: - data_dict[Constant.OPERATOR_SHEET] = OperatorCompareGenerator(op_compare_result).generate_data() - data_dict[Constant.OPERATOR_TOP_SHEET] = OperatorStatisticGenerator(op_compare_result).generate_data() - if self._args.enable_memory_compare: - data_dict[Constant.MEMORY_SHEET] = MemoryCompareGenerator(op_compare_result).generate_data() - data_dict[Constant.MEMORY_TOP_SHEET] = MemoryStatisticGenerator(op_compare_result).generate_data() - ExcelViewer(data_dict, file_path).generate_view() diff --git 
a/profiler/compare_tools/generation/memory_compare_generator.py b/profiler/compare_tools/generation/memory_compare_generator.py deleted file mode 100644 index 2cf919d1f658a57f893231a8a873f113601197cb..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/memory_compare_generator.py +++ /dev/null @@ -1,37 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.torch_op_node import TorchOpNode -from utils.tree_builder import TreeBuilder - - -class MemoryCompareGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.MEMORY_SHEET, data) - - def generate_data(self): - def get_row_info(torch_op_node: TorchOpNode): - if not torch_op_node: - return [None] * 4 + [0] - memory_list = TreeBuilder.get_total_memory(torch_op_node) - size = 0 - memory_details = "" - for memory in memory_list: - size += memory.size - memory_details += memory.memory_details - return [torch_op_node.name, torch_op_node.input_shape, torch_op_node.input_type, memory_details, size] - - if not self.data: - return [] - data = [None] * (len(self.data)) - for index, (base_op, comparison_op) in enumerate(self.data): - base_row = get_row_info(base_op) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - comparison_row = [None] * 5 - diff_ratio = [None] * 2 - else: - comparison_row = get_row_info(comparison_op) - diff_ratio = calculate_diff_ratio(base_row[-1], comparison_row[-1]) - data[index] = base_row + comparison_row + diff_ratio - return data diff --git a/profiler/compare_tools/generation/memory_statistic_generator.py b/profiler/compare_tools/generation/memory_statistic_generator.py deleted file mode 100644 index 652e73a38d2e2c07fd401f2319565d46c56a1853..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/memory_statistic_generator.py +++ 
/dev/null @@ -1,53 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.tree_builder import TreeBuilder - - -class MemoryStatisticGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.MEMORY_TOP_SHEET, data) - - def generate_data(self): - base_op_dict, comparison_op_dict = {}, {} - for base_op, comparison_op in self.data: - if base_op: - memory_list = TreeBuilder.get_total_memory(base_op) - size = sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) - duration = sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) - base_op_dict.setdefault(base_op.name, {}).setdefault("size", []).append(size) - base_op_dict.setdefault(base_op.name, {}).setdefault("duration", []).append(duration) - if comparison_op: - memory_list = TreeBuilder.get_total_memory(comparison_op) - size = sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) - duration = sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) - comparison_op_dict.setdefault(comparison_op.name, {}).setdefault("size", []).append(size) - comparison_op_dict.setdefault(comparison_op.name, {}).setdefault("duration", []).append(duration) - result_data = [] - for op_name, base_data in base_op_dict.items(): - base_dur = sum(base_data.get("duration", [])) - base_size = sum(base_data.get("size", [])) - base_num = len(base_data.get("size", [])) - comparison_data = comparison_op_dict.pop(op_name, None) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - result_data.append([op_name, base_dur, base_size, base_num] + [None] * 5) - elif comparison_data: - comparison_dur = sum(comparison_data.get("duration", [])) - comparison_size = sum(comparison_data.get("size", [])) - comparison_num = len(comparison_data.get("size", [])) - result_data.append( - [op_name, base_dur, 
base_size, base_num, comparison_dur, comparison_size, - comparison_num] + calculate_diff_ratio(base_size, comparison_size)) - else: - result_data.append( - [op_name, base_dur, base_size, base_num, 0, 0, 0] + calculate_diff_ratio(base_size, 0)) - for op_name, comparison_data_dict in comparison_op_dict.items(): - comparison_dur = sum(comparison_data_dict.get("duration", [])) - comparison_size = sum(comparison_data_dict.get("size", [])) - comparison_num = len(comparison_data_dict.get("size", [])) - result_data.append([op_name, 0, 0, 0, comparison_dur, comparison_size, comparison_num] + - calculate_diff_ratio(0, comparison_size)) - if ArgsManager().base_profiling_path != ArgsManager().comparison_profiling_path: - result_data.sort(key=lambda x: x[-2], reverse=True) - return result_data diff --git a/profiler/compare_tools/generation/operator_compare_generator.py b/profiler/compare_tools/generation/operator_compare_generator.py deleted file mode 100644 index 0f876a3ed834a96e9fa581bda06004905f7c4e2d..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/operator_compare_generator.py +++ /dev/null @@ -1,39 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.torch_op_node import TorchOpNode -from utils.tree_builder import TreeBuilder - - -class OperatorCompareGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.OPERATOR_SHEET, data) - - def generate_data(self): - def get_row_info(torch_op_node: TorchOpNode): - if not torch_op_node: - return [None] * 4 + [0] - kernel_list = TreeBuilder.get_total_kernels(torch_op_node) - duration = 0 - kernel_details = "" - for kernel in kernel_list: - duration += kernel.device_dur - kernel_details += kernel.kernel_details - return [torch_op_node.name, torch_op_node.input_shape, torch_op_node.input_type, kernel_details, duration] - 
- if not self.data: - return [] - data = [None] * (len(self.data)) - index = 0 - for base_op, comparison_op in self.data: - base_row = get_row_info(base_op) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - comparison_row = [None] * 5 - diff_ratio = [None] * 2 - else: - comparison_row = get_row_info(comparison_op) - diff_ratio = calculate_diff_ratio(base_row[-1], comparison_row[-1]) - data[index] = base_row + comparison_row + diff_ratio - index += 1 - return data diff --git a/profiler/compare_tools/generation/operator_statistic_generator.py b/profiler/compare_tools/generation/operator_statistic_generator.py deleted file mode 100644 index ec685c42f41f7e73521ef82b711a3c88fb011801..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/operator_statistic_generator.py +++ /dev/null @@ -1,43 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.tree_builder import TreeBuilder - - -class OperatorStatisticGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.OPERATOR_TOP_SHEET, data) - - def generate_data(self): - base_op_dict, comparison_op_dict = {}, {} - for base_op, comparison_op in self.data: - if base_op: - kernel_list = TreeBuilder.get_total_kernels(base_op) - duration = sum([kernel.device_dur / Constant.US_TO_MS for kernel in kernel_list]) - base_op_dict.setdefault(base_op.name, []).append(duration) - if comparison_op: - kernel_list = TreeBuilder.get_total_kernels(comparison_op) - duration = sum([kernel.device_dur / Constant.US_TO_MS for kernel in kernel_list]) - comparison_op_dict.setdefault(comparison_op.name, []).append(duration) - result_data = [] - for op_name, base_duration_list in base_op_dict.items(): - base_dur = sum(base_duration_list) - comparison_duration_list = comparison_op_dict.pop(op_name, None) - if 
ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - result_data.append([op_name, base_dur, len(base_duration_list)] + [None] * 4) - elif comparison_duration_list: - comparison_dur = sum(comparison_duration_list) - result_data.append( - [op_name, base_dur, len(base_duration_list), comparison_dur, - len(comparison_duration_list)] + calculate_diff_ratio(base_dur, comparison_dur)) - else: - result_data.append( - [op_name, base_dur, len(base_duration_list), 0, 0] + calculate_diff_ratio(base_dur, 0)) - for op_name, comparison_duration_list in comparison_op_dict.items(): - comparison_dur = sum(comparison_duration_list) - result_data.append([op_name, 0, 0, comparison_dur, len(comparison_duration_list)] + - calculate_diff_ratio(0, comparison_dur)) - if ArgsManager().base_profiling_path != ArgsManager().comparison_profiling_path: - result_data.sort(key=lambda x: x[-2], reverse=True) - return result_data diff --git a/profiler/compare_tools/generation/__init__.py b/profiler/compare_tools/generator/__init__.py similarity index 100% rename from profiler/compare_tools/generation/__init__.py rename to profiler/compare_tools/generator/__init__.py diff --git a/profiler/compare_tools/generator/base_generator.py b/profiler/compare_tools/generator/base_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..c472bc9922e6febf118f62a66424056243156c07 --- /dev/null +++ b/profiler/compare_tools/generator/base_generator.py @@ -0,0 +1,22 @@ +from abc import ABC, abstractmethod +from multiprocessing import Process + + +class BaseGenerator(Process, ABC): + def __init__(self, profiling_data_dict: dict, args: any): + super(BaseGenerator, self).__init__() + self._profiling_data_dict = profiling_data_dict + self._args = args + self._result_data = {} + + def run(self): + self.compare() + self.generate_view() + + @abstractmethod + def compare(self): + raise NotImplementedError("Function compare need to be implemented.") + + @abstractmethod + def 
generate_view(self): + raise NotImplementedError("Function generate_view need to be implemented.") diff --git a/profiler/compare_tools/generator/comparison_generator.py b/profiler/compare_tools/generator/comparison_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..a8f8835e20c81f87154968f666cfe50831f2122f --- /dev/null +++ b/profiler/compare_tools/generator/comparison_generator.py @@ -0,0 +1,36 @@ +from generator.detail_performance_generator import DetailPerformanceGenerator +from generator.overall_performance_generator import OverallPerformanceGenerator +from profiling_parser.gpu_profiling_parser import GPUProfilingParser +from profiling_parser.npu_profiling_parser import NPUProfilingParser +from utils.constant import Constant +from utils.args_manager import ArgsManager + + +class ComparisonGenerator: + PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} + + def __init__(self): + self._args_manager = ArgsManager() + self._overall_data = None + self._details_data = None + + def run(self): + self.load_data() + self.generate_compare_result() + + def load_data(self): + base_data = self.PARSER_DICT.get(self._args_manager.base_profiling_type)( + self._args_manager.args, self._args_manager.base_path_dict).load_data() + comparison_data = self.PARSER_DICT.get(self._args_manager.comparison_profiling_type)( + self._args_manager.args, self._args_manager.comparison_path_dict).load_data() + self._overall_data = {Constant.BASE_DATA: base_data.overall_metrics, + Constant.COMPARISON_DATA: comparison_data.overall_metrics} + self._details_data = {Constant.BASE_DATA: base_data, Constant.COMPARISON_DATA: comparison_data} + + def generate_compare_result(self): + generator_list = [OverallPerformanceGenerator(self._overall_data, self._args_manager.args), + DetailPerformanceGenerator(self._details_data, self._args_manager.args)] + for generator in generator_list: + generator.start() + for generator in generator_list: + 
generator.join() diff --git a/profiler/compare_tools/comparator/op_comparator.py b/profiler/compare_tools/generator/detail_performance_generator.py similarity index 58% rename from profiler/compare_tools/comparator/op_comparator.py rename to profiler/compare_tools/generator/detail_performance_generator.py index 8ccd428ef82250266722bed0dfd59a97c6dc39c0..a061ee882a9449e7ad2e82065a4c8f5cc56dd352 100644 --- a/profiler/compare_tools/comparator/op_comparator.py +++ b/profiler/compare_tools/generator/detail_performance_generator.py @@ -1,26 +1,70 @@ +import os from collections import deque +from datetime import datetime import numpy as np -from utils.args_manager import ArgsManager +from comparator.communication_comparator import CommunicationComparator +from comparator.operator_comparator import OperatorComparator +from comparator.operator_statistic_comparator import OperatorStatisticComparator +from compare_bean.communication_bean import CommunicationBean +from compare_bean.memory_compare_bean import MemoryCompareBean +from compare_bean.memory_statistic_bean import MemoryStatisticBean +from compare_bean.operator_compare_bean import OperatorCompareBean +from compare_bean.operator_statistic_bean import OperatorStatisticBean +from generator.base_generator import BaseGenerator +from profiling_parser.base_profiling_parser import ProfilingResult +from utils.constant import Constant from utils.name_function import NameFunction from utils.torch_op_node import TorchOpNode from utils.tree_builder import TreeBuilder +from view.excel_view import ExcelView -class OpComparator: - def __init__(self, args: any): - self._args = args - self._args_manager = ArgsManager() - self._base_profiling = self._args_manager.base_profiling - self._comparison_profiling = self._args_manager.comparison_profiling +class DetailPerformanceGenerator(BaseGenerator): + def __init__(self, profiling_data_dict: dict, args: any): + super().__init__(profiling_data_dict, args) - def compare(self) -> list: - 
base_ops = self._get_top_layer_ops(self._base_profiling) - if self._args.base_profiling_path == self._args.comparison_profiling_path: - comparison_ops = [] - else: - comparison_ops = self._get_top_layer_ops(self._comparison_profiling) + def compare(self): + if self._args.enable_operator_compare or self._args.enable_memory_compare or \ + self._args.enable_communication_compare: + print("[INFO] Start to compare performance detail data, please wait.") + comparator_list = self._create_comparator() + for comparator in comparator_list: + self._result_data.update(comparator.generate_data()) + + def generate_view(self): + if not self._result_data: + return + dir_path = self._args.output_path if self._args.output_path else "./" + file_name = "performance_comparison_result_{}.xlsx".format(datetime.utcnow().strftime("%Y%m%d%H%M%S")) + result_file_path = os.path.realpath(os.path.join(dir_path, file_name)) + ExcelView(self._result_data, result_file_path, self._args).generate_view() + print(f"[INFO] The comparison result file has been generated: {result_file_path}") + + def _create_comparator(self): + comparator_list = [] + if self._args.enable_operator_compare or self._args.enable_memory_compare: + op_compare_result = self.match_torch_op() + + if self._args.enable_communication_compare: + communication_data = { + Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).communication_dict, + Constant.COMPARISON_DATA: self._profiling_data_dict.get(Constant.COMPARISON_DATA).communication_dict} + comparator_list.append(CommunicationComparator(communication_data, CommunicationBean)) + + if self._args.enable_operator_compare: + comparator_list.append(OperatorComparator(op_compare_result, OperatorCompareBean)) + comparator_list.append(OperatorStatisticComparator(op_compare_result, OperatorStatisticBean)) + + if self._args.enable_memory_compare: + comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) + 
comparator_list.append(OperatorStatisticComparator(op_compare_result, MemoryStatisticBean)) + return comparator_list + + def match_torch_op(self) -> list: + base_ops = self._get_top_layer_ops(self._profiling_data_dict.get(Constant.BASE_DATA)) + comparison_ops = self._get_top_layer_ops(self._profiling_data_dict.get(Constant.COMPARISON_DATA)) if not base_ops and not comparison_ops: return [] name_func = NameFunction(self._args).get_name_func() @@ -29,7 +73,6 @@ class OpComparator: compare_result_data = self._drill_down(compare_result_data, name_func) return compare_result_data - @classmethod def _matching_op(cls, base_ops: list, comparison_ops: list, name_func: any) -> list: if not comparison_ops: @@ -83,23 +126,9 @@ class OpComparator: result_data.append([None, comparison_ops[comparison_index]]) return result_data - def _get_top_layer_ops(self, profiling_instance: any) -> any: - torch_op_data = profiling_instance.torch_op_data - if not torch_op_data: - print(f"[WARNING] Can't find any torch op in the file: {profiling_instance.json_path}") - root_node = TreeBuilder.build_tree(torch_op_data) - - kernel_dict, memory_list = {}, [] - if self._args.enable_operator_compare: - kernel_dict = profiling_instance.kernel_dict - if not kernel_dict: - print(f"[WARNING] Can't find any flow event in the file: {profiling_instance.json_path}") - if self._args.enable_memory_compare: - memory_list = profiling_instance.memory_list - if not memory_list: - print(f"[WARNING] Can't find any memory event in the file: {profiling_instance.file_path}") - - TreeBuilder.update_tree_node(root_node, kernel_dict, memory_list) + def _get_top_layer_ops(self, profiling_data: ProfilingResult) -> any: + root_node = TreeBuilder.build_tree(profiling_data.torch_op_data, profiling_data.kernel_dict, + profiling_data.memory_list) level1_child_nodes = root_node.child_nodes result_data = [] for level1_node in level1_child_nodes: diff --git a/profiler/compare_tools/generator/overall_performance_generator.py 
b/profiler/compare_tools/generator/overall_performance_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..d2aa181371b2b4c2bcd6667ebe394ab5096674c6 --- /dev/null +++ b/profiler/compare_tools/generator/overall_performance_generator.py @@ -0,0 +1,19 @@ +from comparator.overall_performance_comparator import OverallPerformanceComparator +from compare_bean.profiling_info import ProfilingInfo +from generator.base_generator import BaseGenerator +from view.screen_view import ScreenView + + +class OverallPerformanceGenerator(BaseGenerator): + def __init__(self, profiling_data_dict: dict, args: any): + super().__init__(profiling_data_dict, args) + + def compare(self): + if not self._args.enable_profiling_compare: + return + self._result_data = OverallPerformanceComparator(self._profiling_data_dict, ProfilingInfo).generate_data() + + def generate_view(self): + if not self._result_data: + return + ScreenView(self._result_data).generate_view() diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py index 4ab8bb898522d99cc78759496ad1d36ed6f421cd..6218c7e969850fb2a2926e9f38e0a45bd77c6770 100644 --- a/profiler/compare_tools/performance_compare.py +++ b/profiler/compare_tools/performance_compare.py @@ -3,21 +3,12 @@ import ast import datetime import os.path import sys -import time sys.path.append( os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse")) -from generation.comparison_generator import ComparisonGenerator +from generator.comparison_generator import ComparisonGenerator from utils.args_manager import ArgsManager -from profiling_analysis.profiling_parse import prof_main -from common_func.path_manager import PathManager - - -def performance_compare(args): - if not args.enable_profiling_compare: - return - prof_main() def main(): @@ -37,20 +28,7 @@ def main(): args = parser.parse_args() ArgsManager().init(args) - - try: - performance_compare(args) - except 
Exception: - print("[WARNING] Profiling failed to analyze.") - - if any([args.enable_operator_compare, args.enable_memory_compare, args.enable_communication_compare]): - print("[INFO] Start to compare performance data, please wait.") - dir_path = args.output_path if args.output_path else "./" - file_name = "performance_comparison_result_{}.xlsx".format( - time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) - result_file_path = PathManager.get_realpath(os.path.join(dir_path, file_name)) - ComparisonGenerator(args).run(result_file_path) - print(f"[INFO] The comparison result file has been generated: {result_file_path}") + ComparisonGenerator().run() if __name__ == "__main__": diff --git a/profiler/compare_tools/profiling_analysis/__init__.py b/profiler/compare_tools/profiling_analysis/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/compare_tools/profiling_analysis/gpu_parser.py b/profiler/compare_tools/profiling_analysis/gpu_parser.py deleted file mode 100644 index 8f1b6d9c033683621c77d7eaf7a0cf54ab31813f..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/gpu_parser.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from collections import Counter, defaultdict -import pandas as pd - -import profiling_analysis.parser_helper as parser_helper -from utils.file_reader import FileReader -from utils.constant import Constant - - -class OpTimeWarper: - def __init__( - self, - cube_time: float = 0.0, - sdma_time: float = 0.0, - vec_time: float = 0.0, - fa_time_fwd: float = 0.0, - fa_time_bwd: float = 0.0, - all_op_time: float = 0.0, - compute_stream_dur: float = 0.0, - cube_num: int = 0, - vec_num: int = 0, - sdma_num: int = 0, - fa_num_bwd: int = 0, - fa_num_fwd: int = 0 - ): - self.cube_time = cube_time - self.sdma_time = sdma_time - self.vec_time = vec_time - self.fa_time_fwd = fa_time_fwd - self.fa_time_bwd = fa_time_bwd - self.all_op_time = all_op_time - self.compute_stream_dur = compute_stream_dur - self.cube_num = cube_num - self.vec_num = vec_num - self.sdma_num = sdma_num - self.fa_num_bwd = fa_num_bwd - self.fa_num_fwd = fa_num_fwd - - -class GpuProfilingParser: - NCCL_MARK = 'nccl' - CUBE_MARK = 'gemm' - 
FA_MARK_LIST = [['fmha', 'kernel'], ['flash', 'kernel']] - SDMA_MARK_LIST = ['htod', 'dtod', 'dtoh', 'memset (device)'] - - def __init__(self, gpu_path): - self.trace_events = FileReader.read_trace_file(gpu_path).get('traceEvents') - self.compute_stream_id = self.infer_compute_stream_id() - self.one_step_time = 0 - self.profiling_info = parser_helper.ProfilingInfo('GPU') - - def is_flash_attention(self, name: str): - for fa_mark in self.FA_MARK_LIST: - if not len([1 for mark in fa_mark if mark not in name.lower()]): - return True - return False - - def is_sdma_time(self, name: str): - for mark in self.SDMA_MARK_LIST: - if mark in name.lower(): - return True - return False - - def update_op_list(self, op_list, marks): - cube_time = 0.0 - all_op_time = 0.0 - fa_time_bwd = 0.0 - fa_time_fwd = 0.0 - sdma_time = 0.0 - vec_time = 0.0 - cube_num = 0 - vec_num = 0 - sdma_num = 0 - fa_num_bwd = 0 - fa_num_fwd = 0 - compute_stream_dur = 0.0 - for event in self.trace_events: - if not isinstance(event, dict): - continue - if event.get('args') and event.get('args').get('stream') == self.compute_stream_id: - compute_stream_dur += float(event.get('dur')) - if not {'name', 'cat', 'dur', 'ts'} < event.keys(): - continue - name = event.get('name') - dur = event.get('dur') - ts = event.get('ts') - cat = event.get('cat', '') - if event.get('args') and event.get('args').get('stream') == self.compute_stream_id: - if self.is_sdma_time(name): - sdma_time += float(dur) - sdma_num += 1 - continue - if cat.lower() != 'kernel': - continue - if self.NCCL_MARK in name.lower(): - for timestep in range(ts + 1, ts + dur + 1): - marks[str(timestep)] += 1 # mark this timestep in communication stream - continue - else: - for timestep in range(ts + 1, ts + dur + 1): - marks[str(timestep)] += -100 # mark this timestep in compute stream - if self.is_flash_attention(name): - if 'bwd' in name.lower(): - fa_time_bwd += float(dur) - fa_num_bwd += 1 - else: - fa_time_fwd += float(dur) - fa_num_fwd += 1 - 
elif self.CUBE_MARK in name.lower(): - cube_num += 1 - cube_time += float(dur) - else: - vec_num += 1 - vec_time += float(dur) - all_op_time += float(dur) - op_list.append([ts, name, cat, dur]) - time_wrapper = OpTimeWarper( - cube_time=cube_time, - sdma_time=sdma_time, - vec_time=vec_time, - fa_time_fwd=fa_time_fwd, - fa_time_bwd=fa_time_bwd, - all_op_time=all_op_time, - compute_stream_dur=compute_stream_dur, - cube_num=cube_num, - vec_num=vec_num, - sdma_num=sdma_num, - fa_num_bwd=fa_num_bwd, - fa_num_fwd=fa_num_fwd - ) - return time_wrapper - - def parse_events(self): - op_list = [] - marks = defaultdict(int) # mark for compute communication_not_overlapped time - - time_wrapper = self.update_op_list(op_list, marks) - cube_time = time_wrapper.cube_time - fa_time_fwd = time_wrapper.fa_time_fwd - fa_time_bwd = time_wrapper.fa_time_bwd - all_op_time = time_wrapper.all_op_time - compute_stream_dur = time_wrapper.compute_stream_dur - cube_num = time_wrapper.cube_num - vec_num = time_wrapper.vec_num - sdma_num = time_wrapper.sdma_num - sdma_time = time_wrapper.sdma_time - vec_time = time_wrapper.vec_time - - self.profiling_info.compute_time = len([_ for _, value in marks.items() if value < 0]) / 10 ** 6 - self.profiling_info.communication_not_overlapped = len([_ for _, value in marks.items() if value > 0]) / 10 ** 6 - self.profiling_info.flash_attention_time_bwd = fa_time_bwd / 10 ** 6 - self.profiling_info.flash_attention_time_fwd = fa_time_fwd / 10 ** 6 - self.profiling_info.cube_time = cube_time / 10 ** 6 - self.profiling_info.vec_time = self.profiling_info.compute_time - (cube_time + fa_time_fwd + fa_time_bwd) / 10 ** 6 - self.profiling_info.cube_num = cube_num - self.profiling_info.vec_num = vec_num - self.profiling_info.sdma_num = sdma_num - self.profiling_info.fa_num_bwd = time_wrapper.fa_num_bwd - self.profiling_info.fa_num_fwd = time_wrapper.fa_num_fwd - self.profiling_info.sdma_time = sdma_time / 10 ** 6 - self.parse_e2e_time() - - 
self.profiling_info.scheduling_time = self.profiling_info.e2e_time - self.profiling_info.compute_time - \ - self.profiling_info.communication_not_overlapped - if self.profiling_info.e2e_time < Constant.EPS: - self.profiling_info.scheduling_ratio = 0.0 - else: - self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time - self.parse_memory_reserved() - - def parse_e2e_time(self): - compute_events_timeline = [event for event in self.trace_events if - event.get('args') and event.get('args').get('stream')] - compute_events_timeline = sorted(compute_events_timeline, key=lambda event: event.get('ts')) - self.profiling_info.e2e_time = (compute_events_timeline[-1].get('ts') + compute_events_timeline[-1].get('dur') - - compute_events_timeline[0].get('ts')) / 10 ** 6 - - def parse_memory_reserved(self): - memories = [ - event.get('args').get('Total Reserved') for event in self.trace_events - if event.get('name', '').lower() == '[memory]' and event.get('args').get('Device Id') >= 0 - ] - if not memories: - print("[INFO] Gpu profiling data doesn't contain memory info") - return - self.profiling_info.memory_used = max(memories) / 1024 ** 3 - - def infer_compute_stream_id(self): - kernel_stream_ids = [] - for event in self.trace_events: - is_kernel_exec_event = event.get('cat', '').lower() == 'kernel' and self.NCCL_MARK not in event.get('name', '').lower() - has_stream_id_event = event.get('args') and event.get('args').get('stream') - if is_kernel_exec_event and has_stream_id_event: - kernel_stream_ids.append(event.get('args').get('stream')) - if not kernel_stream_ids: - raise RuntimeError('[ERROR] The profiling data does not contain kernel running data.') - counter = Counter(kernel_stream_ids) - return counter.most_common(1)[0][0] diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py deleted file mode 100644 index 
25b140106631e6379c7c8d1d32631cf6d23e23b9..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/npu_parser.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -from collections import defaultdict -import pandas as pd -import profiling_analysis.parser_helper as parser_helper -from utils.file_reader import FileReader -from common_func.path_manager import PathManager -from common_func.file_manager import FileManager - - -class NpuInfoWrapper: - def __init__( - self, - compute_time: int, - communication_time: int, - sdma_time: int, - sdma_num: int, - is_cluster: bool, - event_wait_sqe: dict, - ai_core_dict: dict, - event_wait_sqe_res: dict, - ai_core_res: dict, - ): - self.compute_time = compute_time - self.communication_time = communication_time - self.sdma_time = sdma_time - self.sdma_num = sdma_num - self.is_cluster = is_cluster - self.event_wait_sqe = event_wait_sqe - self.ai_core_dict = ai_core_dict - self.event_wait_sqe_res = event_wait_sqe_res - self.ai_core_res = ai_core_res - - -class NpuProfilingParser: - FLASH_ATTENTION = "flashattention" - ACLNNINPLACE_COPY = "aclnninplacecopy" - TENSORMOVE = "tensormove" - MATMUL = "matmul" - - def __init__(self, npu_step_time, npu_file_path): - self.npu_json_file = npu_file_path.get('trace_view') - self.npu_summary_file = npu_file_path.get('kernel_details') - 
self.npu_mem_file = npu_file_path.get('memory_record') - self.info_json = npu_file_path.get('info') - self.profiling_info = parser_helper.ProfilingInfo('NPU') - self.npu_step_time = npu_step_time - self.parallel_time = 0 - self.aicore_time = 0 - self.min_stream_ts = sys.float_info.max - self.max_stream_ts = sys.float_info.min - self.sdma_sqe = defaultdict(float) - self.sdma_num_cnt = defaultdict(int) - - def get_sdma_para(self, sdma_sqe, sdma_num_cnt, ai_core_dict, event_wait_sqe) -> (float, int): - compute_stream = [] - parallel_stream = [] - sdma_time = 0.0 - sdma_parallel_time = 0.0 - sdma_num = 0 - sdma_parallel_num = 0 - if len(ai_core_dict) == 1: - compute_stream.append(min(ai_core_dict.keys())) - elif len(ai_core_dict) == 2: # 2个ai_core,存在并行流(当前最多2条算子计算流) - compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys()) - parallel_stream = list(ai_core_dict.keys() - set(compute_stream)) - else: - print('[WARNING] Npu Compute Stream Num Error.') - if parallel_stream: - sdma_parallel_time = sdma_sqe[parallel_stream[0]] - sdma_parallel_num = sdma_num_cnt[parallel_stream[0]] - if compute_stream: - sdma_time = sdma_sqe[compute_stream[0]] + sdma_parallel_time - sdma_num = sdma_num_cnt[compute_stream[0]] + sdma_parallel_num - return sdma_time, sdma_num - - def parse_npu_json_events(self): - if not self.npu_json_file: - print('[WARNING] Npu trace json file is not available.') - return - compute_time = 0 - communication_time = 0 - min_ts = sys.float_info.max - max_ts = sys.float_info.min - is_cluster = False # 表明没有获取到compute time的耗时 - data = FileReader.read_trace_file(self.npu_json_file) - event_wait_sqe = defaultdict(list) - ai_core_dict = defaultdict(list) - event_wait_sqe_res = defaultdict(float) - ai_core_res = defaultdict(float) - for dic in data: - self.get_ts_by_task_type(dic, event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res) - if ('name' in dic) and (dic.get('name', '') == 'Computing'): - is_cluster = True - ts = float(dic.get('ts', 0)) - dur 
= dic.get('dur') - compute_time += dur - min_ts = ts if ts < min_ts else min_ts - max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts - if ('name' in dic) and (dic.get('name', '') == 'Communication(Not Overlapped)'): - is_cluster = True - ts = float(dic.get('ts')) - dur = dic.get('dur') - communication_time += dur - min_ts = ts if ts < min_ts else min_ts - max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts - sdma_time, sdma_num = self.get_sdma_para(self.sdma_sqe, self.sdma_num_cnt, ai_core_dict, event_wait_sqe) - npu_info_wrapper = NpuInfoWrapper( - compute_time, communication_time, sdma_time, sdma_num, is_cluster, - event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res) - self.update_npu_info(max_ts - min_ts, npu_info_wrapper) - - def update_npu_info(self, ts_dur, npu_info_wrapper): - compute_time = npu_info_wrapper.compute_time - communication_time = npu_info_wrapper.communication_time - is_cluster = npu_info_wrapper.is_cluster - event_wait_sqe = npu_info_wrapper.event_wait_sqe - ai_core_dict = npu_info_wrapper.ai_core_dict - event_wait_sqe_res = npu_info_wrapper.event_wait_sqe_res - ai_core_res = npu_info_wrapper.ai_core_res - sdma_time = npu_info_wrapper.sdma_time - sdma_num = npu_info_wrapper.sdma_num - # AI_CORE和EVENT_WAIT_SQE共存为计算流 - compute_stream = [] - parallel_stream = [] - if not is_cluster: - #单机单卡没有overlap analysis - if len(ai_core_dict) == 1: - compute_stream.append(min(ai_core_dict.keys())) - elif len(ai_core_dict) == 2: # 2个ai_core,存在并行流(当前最多2条算子计算流) - compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys()) - parallel_stream = list(ai_core_dict.keys() - set(compute_stream)) - else: - print('[WARNING] Npu trace json file lack of Stream info') - return - cs_event_wait_sqe_list = event_wait_sqe[compute_stream[0]] - if parallel_stream: - cs_ai_core_list = ai_core_dict[parallel_stream[0]] - sorted(cs_event_wait_sqe_list, key=lambda x: (x[0])) - sorted(cs_ai_core_list, key=lambda x: (x[0])) - self.parallel_time = 
self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list) - self.profiling_info.compute_time = compute_time / 10 ** 6 if is_cluster else \ - ai_core_res[compute_stream[0]] / 10 ** 6 - self.profiling_info.other_time = max(0, self.profiling_info.compute_time - self.profiling_info.cube_time - \ - self.profiling_info.flash_attention_time_fwd - self.profiling_info.flash_attention_time_bwd - \ - self.profiling_info.vec_time) - self.profiling_info.e2e_time = ts_dur / 10 ** 6 if is_cluster else \ - (self.max_stream_ts - self.min_stream_ts) / 10 ** 6 - self.profiling_info.communication_not_overlapped = communication_time / 10 ** 6 \ - if is_cluster else (event_wait_sqe_res[compute_stream[0]] - self.parallel_time) / 10 ** 6 - time_required = self.profiling_info.compute_time + self.profiling_info.communication_not_overlapped - self.profiling_info.sdma_time += sdma_time / 10 ** 6 - self.profiling_info.sdma_num += sdma_num - if self.npu_step_time: - self.profiling_info.scheduling_time = self.npu_step_time - time_required - else: - self.profiling_info.scheduling_time = self.profiling_info.e2e_time - time_required - self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time \ - if self.profiling_info.e2e_time != 0 else 0 - - def parse_info_json(self): - if not self.info_json: - return - json_data = FileReader.read_trace_file(self.info_json) - if not json_data: - return - if "ProfilerActivity.CPU" in json_data.get('config', {}).get('common_config', {}).get('activities', []): - return - if 'Level0' != json_data.get('config', {}).get('experimental_config', {}).get('_profiler_level', ''): - return - self.profiling_info.minimal_profiling = True - - def parse_npu_csv_events(self): - self.parse_mem_csv() - if not self.npu_summary_file: - print('[WARNING] Npu kernel details csv file is not available.') - return - PathManager.check_path_readable(self.npu_summary_file) - FileManager.check_file_size(self.npu_summary_file) - info = 
pd.read_csv(self.npu_summary_file, index_col=None) - cube_time = 0.0 - vec_time = 0.0 - sdma_time = 0.0 - fa_time_fwd = 0.0 - fa_time_bwd = 0.0 - cube_num = 0 - vec_num = 0 - fa_num_bwd = 0 - fa_num_fwd = 0 - sdma_num = 0 - if info.get('mac_time(us)') is None and info.get('aiv_vec_time(us)') is None: - self.profiling_info.hide_op_details = True - return - for i in range(len(info['Model ID'])): - op_type = info.loc[i, 'Type'] - name = info.loc[i, 'Name'] - aiv_vec_time = info.loc[i, 'aiv_vec_time(us)'] if info.get('aiv_vec_time(us)') is not None else None - mac_time = info.loc[i, 'mac_time(us)'] if info.get('mac_time(us)') is not None else None - if pd.isna(aiv_vec_time) and pd.isna(mac_time): - continue - task_durations = info.loc[i, 'Duration(us)'] - if self.FLASH_ATTENTION in op_type.lower(): - if 'bwd' in op_type.lower() or 'grad' in op_type.lower(): - fa_time_bwd += task_durations - fa_num_bwd += 1 - else: - fa_time_fwd += task_durations - fa_num_fwd += 1 - elif self.MATMUL in op_type.lower(): - cube_time += task_durations - cube_num += 1 - elif name.lower().startswith(self.ACLNNINPLACE_COPY) and self.TENSORMOVE in name.lower(): - sdma_time += task_durations - sdma_num += 1 - else: - is_vec = (aiv_vec_time and aiv_vec_time > 0) or (mac_time is not None and mac_time == 0) - if is_vec: - vec_time += task_durations - vec_num += 1 - else: - cube_time += task_durations - cube_num += 1 - - self.profiling_info.cube_time = cube_time / 10 ** 6 - self.profiling_info.vec_time = vec_time / 10 ** 6 - self.profiling_info.flash_attention_time_bwd = fa_time_bwd / 10 ** 6 - self.profiling_info.flash_attention_time_fwd = fa_time_fwd / 10 ** 6 - self.profiling_info.cube_num = cube_num - self.profiling_info.vec_num = vec_num - self.profiling_info.fa_num_bwd = fa_num_bwd - self.profiling_info.fa_num_fwd = fa_num_fwd - self.profiling_info.sdma_time = sdma_time / 10 ** 6 - self.profiling_info.sdma_num = sdma_num - - - def parse_mem_csv(self): - if not self.npu_mem_file: - 
print('[INFO] Npu op memory csv file is not available.') - return - try: - PathManager.check_path_readable(self.npu_mem_file) - FileManager.check_file_size(self.npu_mem_file) - info = pd.read_csv(self.npu_mem_file, usecols=['Total Reserved(MB)'], index_col=None) - except ValueError: - print('[ERROR] Load memory info failed.') - else: - self.profiling_info.memory_used = max(info.get('Total Reserved(MB)')) / 1024 - - @staticmethod - def interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list): - ans = 0 - i = 0 - j = 0 - while i < len(cs_event_wait_sqe_list) and j < len(cs_ai_core_list): - lo = max(cs_event_wait_sqe_list[i][0], cs_ai_core_list[j][0]) - hi = min(cs_event_wait_sqe_list[i][1], cs_ai_core_list[j][1]) - if lo <= hi: - ans += (hi - lo) - if cs_event_wait_sqe_list[i][1] < cs_ai_core_list[j][1]: - i += 1 - else: - j += 1 - return ans - - def get_ts_by_task_type(self, dic, event_wait_sqe, ai_core_dict, enent_wait_res, ai_core_res): - if not dic.get('args'): - return - args = dic.get('args') - if args.get('Stream Id'): - stream_id = args.get('Stream Id') - ts = float(dic.get('ts')) - dur = dic.get('dur') - if args.get('Task Type') == 'EVENT_WAIT_SQE': - enent_wait_res[stream_id] += dur - event_wait_sqe[stream_id].append([ts, ts + dur]) - elif args.get('Task Type') in ('SDMA_SQE', 'PCIE_DMA_SQE'): - self.sdma_sqe[stream_id] += dur - self.sdma_num_cnt[stream_id] += 1 - elif args.get('Task Type') in ('AI_CORE', 'MIX_AIC', 'MIX_AIV', 'AI_CPU', 'AI_VECTOR_CORE', 'FFTS_PLUS'): - ai_core_res[stream_id] += dur - ai_core_dict[stream_id].append([ts, ts + dur]) - self.min_stream_ts = ts if ts < self.min_stream_ts else self.min_stream_ts - self.max_stream_ts = (ts + dur) if (ts + dur) > self.max_stream_ts else self.max_stream_ts diff --git a/profiler/compare_tools/profiling_analysis/parser_helper.py b/profiler/compare_tools/profiling_analysis/parser_helper.py deleted file mode 100644 index 
caf09056ee4ee5884067abf9e5283fd1c9113c12..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/parser_helper.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os - - -class ProfilingInfo: - def __init__(self, profiling_type: str): - self.profiling_type = profiling_type - self.cube_time = 0.0 - self.other_time = 0.0 - self.vec_time = 0.0 - self.cube_num = 0 - self.vec_num = 0 - self.sdma_num = 0 - self.fa_num_fwd = 0 - self.fa_num_bwd = 0 - self.compute_time = 0.0 - self.communication_not_overlapped = 0.0 - self.scheduling_ratio = 0.0 - self.memory_used = 0.0 - self.e2e_time = 0.0 - self.sdma_time = 0.0 - self.scheduling_time = 0.0 - self.flash_attention_time_bwd = 0.0 - self.flash_attention_time_fwd = 0.0 - self.minimal_profiling = False - self.hide_op_details = False diff --git a/profiler/compare_tools/profiling_analysis/profiling_parse.py b/profiler/compare_tools/profiling_analysis/profiling_parse.py deleted file mode 100644 index adf182900f8d0e76e5904dfe0838aa31496c74ed..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/profiling_parse.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os - -from prettytable import PrettyTable - -from profiling_analysis.gpu_parser import GpuProfilingParser -from profiling_analysis.npu_parser import NpuProfilingParser -from profiling_analysis.parser_helper import ProfilingInfo -from utils.args_manager import ArgsManager -from utils.constant import Constant - - -def generate_table_info(base_profiling_info, comp_profiling_info, table): - headers = [''] - base_col = [f'{base_profiling_info.profiling_type}'] - comp_col = [f'{comp_profiling_info.profiling_type}'] - if not base_profiling_info.hide_op_details and not comp_profiling_info.hide_op_details: - headers.extend(['Cube Time(Num)', 'Vector Time(Num)']) - base_col.extend([f'{base_profiling_info.cube_time:.3f}s({base_profiling_info.cube_num})', - f'{base_profiling_info.vec_time:.3f}s({base_profiling_info.vec_num})']) - comp_col.extend([f'{comp_profiling_info.cube_time:.3f}s({comp_profiling_info.cube_num})', - f'{comp_profiling_info.vec_time:.3f}s({comp_profiling_info.vec_num})']) - if base_profiling_info.other_time or comp_profiling_info.other_time: - headers.append('Other Time') - base_col.append(f'{base_profiling_info.other_time:.3f}s') - comp_col.append(f'{comp_profiling_info.other_time:.3f}s') - if base_profiling_info.flash_attention_time_fwd or comp_profiling_info.flash_attention_time_fwd: - headers.append('Flash Attention Time(Forward)(Num)') - 
base_col.append(f'{base_profiling_info.flash_attention_time_fwd:.3f}s({base_profiling_info.fa_num_fwd})') - comp_col.append(f'{comp_profiling_info.flash_attention_time_fwd:.3f}s({comp_profiling_info.fa_num_fwd})') - if base_profiling_info.flash_attention_time_bwd or comp_profiling_info.flash_attention_time_bwd: - headers.append('Flash Attention Time(Backward)(Num)') - base_col.append(f'{base_profiling_info.flash_attention_time_bwd:.3f}s({base_profiling_info.fa_num_bwd})') - comp_col.append(f'{comp_profiling_info.flash_attention_time_bwd:.3f}s({comp_profiling_info.fa_num_bwd})') - headers.extend(['Computing Time']) - base_col.extend([f'{base_profiling_info.compute_time:.3f}s']) - comp_col.extend([f'{comp_profiling_info.compute_time:.3f}s']) - if base_profiling_info.memory_used or comp_profiling_info.memory_used: - headers.append('Mem Usage') - base_col.append(f'{base_profiling_info.memory_used:.2f}G') - comp_col.append(f'{comp_profiling_info.memory_used:.2f}G') - headers.extend(['Uncovered Communication Time']) - base_col.extend( - [f'{base_profiling_info.communication_not_overlapped: .3f}s']) - comp_col.extend( - [f'{comp_profiling_info.communication_not_overlapped: .3f}s']) - if base_profiling_info.sdma_time or comp_profiling_info.sdma_time: - headers.append('SDMA Time(Num)') - base_col.append(f'{base_profiling_info.sdma_time:.3f}s({base_profiling_info.sdma_num})') - comp_col.append(f'{comp_profiling_info.sdma_time:.3f}s({comp_profiling_info.sdma_num})') - cue = '' - if ((base_profiling_info.profiling_type == "NPU" and not base_profiling_info.minimal_profiling) or - (comp_profiling_info.profiling_type == "NPU" and not comp_profiling_info.minimal_profiling)): - - cue = '(Not minimal profiling)' - - headers.extend(['Free Time', 'E2E Time' + cue]) - base_col.extend( - [f'{base_profiling_info.scheduling_time:.3f}s', f'{base_profiling_info.e2e_time:.3f}s']) - comp_col.extend( - [f'{comp_profiling_info.scheduling_time:.3f}s', f'{comp_profiling_info.e2e_time:.3f}s']) - 
table.field_names = headers - table.add_row(base_col) - table.add_row(comp_col) - - -def show_table(base_profiling_info, comp_profiling_info): - table = PrettyTable() - table.title = 'Model Profiling Time Distribution' - generate_table_info(base_profiling_info, comp_profiling_info, table) - print(table) - - -def parse_gpu(gpu_path): - gpu_parser = GpuProfilingParser(gpu_path) - gpu_parser.parse_events() - return gpu_parser.profiling_info - - -def parse_npu(npu_path): - npu_dir = {'trace_view': None, 'memory_record': None, 'kernel_details': None} - for root, _, files in os.walk(npu_path): - for file in files: - if file == 'trace_view.json': - npu_dir['trace_view'] = os.path.join(root, file) - if file == 'memory_record.csv': - npu_dir['memory_record'] = os.path.join(root, file) - if 'kernel_details' in file: - npu_dir['kernel_details'] = os.path.join(root, file) - if 'profiler_info' in file: - npu_dir['info'] = os.path.join(root, file) - - npu_parser = NpuProfilingParser(0, npu_dir) - npu_parser.parse_npu_csv_events() - npu_parser.parse_info_json() - npu_parser.parse_npu_json_events() - return npu_parser.profiling_info - - -def prof_main(): - base_info = ProfilingInfo('None') - comp_info = ProfilingInfo('None') - if ArgsManager().base_profiling_type == Constant.NPU: - base_info = parse_npu(ArgsManager().base_profiling.file_path) - elif ArgsManager().base_profiling_type == Constant.GPU: - base_info = parse_gpu(ArgsManager().base_profiling.file_path) - if ArgsManager().comparison_profiling_type == Constant.NPU: - comp_info = parse_npu(ArgsManager().comparison_profiling.file_path) - elif ArgsManager().comparison_profiling_type == Constant.GPU: - comp_info = parse_gpu(ArgsManager().comparison_profiling.file_path) - - show_table(base_info, comp_info) - - -if __name__ == '__main__': - prof_main() diff --git a/profiler/compare_tools/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/profiling_parser/base_profiling_parser.py index 
9f11680257bb519e2aec4c8264bda6336be91108..34db040a92a6100be710d1ef863cb5c9da281249 100644 --- a/profiler/compare_tools/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/profiling_parser/base_profiling_parser.py @@ -1,6 +1,7 @@ from abc import abstractmethod, ABC +from decimal import Decimal -from compare_bean.origin_data_bean.compare_event import KernelEvent +from compare_bean.origin_data_bean.compare_event import KernelEvent, MemoryEvent from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean from compare_bean.profiling_info import ProfilingInfo from utils.args_manager import ArgsManager @@ -9,30 +10,54 @@ from utils.file_reader import FileReader class ProfilingResult: + def __init__(self, profiling_type): + self._profiling_type = profiling_type self.torch_op_data = [] self.kernel_dict = {} self.memory_list = [] self.communication_dict = {} self.overall_metrics = ProfilingInfo(profiling_type) + def update_torch_op_data(self, event: TraceEventBean): + event.is_torch_op = True + self.torch_op_data.append(event) + + def update_kernel_dict(self, start_time: Decimal, kernel_event: TraceEventBean): + self.kernel_dict.setdefault(start_time, []).append(KernelEvent(kernel_event, self._profiling_type)) + + def update_memory_list(self, memory_data: dict): + self.memory_list.append(MemoryEvent(memory_data)) + + def update_communication_dict(self, comm_name: str, comm_dur: float): + self.communication_dict.setdefault(comm_name, {}).setdefault("comm_list", []).append(comm_dur) + + def update_comm_task_data(self, comm_name: str, task_event: TraceEventBean): + self.communication_dict.setdefault(comm_name, {}).setdefault("comm_task", {}).setdefault( + task_event.name, []).append(task_event.dur) + class BaseProfilingParser(ABC): + def __init__(self, args: any, path_dict: dict): self._args = args self._profiling_type = path_dict.get(Constant.PROFILING_TYPE) self._profiling_path = path_dict.get(Constant.PROFILING_PATH) self._json_path = 
path_dict.get(Constant.TRACE_PATH) - self._trace_events = FileReader.read_trace_file(self._json_path) + self._trace_events = [] if self._profiling_path == Constant.NPU else {} self._enable_profiling_compare = ArgsManager().enable_profiling_compare self._enable_operator_compare = ArgsManager().enable_operator_compare self._enable_memory_compare = ArgsManager().enable_memory_compare self._enable_communication_compare = ArgsManager().enable_communication_compare self._dispatch_func = self._get_dispatch_func() - self._result_data = ProfilingResult(path_dict.get(Constant.PROFILING_TYPE)) + self._result_data = ProfilingResult(self._profiling_type) self._memory_events = [] self._flow_dict = {} self._all_kernels = {} + self._comm_task_list = [] + self._comm_list = [] + self._read_trace_event() + self._cur_func_index = 0 @abstractmethod def _update_memory_list(self): @@ -43,20 +68,16 @@ class BaseProfilingParser(ABC): raise NotImplementedError("Function _update_overall_metrics need to be implemented.") @abstractmethod - def _picking_communication_event(self, **kwargs): - raise NotImplementedError("Function _picking_communication_event need to be implemented.") - - @abstractmethod - def _picking_torch_op_event(self, **kwargs): - raise NotImplementedError("Function _picking_torch_op_event need to be implemented.") + def _is_kernel_event(self, event: TraceEventBean): + raise NotImplementedError("Function _is_kernel_event need to be implemented.") @abstractmethod - def _picking_kernel_event(self, **kwargs): - raise NotImplementedError("Function _picking_kernel_event need to be implemented.") + def _is_flow_event(self, event: TraceEventBean): + raise NotImplementedError("Function _is_flow_event need to be implemented.") @abstractmethod - def _picking_flow_event(self, **kwargs): - raise NotImplementedError("Function _picking_flow_event need to be implemented.") + def _is_torch_op_event(self, event: TraceEventBean): + raise NotImplementedError("Function _is_torch_op_event need to 
be implemented.") @abstractmethod def _get_dispatch_func(self): @@ -65,31 +86,58 @@ class BaseProfilingParser(ABC): def load_data(self) -> ProfilingResult: self._dispatch_events() self._update_kernel_dict() - self._update_memory_list() self._update_communication_dict() + if self._enable_memory_compare: + self._update_memory_list() if self._enable_profiling_compare: self._update_overall_metrics() self._check_result_data() return self._result_data - def _update_communication_dict(self): - pass - def _dispatch_events(self): + if not self._dispatch_func: + return + index_list = list(range(0, len(self._dispatch_func))) * 2 for event in self._trace_events: if not event.is_dict(): continue if event.is_m_mode(): continue - self.__picking_event(event) + self.__picking_event(event, index_list) - def __picking_event(self, event: TraceEventBean): - for func in self._dispatch_func: - res = func(event) + def __picking_event(self, event: TraceEventBean, index_list: list): + for index in range(self._cur_func_index, self._cur_func_index + len(self._dispatch_func)): + func_index = index_list[index] + res = self._dispatch_func[func_index](event) if res: + self._cur_func_index = func_index break + def _picking_torch_op_event(self, event: TraceEventBean): + if self._is_torch_op_event(event): + self._result_data.update_torch_op_data(event) + return True + return False + + def _picking_kernel_event(self, event: TraceEventBean): + if self._is_kernel_event(event): + self._all_kernels[f"{event.pid}-{event.tid}-{event.start_time}"] = event + return True + return False + + def _picking_flow_event(self, event: TraceEventBean): + if self._is_flow_event(event): + if event.is_flow_start(): + self._flow_dict.setdefault(event.id, {})["start"] = event + elif event.is_flow_end(): + self._flow_dict.setdefault(event.id, {})["end"] = event + return True + return False + def _update_kernel_dict(self): + if self._profiling_type == Constant.NPU: + for comm in self._comm_list: + 
self._all_kernels[f"{comm.pid}-{comm.tid}-{comm.start_time}"] = comm for flow_event in self._flow_dict.values(): start_event = flow_event.get("start") end_event = flow_event.get("end") @@ -98,8 +146,29 @@ class BaseProfilingParser(ABC): kernel_event = self._all_kernels.get(f"{end_event.pid}-{end_event.tid}-{end_event.start_time}") if not kernel_event: continue - self._result_data.kernel_dict.setdefault(start_event.start_time, []).append( - KernelEvent(kernel_event.event, self._profiling_type)) + self._result_data.update_kernel_dict(start_event.start_time, kernel_event) + + def _update_communication_dict(self): + if self._profiling_type == Constant.GPU: + self._comm_list = list(filter(lambda x: x.is_nccl_name(), self._all_kernels.values())) + self._comm_list.sort(key=lambda x: x.start_time) + self._comm_task_list.sort(key=lambda x: x.start_time) + task_index = 0 + for communication_op in self._comm_list: + name_list = communication_op.lower_name.split("_") + if len(name_list) < 2: + continue + comm_name = name_list[1] + self._result_data.update_communication_dict(comm_name, communication_op.dur) + while task_index < len(self._comm_task_list): + task_event = self._comm_task_list[task_index] + if task_event.start_time < communication_op.start_time: + task_index += 1 + continue + if task_event.start_time > communication_op.end_time: + break + self._result_data.update_comm_task_data(comm_name, task_event) + task_index += 1 def _check_result_data(self): if self._enable_operator_compare or self._enable_memory_compare: @@ -111,3 +180,9 @@ class BaseProfilingParser(ABC): print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") if self._enable_communication_compare and not self._result_data.communication_dict: print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") + + def _read_trace_event(self): + try: + self._trace_events = FileReader.read_trace_file(self._json_path) + except Exception: + print(f"[ERROR] Failed to 
read the file: {self._json_path}") diff --git a/profiler/compare_tools/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/profiling_parser/gpu_profiling_parser.py index 1cafeda5e23ff425ab2749583be9821ea804212d..7f0a9eeffc6393c0b6d4528e3f61218f38f7fa77 100644 --- a/profiler/compare_tools/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/profiling_parser/gpu_profiling_parser.py @@ -1,3 +1,4 @@ +import sys from collections import defaultdict, Counter from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean @@ -10,7 +11,6 @@ class GPUProfilingParser(BaseProfilingParser): CUBE_MARK = 'gemm' FA_MARK_LIST = [['fmha', 'kernel'], ['flash', 'kernel']] SDMA_MARK_LIST = ['htod', 'dtod', 'dtoh', 'memset (device)'] - BWD = 'bwd' FLOW_CAT = ("async_gpu", "async_cpu_to_gpu", "ac2g", "async") TORCH_OP_CAT = ("cpu_op", "user_annotation", "cuda_runtime", "operator") @@ -45,38 +45,34 @@ class GPUProfilingParser(BaseProfilingParser): record = addr_dict.get(memory_event.addr) if allocate_bytes > 0: if record: - self._result_data.memory_list.append(record) + self._result_data.update_memory_list(record) addr_dict[memory_event.addr] = {Constant.SIZE: allocate_bytes, Constant.TS: memory_event.start_time, Constant.ALLOCATION_TIME: memory_event.start_time} if allocate_bytes < 0 and record: if abs(allocate_bytes) == record.get(Constant.SIZE): record[Constant.RELEASE_TIME] = memory_event.start_time - self._result_data.memory_list.append(record) + self._result_data.update_memory_list(record) del addr_dict[memory_event.addr] + for record in addr_dict.values(): + self._result_data.update_memory_list(record) def _update_overall_metrics(self): self._calculate_performance_time() - self._result_data.overall_metrics.compute_time = len( - [_ for _, value in self._marks.items() if value < 0]) - self._result_data.overall_metrics.communication_not_overlapped = len( - [_ for _, value in self._marks.items() if value > 0]) - 
self._result_data.overall_metrics.vec_time = self._result_data.overall_metrics.compute_time - \ - self._result_data.overall_metrics.cube_time - \ - self._result_data.overall_metrics.fa_time_fwd - \ - self._result_data.overall_metrics.fa_time_bwd - self.__parse_e2e_time() - self._result_data.overall_metrics.scheduling_time = self._result_data.overall_metrics.e2e_time - \ - self._result_data.overall_metrics.compute_time - \ - self._result_data.overall_metrics.communication_not_overlapped self.__parse_memory_reserved() + self._result_data.overall_metrics.calculate_vec_time() + self._result_data.overall_metrics.calculate_schedule_time() self._result_data.overall_metrics.trans_time_to_s() def _calculate_performance_time(self): + min_ts = sys.float_info.max + max_ts = sys.float_info.min for event in self._trace_events: + if event.stream: + min_ts = min(event.start_time, min_ts) + max_ts = max(event.end_time, max_ts) if event.stream == self._compute_stream_id and self.__is_sdma_time(event.name): - self._result_data.overall_metrics.sdma_time += event.dur - self._result_data.overall_metrics.sdma_num += 1 + self._result_data.overall_metrics.update_sdma_info(event.dur) continue if not event.is_kernel_cat(): continue @@ -84,6 +80,14 @@ class GPUProfilingParser(BaseProfilingParser): if event.is_nccl_name(): continue self.__add_compute_time(event) + self._result_data.overall_metrics.set_e2e_time(float(max_ts - min_ts)) + self.__add_compute_and_overlap_time() + + def __add_compute_and_overlap_time(self): + compute_time = len([_ for _, value in self._marks.items() if value < 0]) + communication_not_overlapped = len([_ for _, value in self._marks.items() if value > 0]) + self._result_data.overall_metrics.set_compute_time(compute_time) + self._result_data.overall_metrics.set_comm_not_overlap(communication_not_overlapped) def __add_marks(self, event: TraceEventBean): if event.is_nccl_name(): @@ -93,32 +97,16 @@ class GPUProfilingParser(BaseProfilingParser): for timestep in 
range(int(event.start_time + 1), int(event.end_time + 1)): self._marks[str(timestep)] += -100 # mark this timestep in compute stream - def __add_fa_time(self, event: TraceEventBean): - if self.BWD in event.lower_name: - self._result_data.overall_metrics.fa_time_bwd += event.dur - self._result_data.overall_metrics.fa_num_bwd += 1 - else: - self._result_data.overall_metrics.fa_time_fwd += event.dur - self._result_data.overall_metrics.fa_num_fwd += 1 - def __add_compute_time(self, event: TraceEventBean): if self.__is_flash_attention(event.name): - self.__add_fa_time(event) + if event.is_backward(): + self._result_data.overall_metrics.update_fa_bwd_info(event.dur) + else: + self._result_data.overall_metrics.update_fa_fwd_info(event.dur) elif self.CUBE_MARK in event.lower_name: - self._result_data.overall_metrics.cube_num += 1 - self._result_data.overall_metrics.cube_time += event.dur + self._result_data.overall_metrics.update_cube_info(event.dur) else: - self._result_data.overall_metrics.vec_num += 1 - self._result_data.overall_metrics.vec_time += event.dur - - def _picking_communication_event(self, event: TraceEventBean): - if event.is_nccl_kernel(): - name_list = event.lower_name.split("_") - if len(name_list) > 2: - self._result_data.communication_dict.setdefault(name_list[1], {}).setdefault("comm_list", []).append( - event.dur) - return True - return False + self._result_data.overall_metrics.update_vec_info(event.dur) def _picking_memory_event(self, event: TraceEventBean): if event.is_memory_event(): @@ -126,52 +114,34 @@ class GPUProfilingParser(BaseProfilingParser): return True return False - def _picking_torch_op_event(self, event: TraceEventBean): - if event.lower_cat in self.TORCH_OP_CAT: - self._result_data.torch_op_data.append(event.event) - return True - return False - - def _picking_kernel_event(self, event: TraceEventBean): - if event.is_kernel_except_nccl(): - self._all_kernels[f"{event.pid}-{event.tid}-{event.start_time}"] = event - return True - return 
False + def _is_torch_op_event(self, event: TraceEventBean): + return event.lower_cat in self.TORCH_OP_CAT - def _picking_flow_event(self, event: TraceEventBean): - if event.lower_cat in self._flow_cat: - if event.is_flow_start(): - self._flow_dict.setdefault(event.id, {})["start"] = event - elif event.is_flow_end(): - self._flow_dict.setdefault(event.id, {})["end"] = event - return True - return False + def _is_kernel_event(self, event: TraceEventBean): + return event.is_kernel_cat() - def __parse_e2e_time(self): - compute_events_timeline = [event for event in self._trace_events if event.stream] - compute_events_timeline = sorted(compute_events_timeline, key=lambda event: event.start_time) - self._result_data.overall_metrics.e2e_time = (compute_events_timeline[-1].end_time - compute_events_timeline[ - 0].start_time) + def _is_flow_event(self, event: TraceEventBean): + return event.lower_cat in self._flow_cat def __parse_memory_reserved(self): - memories = [event.total_reserved for event in self._memory_events] - if not memories: + if not self._memory_events: print("[INFO] Gpu profiling data doesn't contain memory info.") return - self._result_data.overall_metrics.memory_used = max(memories) / 1024 ** 3 + memory_used = max([event.total_reserved for event in self._memory_events]) / 1024 ** 3 + self._result_data.overall_metrics.set_memory_used(memory_used) def _get_dispatch_func(self): - func_list = [] + func_set = set() if self._enable_memory_compare or self._enable_operator_compare: - func_list.append(self._picking_torch_op_event) - if self._enable_operator_compare: - func_list.append(self._picking_kernel_event) - func_list.append(self._picking_flow_event) - if self._enable_memory_compare or self._enable_profiling_compare: - func_list.append(self._picking_memory_event) + func_set.add(self._picking_torch_op_event) if self._enable_communication_compare: - func_list.append(self._picking_communication_event) - return func_list + func_set.add(self._picking_kernel_event) 
+ if self._enable_operator_compare or self._args.max_kernel_num: + func_set.add(self._picking_kernel_event) + func_set.add(self._picking_flow_event) + if self._enable_memory_compare or self._enable_profiling_compare: + func_set.add(self._picking_memory_event) + return list(func_set) def _infer_compute_stream_id(self): if not self._enable_profiling_compare: diff --git a/profiler/compare_tools/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/profiling_parser/npu_profiling_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..4a2870b05fc1be54343abbdd4f5cd757809046be --- /dev/null +++ b/profiler/compare_tools/profiling_parser/npu_profiling_parser.py @@ -0,0 +1,241 @@ +import os +import sys +from math import ceil + +from compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean +from compare_bean.origin_data_bean.memory_record_bean import MemoryRecordBean +from compare_bean.origin_data_bean.operator_memory_bean import OperatorMemoryBean +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean +from profiling_parser.base_profiling_parser import BaseProfilingParser +from utils.constant import Constant +from utils.file_reader import FileReader + + +class NPUProfilingParser(BaseProfilingParser): + FLOW_CAT = "async_npu" + TORCH_OP_CAT = "cpu_op" + ACTIVE_CPU = "ProfilerActivity.CPU" + LEVEL_0 = "Level0" + + def __init__(self, args: any, path_dict: dict): + super().__init__(args, path_dict) + self._operator_memory_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "operator_memory.csv") + self._memory_record_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "memory_record.csv") + self._kernel_detail_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "kernel_details.csv") + self._info_json_path = path_dict.get(Constant.INFO_JSON_PATH, "") + self._trace_events = [TraceEventBean(event) for event in self._trace_events] + self._hccl_pid = None + 
self._hccl_op_tid_list = [] + self._kernel_pid = None + self._overlap_pid = None + self._enqueue_dict = {} + self._dequeue_data = [] + self._overlap_analysis = [] + self._dispatch_func = self._get_dispatch_func() + self._filter_meta_id() + + def _get_dispatch_func(self): + func_list = set() + if self._enable_memory_compare or self._enable_operator_compare: + func_list.add(self._picking_torch_op_event) + if self._enable_operator_compare or self._args.max_kernel_num: + func_list.add(self._picking_kernel_event) + func_list.add(self._picking_flow_event) + if self._enable_memory_compare: + func_list.add(self._picking_task_queue_data) + if self._enable_communication_compare: + func_list.add(self._picking_hccl_event) + if self._enable_profiling_compare: + func_list.add(self._picking_overlap_analysis_data) + func_list.add(self._picking_kernel_event) + return list(func_list) + + def _update_memory_list(self): + try: + memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) + except FileNotFoundError: + print("[WARNING] The file operator_memory.csv does not exist.") + return + except Exception: + print("[ERROR] Failed to read operator_memory.csv.") + return + if memory_data: + self._dequeue_data.sort(key=lambda x: x.start_time) + for data in memory_data: + if not data.allocation_time: + continue + if data.is_cann_op(): + matched_corr_id = self.__match_dequeue_data(data.allocation_time) + if matched_corr_id == Constant.INVALID_VALUE: + continue + self._result_data.update_memory_list({Constant.SIZE: data.size, + Constant.TS: self._enqueue_dict.get(matched_corr_id, 0), + Constant.NAME: data.name, + Constant.ALLOCATION_TIME: data.allocation_time, + Constant.RELEASE_TIME: data.release_time}) + else: + self._result_data.update_memory_list({Constant.SIZE: data.size, + Constant.TS: data.allocation_time, + Constant.ALLOCATION_TIME: data.allocation_time, + Constant.RELEASE_TIME: data.release_time}) + + def __match_dequeue_data(self, ts_time: float) -> 
int: + if not self._dequeue_data: + return Constant.INVALID_VALUE + left, right = 0, len(self._dequeue_data) - 1 + while right > left: + mid = left + ceil((right - left) / 2) + if ts_time >= self._dequeue_data[mid].start_time: + left = mid + else: + right = mid - 1 + return self._dequeue_data[left].corr_id if self._dequeue_data[left].start_time <= ts_time <= \ + self._dequeue_data[left].end_time else Constant.INVALID_VALUE + + def _update_overall_metrics(self): + self.__parse_info_json() + self.__parse_mem_csv() + self.__parse_kernel_csv() + self.__add_sdma_time() + self.__add_overlap_analysis_time() + self._result_data.overall_metrics.calculate_other_time() + self._result_data.overall_metrics.calculate_schedule_time() + self._result_data.overall_metrics.trans_time_to_s() + + def _picking_hccl_event(self, event: TraceEventBean): + if event.pid != self._hccl_pid or not event.is_x_mode(): + return False + if event.tid in self._hccl_op_tid_list: + self._comm_list.append(event) + else: + self._comm_task_list.append(event) + return True + + def _picking_task_queue_data(self, event: TraceEventBean): + if event.is_enqueue(): + self._enqueue_dict[event.corr_id] = event.start_time + return True + elif event.is_dequeue(): + self._dequeue_data.append(event) + return True + return False + + def _picking_overlap_analysis_data(self, event: TraceEventBean): + if event.pid == self._overlap_pid and event.is_x_mode(): + self._overlap_analysis.append(event) + return True + return False + + def _is_kernel_event(self, event: TraceEventBean): + return event.pid == self._kernel_pid and event.is_x_mode() + + def _is_flow_event(self, event: TraceEventBean): + return event.lower_cat == self.FLOW_CAT + + def _is_torch_op_event(self, event: TraceEventBean): + return event.lower_cat == self.TORCH_OP_CAT + + def _filter_meta_id(self): + for event in self._trace_events: + if not event.is_process_meta(): + continue + if event.is_hccl_process_name(): + self._hccl_pid = event.pid + elif 
event.is_npu_process_name(): + self._kernel_pid = event.pid + elif event.is_overlap_process_name(): + self._overlap_pid = event.pid + if not self._enable_communication_compare: + return + for event in self._trace_events: + if not event.is_thread_meta(): + continue + if event.pid == self._hccl_pid and event.is_communication_op_thread(): + self._hccl_op_tid_list.append(event.tid) + + def __parse_info_json(self): + try: + json_data = FileReader.read_trace_file(self._info_json_path) + except Exception: + print('[WARNING] Failed to read profiler_info.json.') + return + if not isinstance(json_data, dict) or not json_data: + print('[WARNING] Invalid profiler info.') + return + if self.ACTIVE_CPU in json_data.get('config', {}).get('common_config', {}).get('activities', []): + return + if self.LEVEL_0 != json_data.get('config', {}).get('experimental_config', {}).get('_profiler_level', ''): + return + self._result_data.overall_metrics.minimal_profiling = True + + def __parse_kernel_csv(self): + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except Exception: + print('[WARNING] Npu kernel details csv file is not available.') + return + if not kernel_details or kernel_details[0].is_hide_op_pmu(): + self._result_data.overall_metrics.hide_op_details = True + return + for kernel in kernel_details: + if kernel.is_invalid(): + continue + if kernel.is_flash_attention(): + if kernel.is_fa_bwd(): + self._result_data.overall_metrics.update_fa_bwd_info(kernel.duration) + else: + self._result_data.overall_metrics.update_fa_fwd_info(kernel.duration) + elif kernel.is_cube(): + self._result_data.overall_metrics.update_cube_info(kernel.duration) + elif kernel.is_sdma(): + self._result_data.overall_metrics.update_sdma_info(kernel.duration) + elif kernel.is_vector(): + self._result_data.overall_metrics.update_vec_info(kernel.duration) + else: + self._result_data.overall_metrics.update_cube_info(kernel.duration) + + def __parse_mem_csv(self): + 
try: + memory_record = FileReader.read_csv_file(self._memory_record_path, MemoryRecordBean) + except FileNotFoundError: + print('[INFO] Npu memory record csv file is not available.') + except Exception: + print('[WARNING] Load memory info failed.') + else: + memory_used = max([memory.total_reserved_mb for memory in memory_record]) / 1024 + self._result_data.overall_metrics.set_memory_used(memory_used) + + def __add_overlap_analysis_time(self): + if not self._overlap_analysis: + print('[ERROR] Failed to get overlap analysis data.') + return + min_ts = sys.float_info.max + max_ts = sys.float_info.min + for event in self._overlap_analysis: + if event.is_computing_event(): + self._result_data.overall_metrics.update_compute_time(event.dur) + min_ts = min(event.start_time, min_ts) + max_ts = max(event.end_time, max_ts) + elif event.is_comm_not_overlap(): + self._result_data.overall_metrics.update_comm_not_overlap(event.dur) + min_ts = min(event.start_time, min_ts) + max_ts = max(event.end_time, max_ts) + self._result_data.overall_metrics.set_e2e_time(float(max_ts - min_ts)) + + def __add_sdma_time(self) -> (float, int): + event_wait_stream, ai_core_stream = set(), set() + sdma_dict = {} + for event in self._all_kernels.values(): + stream_id = event.stream_id + if not stream_id: + continue + if event.is_event_wait(): + event_wait_stream.add(stream_id) + elif event.is_sdma_event(): + sdma_dict.setdefault(stream_id, []).append(event.dur) + elif event.is_compute_event(): + ai_core_stream.add(stream_id) + compute_stream = event_wait_stream & ai_core_stream + for stream in compute_stream: + dur_list = sdma_dict.get(stream, []) + self._result_data.overall_metrics.update_sdma_info(sum(dur_list), len(dur_list)) diff --git a/profiler/compare_tools/utils/args_manager.py b/profiler/compare_tools/utils/args_manager.py index 543e8f60a8e63055ba67e91d494322ad2acb02e8..49cde24f156a172a1b51b5e660fa2863e1b54d34 100644 --- a/profiler/compare_tools/utils/args_manager.py +++ 
b/profiler/compare_tools/utils/args_manager.py @@ -1,9 +1,9 @@ import os.path +import re from common_func.path_manager import PathManager from utils.constant import Constant from utils.file_reader import FileReader -from utils.profiling_parser import GPUProfilingParser, NPUProfilingParser class Singleton(object): @@ -19,38 +19,55 @@ class Singleton(object): @Singleton class ArgsManager: - PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} def __init__(self): self._args = None - self._base_profiling_type = None - self._comparison_profiling_type = None - self._base_profiling = None - self._comparison_profiling = None + self._base_path_dict = {} + self._comparison_path_dict = {} + + @property + def args(self): + return self._args @property def base_profiling_type(self): - return self._base_profiling_type + return self._base_path_dict.get(Constant.PROFILING_TYPE) @property def comparison_profiling_type(self): - return self._comparison_profiling_type + return self._comparison_path_dict.get(Constant.PROFILING_TYPE) @property - def base_profiling(self): - return self._base_profiling + def base_profiling_path(self): + return self._args.base_profiling_path @property - def comparison_profiling(self): - return self._comparison_profiling + def comparison_profiling_path(self): + return self._args.comparison_profiling_path_dict @property - def base_profiling_path(self): - return self._args.base_profiling_path + def base_path_dict(self): + return self._base_path_dict @property - def comparison_profiling_path(self): - return self._args.comparison_profiling_path + def comparison_path_dict(self): + return self._comparison_path_dict + + @property + def enable_profiling_compare(self): + return self._args.enable_profiling_compare + + @property + def enable_operator_compare(self): + return self._args.enable_operator_compare + + @property + def enable_memory_compare(self): + return self._args.enable_memory_compare + + @property + def 
enable_communication_compare(self): + return self._args.enable_communication_compare @classmethod def check_profiling_path(cls, file_path: str): @@ -77,13 +94,16 @@ class ArgsManager: ascend_output = os.path.join(file_path, "ASCEND_PROFILER_OUTPUT") profiler_output = ascend_output if os.path.isdir(ascend_output) else file_path json_path = os.path.join(profiler_output, "trace_view.json") - memory_path = os.path.join(profiler_output, "operator_memory.csv") if not os.path.isfile(json_path): msg = f"Invalid profiling path: {file_path}" raise RuntimeError(msg) - memory_path = memory_path if os.path.isfile(memory_path) else None - return {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, - Constant.TRACE_PATH: json_path, Constant.MEMORY_DATA_PATH: memory_path} + path_dict = {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, + Constant.TRACE_PATH: json_path, Constant.ASCEND_OUTPUT_PATH: profiler_output} + sub_dirs = os.listdir(file_path) + for dir_name in sub_dirs: + if dir_name == "profiler_info.json" or re.match(r"profiler_info_[0-9]+\.json", dir_name): + path_dict.update({Constant.INFO_JSON_PATH: os.path.join(file_path, dir_name)}) + return path_dict def init(self, args: any): self._args = args @@ -106,24 +126,10 @@ class ArgsManager: base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) - base_profiling_dict = self.parse_profiling_path(base_profiling_path) + self._base_path_dict = self.parse_profiling_path(base_profiling_path) comparison_profiling_path = PathManager.get_realpath(self._args.comparison_profiling_path) self.check_profiling_path(comparison_profiling_path) - comparison_profiling_dict = self.parse_profiling_path(comparison_profiling_path) + self._comparison_path_dict = self.parse_profiling_path(comparison_profiling_path) if self._args.output_path: self.check_output_path(PathManager.get_realpath(self._args.output_path)) - - 
Constant.BASE_PROFILING = Constant.BASE_PROFILING + self._args.base_profiling_path - self._base_profiling_type = base_profiling_dict.get(Constant.PROFILING_TYPE) - self._base_profiling = self.PARSER_DICT.get(self._base_profiling_type)(self._args, base_profiling_dict) - - if base_profiling_path == comparison_profiling_path: - Constant.COMPARISON_PROFILING = "Same To Base Profiling" - self._comparison_profiling_type = self._base_profiling_type - self._comparison_profiling = self._base_profiling - else: - Constant.COMPARISON_PROFILING = Constant.COMPARISON_PROFILING + self._args.comparison_profiling_path - self._comparison_profiling_type = comparison_profiling_dict.get(Constant.PROFILING_TYPE) - self._comparison_profiling = self.PARSER_DICT.get(self._comparison_profiling_type)(self._args, - comparison_profiling_dict) diff --git a/profiler/compare_tools/utils/common_func.py b/profiler/compare_tools/utils/common_func.py index 9e45d202be052a15ade10f301ffed65f2c20d24c..2211d15d2d7aa27ce05594f2f8a2c36cdcfdf55c 100644 --- a/profiler/compare_tools/utils/common_func.py +++ b/profiler/compare_tools/utils/common_func.py @@ -1,6 +1,33 @@ +from decimal import Decimal + + def calculate_diff_ratio(base_value: float, comparison_value: float): if not base_value and not comparison_value: ratio = 1 else: ratio = float('inf') if not base_value else comparison_value / base_value return [comparison_value - base_value, ratio] + + +def update_order_id(data_list: list): + for index, data in enumerate(data_list): + if data: + data[0] = index + 1 + + +def convert_to_float(data: any) -> float: + try: + float_value = float(data) + except Exception: + print('[ERROR] Invalid profiling data which failed to convert data to float.') + return 0.0 + return float_value + + +def convert_to_decimal(data: any) -> Decimal: + try: + decimal_value = Decimal(data) + except Exception: + print('[ERROR] Invalid profiling data which failed to convert data to decimal.') + return 0.0 + return decimal_value diff 
--git a/profiler/compare_tools/utils/compare_event.py b/profiler/compare_tools/utils/compare_event.py deleted file mode 100644 index 31cad3e59a9001b2f16a3279f5d307154604546e..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/utils/compare_event.py +++ /dev/null @@ -1,53 +0,0 @@ -from utils.constant import Constant - - -class KernelEvent: - def __init__(self, event: dict, device_type: int): - self._event = event - self._device_type = device_type - - @property - def kernel_name(self) -> str: - return self._event.get("name", "") - - @property - def device_dur(self) -> float: - return self._event.get("dur", 0) - - @property - def task_id(self) -> int: - return self._event.get("args", {}).get("Task Id") - - @property - def task_type(self) -> str: - return self._event.get("args", {}).get("Task Type") - - @property - def kernel_details(self): - if self._device_type == Constant.GPU: - return f"{self.kernel_name} [duration: {self.device_dur}]" - return f"{self.kernel_name}, {self.task_id}, {self.task_type} [duration: {self.device_dur}]\n" - - -class MemoryEvent: - def __init__(self, event: dict, name: str): - self._event = event - self._name = name - - @property - def size(self) -> float: - return self._event.get(Constant.SIZE, 0) - - @property - def duration(self) -> float: - if not self._event.get(Constant.ALLOCATION_TIME) or not self._event.get(Constant.RELEASE_TIME): - return 0 - return float(self._event.get(Constant.RELEASE_TIME)) - self._event.get(Constant.ALLOCATION_TIME, 0) - - @property - def memory_details(self) -> str: - name = self._event.get(Constant.NAME, "") if self._event.get(Constant.NAME, "") else self._name - release_time = self._event.get(Constant.RELEASE_TIME) - allocation_time = self._event.get(Constant.ALLOCATION_TIME) - duration = float(release_time) - float(allocation_time) if release_time and allocation_time else None - return f"{name}, ({allocation_time}, {release_time}), [duration: {duration}], [size: {self.size}]\n" diff 
--git a/profiler/compare_tools/utils/constant.py b/profiler/compare_tools/utils/constant.py index 5001de24039c5897f1e52472fd65e2c234ed3378..9cb7dfa392225e6686e06aef43391cf470fa804b 100644 --- a/profiler/compare_tools/utils/constant.py +++ b/profiler/compare_tools/utils/constant.py @@ -13,6 +13,7 @@ class Constant(object): BLUE_COLOR = "00BFFF" US_TO_MS = 1000 KB_TO_MB = 1024 + INVALID_VALUE = -1 # epsilon EPS = 1e-15 diff --git a/profiler/compare_tools/utils/excel_config.py b/profiler/compare_tools/utils/excel_config.py index 1783b5d81b941b4402cc00c248c2f0f050a164a7..123cf31836fbd21a8d94c2714ae7c82e40a9fc57 100644 --- a/profiler/compare_tools/utils/excel_config.py +++ b/profiler/compare_tools/utils/excel_config.py @@ -1,8 +1,22 @@ from utils.constant import Constant +class CellFormatType: + DEFAULT = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, + 'num_format': '#,##0'} # 数字显示整数,无背景色 + DEFAULT_FLOAT = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, + 'num_format': '#,##0.00'} # 保留2位小数,无背景色 + DEFAULT_RATIO = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', + 'border': True, 'num_format': '0.00%'} # 百分比显示,保留2位小数,无背景色 + RED_RATIO = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', + 'border': True, 'num_format': '0.00%', "fg_color": Constant.RED_COLOR} # 百分比显示,保留2位小数,单元格背景色为红色 + BOLD_STR = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, + 'bold': True} # 字符串,无背景色,字体加粗 + BLUE_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.BLUE_COLOR, 'align': 'left', + 'valign': 'vcenter', 'bold': True, 'border': True} # 蓝色背景,加粗 + + class ExcelConfig(object): - COL_IDS = "ABCDEFGHIJKLMNOPQRSTUVW" ORDER = "Order Id" OPERATOR_NAME = "Operator Name" INPUT_SHAPE = "Input Shape" @@ -34,67 +48,79 @@ class ExcelConfig(object): MIN_DURATION = "Min Duration(us)" HEADERS = { - 
Constant.OPERATOR_SHEET: [ORDER, OPERATOR_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_DETAILS, DEVICE_DURATION, - OPERATOR_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_DETAILS, DEVICE_DURATION, DIFF_DUR, - DIFF_RATIO], - Constant.MEMORY_SHEET: [ORDER, OPERATOR_NAME, INPUT_SHAPE, INPUT_TYPE, MEMORY_DETAILS, SIZE, OPERATOR_NAME, - INPUT_SHAPE, INPUT_TYPE, MEMORY_DETAILS, SIZE, DIFF_SIZE, DIFF_RATIO], - Constant.OPERATOR_TOP_SHEET: [TOP, OPERATOR_NAME, BASE_DEVICE_DURATION, BASE_OPERATOR_NUMBER, - COMPARISON_DEVICE_DURATION, COMPARISON_OPERATOR_NUMBER, DIFF_TIME, DIFF_RATIO], - Constant.MEMORY_TOP_SHEET: [TOP, OPERATOR_NAME, BASE_ALLOCATED_TIMES, BASE_ALLOCATED_MEMORY, - BASE_OPERATOR_NUMBER, COMPARISON_ALLOCATED_TIMES, COMPARISON_ALLOCATED_MEMORY, - COMPARISON_OPERATOR_NUMBER, DIFF_MEMORY, DIFF_RATIO], - Constant.COMMUNICATION_SHEET: [ORDER, COMM_OP_NAME, TASK_NAME, CALLS, TOTAL_DURATION, AVG_DURATION, - MAX_DURATION, MIN_DURATION, COMM_OP_NAME, TASK_NAME, CALLS, TOTAL_DURATION, - AVG_DURATION, MAX_DURATION, MIN_DURATION, DIFF_DUR, DIFF_RATIO] + Constant.OPERATOR_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_DUR, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": 
CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.MEMORY_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": MEMORY_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": MEMORY_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.OPERATOR_TOP_TABLE: [ + {"name": TOP, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": BASE_DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 25}, + {"name": BASE_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, + {"name": COMPARISON_DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 30}, + {"name": COMPARISON_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 30}, + {"name": DIFF_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.MEMORY_TOP_TABLE: [ + {"name": TOP, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": BASE_ALLOCATED_TIMES, "type": CellFormatType.DEFAULT_FLOAT, "width": 25}, + {"name": BASE_ALLOCATED_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 30}, + 
{"name": BASE_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, + {"name": COMPARISON_ALLOCATED_TIMES, "type": CellFormatType.DEFAULT_FLOAT, "width": 27}, + {"name": COMPARISON_ALLOCATED_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 33}, + {"name": COMPARISON_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, + {"name": DIFF_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.COMMUNICATION_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": COMM_OP_NAME, "type": CellFormatType.BOLD_STR, "width": 25}, + {"name": TASK_NAME, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": COMM_OP_NAME, "type": CellFormatType.BOLD_STR, "width": 25}, + {"name": TASK_NAME, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": DIFF_DUR, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ] } - COLUMNS = {ORDER: 10, OPERATOR_NAME: 30, TOP: 10, BASE_OPERATOR_NUMBER: 25, BASE_DEVICE_DURATION: 25, - COMPARISON_OPERATOR_NUMBER: 30, COMPARISON_DEVICE_DURATION: 30, BASE_ALLOCATED_TIMES: 25, - BASE_ALLOCATED_MEMORY: 
30, COMPARISON_ALLOCATED_TIMES: 27, COMPARISON_ALLOCATED_MEMORY: 33, - CALLS: 10, TOTAL_DURATION: 17, AVG_DURATION: 17, MAX_DURATION: 17, MIN_DURATION: 17, COMM_OP_NAME: 25} - - OVERHEAD = {Constant.OPERATOR_SHEET: ["B1:F1", "G1:K1"], Constant.MEMORY_SHEET: ["B1:F1", "G1:K1"], - Constant.COMMUNICATION_SHEET: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_SHEET: ["C1:D1", "E1:F1"], - Constant.MEMORY_TOP_SHEET: ["C1:E1", "F1:H1"]} - - FORMAT = {"int": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'num_format': '#,##0'}, - "float": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'num_format': '#,##0.00'}, - "ratio": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', - 'border': True, 'num_format': '0.00%'}, - "ratio_red": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', - 'border': True, 'num_format': '0.00%', "fg_color": Constant.RED_COLOR}, - "str_bold": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'bold': True}} - - FIELD_TYPE_MAP = {ORDER: "int", - OPERATOR_NAME: "str_bold", - INPUT_SHAPE: "int", - INPUT_TYPE: "str", - KERNEL_DETAILS: "int", - MEMORY_DETAILS: "int", - DEVICE_DURATION: "float", - DIFF_RATIO: "ratio", - DIFF_DUR: "float", - DIFF_SIZE: "float", - SIZE: "float", - TOP: "int", - BASE_DEVICE_DURATION: "float", - COMPARISON_DEVICE_DURATION: "float", - BASE_OPERATOR_NUMBER: "int", - COMPARISON_OPERATOR_NUMBER: "int", - DIFF_TIME: "float", - BASE_ALLOCATED_TIMES: "float", - COMPARISON_ALLOCATED_TIMES: "float", - BASE_ALLOCATED_MEMORY: "float", - COMPARISON_ALLOCATED_MEMORY: "float", - DIFF_MEMORY: "float", - COMM_OP_NAME: "str_bold", - TASK_NAME: "int", - CALLS: "int", - TOTAL_DURATION: "float", - AVG_DURATION: "float", - MAX_DURATION: "float", - MIN_DURATION: "float"} + OVERHEAD = {Constant.OPERATOR_TABLE: ["B1:F1", "G1:K1"], Constant.MEMORY_TABLE: ["B1:F1", 
"G1:K1"], + Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], + Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"]} diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py index ef0287b35f862ca5bd807de498cc8684256d7c43..dea123a192d2625864ee8303743d3cc266e37acb 100644 --- a/profiler/compare_tools/utils/file_reader.py +++ b/profiler/compare_tools/utils/file_reader.py @@ -12,8 +12,7 @@ class FileReader: def read_trace_file(cls, file_path: str) -> any: PathManager.check_path_readable(file_path) if not os.path.isfile(file_path): - msg = f"File not exists: {file_path}" - raise RuntimeError(msg) + raise FileNotFoundError("File not exists.") file_size = os.path.getsize(file_path) if file_size <= 0: return [] @@ -32,10 +31,10 @@ class FileReader: return json_data @classmethod - def read_csv_file(cls, file_path: str) -> any: + def read_csv_file(cls, file_path: str, bean_class: any = None) -> any: PathManager.check_path_readable(file_path) if not os.path.isfile(file_path): - return [] + raise FileNotFoundError("File not exists.") file_size = os.path.getsize(file_path) if file_size <= 0: return [] @@ -50,7 +49,8 @@ class FileReader: with open(file_path, newline="") as csv_file: reader = csv.DictReader(csv_file) for row in reader: - result_data.append(row) + row_data = bean_class(row) if bean_class else row + result_data.append(row_data) except Exception as e: msg = f"Failed to read the file: {file_path}" raise RuntimeError(msg) from e diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py deleted file mode 100644 index 30dfce4ef8baf457797106a553c2c8c698eec0f3..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/utils/profiling_parser.py +++ /dev/null @@ -1,300 +0,0 @@ -from abc import abstractmethod -from math import ceil - -from utils.compare_event import KernelEvent -from utils.constant import Constant -from 
utils.file_reader import FileReader -from utils.trace_event_data import TraceEventData - - -class ProfilingParser: - def __init__(self, args: any, path_dict: dict): - self._args = args - self._profiling_path = path_dict.get(Constant.PROFILING_PATH) - self._torch_op_data = None - self._kernel_dict = None - self._memory_list = None - self._communication_data = None - self._communication_task_data = None - - @property - def file_path(self) -> str: - return self._profiling_path - - @property - def json_path(self) -> str: - return self._json_path - - @property - def torch_op_data(self) -> list: - if self._torch_op_data is None: - self.get_torch_op_data() - return self._torch_op_data - - @property - def kernel_dict(self) -> dict: - if self._kernel_dict is None: - self.get_kernel_dict() - return self._kernel_dict - - @property - def memory_list(self) -> list: - if self._memory_list is None: - self.get_memory_list() - return self._memory_list - - @property - def communication_data(self) -> dict: - if self._communication_data is None: - self.get_communication_data() - return self._communication_data - - @property - def communication_task_data(self) -> dict: - if self._communication_task_data is None: - self.get_communication_data() - return self._communication_task_data - - @abstractmethod - def get_torch_op_data(self): - raise NotImplementedError - - @abstractmethod - def get_kernel_dict(self): - raise NotImplementedError - - @abstractmethod - def get_memory_list(self): - raise NotImplementedError - - -class GPUProfilingParser(ProfilingParser): - def __init__(self, args: any, path_dict: dict): - super().__init__(args, path_dict) - self._json_path = path_dict.get(Constant.PROFILING_PATH) - - def get_torch_op_data(self): - torch_op_list = [] - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - for event in total_events: - if event.get("cat", "").lower() in ("cpu_op", "user_annotation", "cuda_runtime", "Operator"): - 
torch_op_list.append(event) - self._torch_op_data = torch_op_list - - def get_kernel_dict(self): - flow_kernel_dict = {} - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - flow_cat = (self._args.gpu_flow_cat,) if self._args.gpu_flow_cat else ("async_gpu", "async_cpu_to_gpu", - "ac2g", "async") - flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {} - for event in total_events: - if event.get("cat", "") in flow_cat and event.get("ph") == "s": - flow_start_dict[event.get("id")] = event - elif event.get("cat", "") in flow_cat and event.get("ph") == "f": - flow_end_dict[event.get("id")] = event - elif event.get("cat", "").lower() == "kernel" and event.get("name", "").split("_")[0].lower() != "ncclkernel": - kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), float(event.get("ts")))] = event - - for flow_id, start_flow in flow_start_dict.items(): - end_flow = flow_end_dict.get(flow_id) - if end_flow is None: - continue - kernel_event = kernel_dict.get( - "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), float(end_flow.get("ts")))) - if kernel_event is None: - continue - flow_kernel_dict.setdefault(float(start_flow.get("ts")), []).append(KernelEvent(kernel_event, Constant.GPU)) - self._kernel_dict = flow_kernel_dict - - def get_memory_list(self): - self._memory_list = [] - memory_events = [] - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - for event in total_events: - if event.get("name", "").lower() == "[memory]": - memory_events.append(event) - memory_events.sort(key=lambda x: float(x.get("ts", 0))) - addr_dict = {} - for memory_event in memory_events: - args = memory_event.get("args", {}) - if args.get("Device Type", -1) != 1: - continue - allocate_bytes = args.get("Bytes", 0) / Constant.BYTE_TO_KB - record = addr_dict.get(args.get("Addr")) - if allocate_bytes > 0: - if record: - self._memory_list.append(record) - 
addr_dict[args.get("Addr")] = {Constant.SIZE: allocate_bytes, - Constant.TS: float(memory_event.get("ts", 0)), - Constant.ALLOCATION_TIME: float(memory_event.get("ts", 0))} - if allocate_bytes < 0 and record: - if abs(allocate_bytes) == record.get(Constant.SIZE): - record[Constant.RELEASE_TIME] = float(memory_event.get("ts", 0)) - self._memory_list.append(record) - del addr_dict[args.get("Addr")] - - def get_communication_data(self): - self._communication_data, self._communication_task_data = [], {} - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - for data in total_events: - if data.get("cat", "").lower() == "kernel" and data.get("name", "").split("_")[0].lower() == "ncclkernel": - self._communication_data.append(data) - - -class NPUProfilingParser(ProfilingParser): - def __init__(self, args: any, path_dict: str): - super().__init__(args, path_dict) - self._json_path = path_dict.get(Constant.TRACE_PATH) - self._memory_data_path = path_dict.get(Constant.MEMORY_DATA_PATH) - - def get_torch_op_data(self): - torch_op_list = [] - json_data = FileReader.read_trace_file(self._json_path) - for event in json_data: - if event.get("cat", "").lower() == "cpu_op": - torch_op_list.append(event) - self._torch_op_data = torch_op_list - - def get_kernel_dict(self): - flow_kernel_dict = {} - json_data = FileReader.read_trace_file(self._json_path) - flow_cat = "async_npu" - - flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {} - for event in json_data: - if event.get("cat", "") == flow_cat and event.get("ph") == "s": - flow_start_dict[event.get("id")] = event - elif event.get("cat", "") == flow_cat and event.get("ph") == "f": - flow_end_dict[event.get("id")] = event - elif event.get("ph") == "X" and event.get("cat", "") != 'cpu_op': - kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), float(event.get("ts")))] = event - - for flow_id, start_flow in flow_start_dict.items(): - end_flow = 
flow_end_dict.get(flow_id) - if end_flow is None: - continue - kernel_event = kernel_dict.get( - "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), float(end_flow.get("ts")))) - if kernel_event is None: - continue - flow_kernel_dict.setdefault(float(start_flow.get("ts")), []).append(KernelEvent(kernel_event, Constant.NPU)) - self._kernel_dict = flow_kernel_dict - - def get_memory_list(self): - self._memory_list = [] - enqueue_dict, dequeue_data = {}, [] - json_data = FileReader.read_trace_file(self._json_path) - for data in json_data: - if data.get("cat", "").lower() == "enqueue": - enqueue_dict[data.get("args", {}).get("correlation_id", "")] = data - elif data.get("cat", "").lower() == "dequeue": - dequeue_data.append(data) - - if not self._memory_data_path: - return - memory_data = FileReader.read_csv_file(self._memory_data_path) - for data in memory_data: - if not data.get(Constant.ALLOCATION_TIME, 0): - continue - if "cann::" in data.get("Name", ""): - ts_time = float(data.get(Constant.ALLOCATION_TIME, 0)) - match_dequeue_data = self._match_cann_memory_data(dequeue_data, ts_time) - if match_dequeue_data is not None: - correlation_id = match_dequeue_data.get("args", {}).get("correlation_id", "") - ts = float(enqueue_dict.get(correlation_id, {}).get("ts", 0)) - self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), Constant.TS: ts, - Constant.NAME: data.get(Constant.NAME, ""), - Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)), - Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)}) - self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), - Constant.TS: float(data.get(Constant.ALLOCATION_TIME, 0)), - Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)), - Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)}) - - @classmethod - def _match_cann_memory_data(cls, dequeue_data: list, ts_time: float): - if not dequeue_data: - return None - right = len(dequeue_data) - 
1 - left = 0 - while right > left: - mid = left + ceil((right - left) / 2) - if ts_time >= float(dequeue_data[mid].get("ts", 0)): - left = mid - else: - right = mid - 1 - end_time = float(dequeue_data[left].get("ts", 0)) + dequeue_data[left].get("dur", 0) - return dequeue_data[left] if end_time > ts_time else None - - def get_communication_data(self): - def get_pid(json_data): - pid = None - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_process_meta(): - continue - if trace_event.is_hccl_process(): - pid = trace_event.pid - break - return pid - - def get_tid_list(pid, tid_list, json_data): - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_thread_meta(): - continue - if trace_event.pid != pid: - continue - if trace_event.is_communication_op_thread(): - tid_list.append(trace_event.tid) - - def get_comm_data(pid, tid_list, json_data): - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_x_mode(): - continue - if trace_event.pid != pid: - continue - if trace_event.tid in tid_list: - self._communication_data.append(data) - - def get_comm_task_data(pid, tid_list, json_data): - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_x_mode(): - continue - if trace_event.pid != pid: - continue - if trace_event.tid in tid_list: - continue - ts = trace_event.start_time - for communication_op in self._communication_data: - comm_op_event = TraceEventData(communication_op) - if ts < comm_op_event.start_time or ts > comm_op_event.end_time: - continue - name_list = communication_op.get("name", "").split("_") - if len(name_list) >= 2: - self._communication_task_data.setdefault(name_list[1].lower(), []).append(data) - break - - self._communication_data, self._communication_task_data = [], {} - json_data = FileReader.read_trace_file(self._json_path) - - pid = get_pid(json_data) - if pid is None: - return - - tid_list = [] - get_tid_list(pid, 
tid_list, json_data) - if not tid_list: - return - - get_comm_data(pid, tid_list, json_data) - if not self._communication_data: - return - - get_comm_task_data(pid, tid_list, json_data) diff --git a/profiler/compare_tools/utils/torch_op_node.py b/profiler/compare_tools/utils/torch_op_node.py index c62526c766332257df3c739118545dccbe34a973..b040e597fd5e074c6fcb95e955e24fd7397f8764 100644 --- a/profiler/compare_tools/utils/torch_op_node.py +++ b/profiler/compare_tools/utils/torch_op_node.py @@ -1,11 +1,12 @@ from math import ceil -from utils.compare_event import MemoryEvent +from compare_bean.origin_data_bean.compare_event import MemoryEvent +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean from utils.constant import Constant class TorchOpNode: - def __init__(self, event=None, parent_node=None): + def __init__(self, event=TraceEventBean, parent_node=None): self._event = event self._parent_node = parent_node self._child_nodes = [] @@ -15,31 +16,31 @@ class TorchOpNode: @property def start_time(self): - return float(self._event.get("ts", 0)) + return self._event.start_time @property def end_time(self): - return float(self._event.get("ts", 0)) + self._event.get("dur", 0) + return self._event.end_time @property def name(self): - return str(self._event.get("name", Constant.NA)) + return self._event.name @property def input_shape(self): - return str(self._event.get("args", {}).get("Input Dims", Constant.NA)) + return str(self._event.args.get("Input Dims", Constant.NA)) @property def origin_input_shape(self): - return self._event.get("args", {}).get("Input Dims", Constant.NA) + return self._event.args.get("Input Dims", Constant.NA) @property def input_type(self): - return str(self._event.get("args", {}).get("Input type", Constant.NA)) + return str(self._event.args.get("Input type", Constant.NA)) @property def call_stack(self): - return str(self._event.get("args", {}).get("Call stack", Constant.NA)) + return str(self._event.args.get("Call stack", 
Constant.NA)) @property def parent(self): @@ -65,29 +66,20 @@ class TorchOpNode: self._child_nodes.append(child_node) def set_kernel_list(self, kernel_list: list): + if not kernel_list: + return self._kernel_list.extend(kernel_list) + kernel_num = len(kernel_list) + cur_node = self + while cur_node._parent_node: + cur_node._kernel_num += kernel_num + cur_node = cur_node._parent_node - def add_kernel_num(self, kernel_num: int): - self._kernel_num += kernel_num - - def set_memory_allocated(self, memory_allocated: dict): - self._memory_allocated_list.append(MemoryEvent(memory_allocated, self.name)) + def set_memory_allocated(self, memory_allocated: MemoryEvent): + self._memory_allocated_list.append(memory_allocated) def is_step_profiler(self) -> bool: return self.name.find("ProfilerStep#") != -1 def get_op_info(self) -> list: return [self.name, self.input_shape, self.input_type, self.call_stack] - - def match_child_node(self, ts_time: float) -> any: - if not self._child_nodes: - return None - right = len(self._child_nodes) - 1 - left = 0 - while right > left: - mid = left + ceil((right - left) / 2) - if ts_time >= self._child_nodes[mid].start_time: - left = mid - else: - right = mid - 1 - return self._child_nodes[left] if self._child_nodes[left].end_time > ts_time else None diff --git a/profiler/compare_tools/utils/trace_event_data.py b/profiler/compare_tools/utils/trace_event_data.py deleted file mode 100644 index ff70b230e740c5808d201577028d8f434da3e9c5..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/utils/trace_event_data.py +++ /dev/null @@ -1,42 +0,0 @@ -class TraceEventData: - - def __init__(self, event: dict): - self._event = event - - @property - def pid(self) -> int: - return self._event.get("pid", "") - - @property - def tid(self) -> int: - return self._event.get("tid", "") - - @property - def process_name(self) -> int: - return self._event.get("args", {}).get("name", "") - - @property - def start_time(self) -> float: - return 
float(self._event.get("ts", 0)) - - @property - def end_time(self) -> float: - return float(self._event.get("ts", 0)) + self._event.get("dur", 0) - - def is_m_mode(self) -> bool: - return self._event.get("ph", "") == "M" - - def is_x_mode(self) -> bool: - return self._event.get("ph", "") == "X" - - def is_process_meta(self) -> bool: - return self.is_m_mode() and self._event.get("name", "") == "process_name" - - def is_thread_meta(self) -> bool: - return self.is_m_mode() and self._event.get("name", "") == "thread_name" - - def is_communication_op_thread(self) -> bool: - return self._event.get("args", {}).get("name", "").find("Communication") != -1 - - def is_hccl_process(self) -> bool: - return self.process_name == "HCCL" diff --git a/profiler/compare_tools/utils/tree_builder.py b/profiler/compare_tools/utils/tree_builder.py index fc827f0fb853f9f41b135b8826aeadbb02b27564..e1ff3f22e52b5a728af7d1af627b433a513984e0 100644 --- a/profiler/compare_tools/utils/tree_builder.py +++ b/profiler/compare_tools/utils/tree_builder.py @@ -1,70 +1,30 @@ from queue import Queue -from typing import Optional, Dict, List -from utils.constant import Constant from utils.torch_op_node import TorchOpNode class TreeBuilder: @classmethod - def build_tree(cls, event_list: list) -> TorchOpNode: + def build_tree(cls, event_list: list, kernel_dict: dict, memory_list: list) -> TorchOpNode: root_node = TorchOpNode() - event_list.sort(key=lambda x: float(x.get("ts", 0))) + event_list.extend(memory_list) + event_list.sort(key=lambda x: x.start_time) last_node = root_node for event in event_list: while last_node: - if last_node == root_node or float(event.get("ts", 0)) < last_node.end_time: + if last_node != root_node and event.start_time > last_node.end_time: + last_node = last_node.parent + continue + if event.is_torch_op: tree_node = TorchOpNode(event, last_node) last_node.add_child_node(tree_node) last_node = tree_node - break - last_node = last_node.parent - return root_node - - @classmethod - 
def update_tree_node( - cls, - root_node: TorchOpNode, - flow_kernel_dict: Optional[Dict] = None, - memory_allocated_list: Optional[List] = None, - ): - def set_kernel_helper(node_queue, ts, kernel_num, kernel_list): - while not node_queue.empty(): - tree_node = node_queue.get() - tree_node.add_kernel_num(kernel_num) - matched_child_node = tree_node.match_child_node(ts) - if matched_child_node: - node_queue.put(matched_child_node) - else: - tree_node.set_kernel_list(kernel_list) - - flow_kernel_dict = flow_kernel_dict if flow_kernel_dict else {} - memory_allocated_list = memory_allocated_list if memory_allocated_list else [] - - if flow_kernel_dict: - for ts, kernel_list in flow_kernel_dict.items(): - matched_child_node = root_node.match_child_node(ts) - if not matched_child_node: - return - kernel_num = len(kernel_list) - node_queue = Queue() - node_queue.put(matched_child_node) - set_kernel_helper(node_queue, ts, kernel_num, kernel_list) - - for memory_allocated in memory_allocated_list: - ts = memory_allocated.get(Constant.TS) - matched_child_node = root_node.match_child_node(ts) - if not matched_child_node: - continue - node_queue = Queue() - node_queue.put(matched_child_node) - while not node_queue.empty(): - tree_node = node_queue.get() - matched_child_node = tree_node.match_child_node(ts) - if matched_child_node: - node_queue.put(matched_child_node) + tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) else: - tree_node.set_memory_allocated(memory_allocated) + event.set_name(last_node.name) + last_node.set_memory_allocated(event) + break + return root_node @classmethod def get_total_kernels(cls, root_node: TorchOpNode) -> list: diff --git a/profiler/compare_tools/view/base_view.py b/profiler/compare_tools/view/base_view.py new file mode 100644 index 0000000000000000000000000000000000000000..d18980b7de2098b5a1015d14fbd1b5be91a23bfc --- /dev/null +++ b/profiler/compare_tools/view/base_view.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + 
+ +class BaseView(ABC): + def __init__(self, data_dict: dict): + self._data_dict = data_dict + + @abstractmethod + def generate_view(self): + raise NotImplementedError("Function generate_view need to be implemented.") diff --git a/profiler/compare_tools/view/excel_view.py b/profiler/compare_tools/view/excel_view.py index 457012bbc8efd6f54780e591d0c6446f54a2e359..864a136a3fe55298ed5ae04722899a26af269b8d 100644 --- a/profiler/compare_tools/view/excel_view.py +++ b/profiler/compare_tools/view/excel_view.py @@ -2,19 +2,21 @@ import os from xlsxwriter import Workbook +from view.base_view import BaseView from view.work_sheet_creator import WorkSheetCreator from utils.constant import Constant -class ExcelViewer: +class ExcelView(BaseView): - def __init__(self, data_dict: dict, file_path: str): - self._data_dict = data_dict + def __init__(self, data_dict: dict, file_path: str, args: any): + super().__init__(data_dict) self._file_path = file_path + self._args = args def generate_view(self): workbook = Workbook(self._file_path) for sheet_name, data in self._data_dict.items(): - WorkSheetCreator(workbook, sheet_name, data).create_sheet() + WorkSheetCreator(workbook, sheet_name, data, self._args).create_sheet() workbook.close() os.chmod(self._file_path, Constant.FILE_AUTHORITY) diff --git a/profiler/compare_tools/view/screen_view.py b/profiler/compare_tools/view/screen_view.py new file mode 100644 index 0000000000000000000000000000000000000000..9c256ac3ab975f1d1ef40f48db37bc7b5bf4c4ff --- /dev/null +++ b/profiler/compare_tools/view/screen_view.py @@ -0,0 +1,19 @@ +from prettytable import PrettyTable + +from view.base_view import BaseView + + +class ScreenView(BaseView): + def __init__(self, data_dict: dict): + super().__init__(data_dict) + + def generate_view(self): + for sheet_name, data in self._data_dict.items(): + if not data.get("rows", []): + return + table = PrettyTable() + table.title = sheet_name + table.field_names = data.get("headers", []) + for row in 
data.get("rows", []): + table.add_row(row) + print(table) diff --git a/profiler/compare_tools/view/work_sheet_creator.py b/profiler/compare_tools/view/work_sheet_creator.py index 909d4445315b4bc6d2d2773a3e5c0ce567edf244..ef7f8deeddaaf165c8297497d8fb716cdd8e75f3 100644 --- a/profiler/compare_tools/view/work_sheet_creator.py +++ b/profiler/compare_tools/view/work_sheet_creator.py @@ -1,67 +1,52 @@ from xlsxwriter import Workbook -from utils.args_manager import ArgsManager -from utils.constant import Constant -from utils.excel_config import ExcelConfig +from utils.excel_config import ExcelConfig, CellFormatType class WorkSheetCreator: - def __init__(self, work_book: Workbook, sheet_name: str, data: list): + def __init__(self, work_book: Workbook, sheet_name: str, data: dict, args: any): self._work_book = work_book self._sheet_name = sheet_name self._data = data + self._args = args self._work_sheet = None self._row_id = 1 + self._field_format = {} + self._diff_ratio_index = None + self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" def create_sheet(self): + if not self._data.get("rows", []): + return self._work_sheet = self._work_book.add_worksheet(self._sheet_name) self._write_headers() self._write_data() def _write_headers(self): - header_format = self._work_book.add_format( - {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.BLUE_COLOR, 'align': 'left', - 'valign': 'vcenter', 'bold': True, 'border': True}) - headers = ExcelConfig.HEADERS.get(self._sheet_name, []) - overhead = ExcelConfig.OVERHEAD.get(self._sheet_name, []) + header_format = self._work_book.add_format(CellFormatType.BLUE_BOLD) + overhead = self._data.get("overhead", []) if overhead: - base_path = f"Base Profiling: {ArgsManager().base_profiling_path}" + base_path = f"Base Profiling: {self._args.base_profiling_path}" self._work_sheet.merge_range(overhead[0], base_path, header_format) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - comparison_path = "Same To Base 
Profiling" - else: - comparison_path = f"Comparison Profiling: {ArgsManager().comparison_profiling_path}" + comparison_path = f"Comparison Profiling: {self._args.comparison_profiling_path}" self._work_sheet.merge_range(overhead[1], comparison_path, header_format) self._row_id += 2 - for index, header in enumerate(headers): - column_width = ExcelConfig.COLUMNS.get(header, 20) - col_id = ExcelConfig.COL_IDS[index] - self._work_sheet.set_column(f"{col_id}:{col_id}", column_width) - self._work_sheet.write(f"{col_id}{self._row_id}", header, header_format) + for index, header in enumerate(self._data.get("headers")): + col_id = self._col_ids[index] + self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) + self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) + self._field_format[index] = self._work_book.add_format(header.get("type")) + if header.get("name") == ExcelConfig.DIFF_RATIO: + self._diff_ratio_index = index self._row_id += 1 def _write_data(self): - default_format = self._work_book.add_format(ExcelConfig.FORMAT.get("int")) - red_ratio_format = self._work_book.add_format(ExcelConfig.FORMAT.get("ratio_red")) - headers = ExcelConfig.HEADERS.get(self._sheet_name, []) - field_format = {} - diff_ratio_index = None - for index, header in enumerate(headers): - format_dict = ExcelConfig.FORMAT.get(ExcelConfig.FIELD_TYPE_MAP.get(header, "int")) - if not format_dict: - format_dict = ExcelConfig.FORMAT.get("int") - field_format[index] = self._work_book.add_format(format_dict) - if header == ExcelConfig.DIFF_RATIO: - diff_ratio_index = index - order_id = 1 - for data in self._data: - self._work_sheet.write(f"{ExcelConfig.COL_IDS[0]}{self._row_id}", order_id, default_format) + red_ratio_format = self._work_book.add_format(CellFormatType.RED_RATIO) + for data in self._data.get("rows"): for index, cell_data in enumerate(data): - data_index = index + 1 - cell_format = field_format.get(data_index, default_format) - if data_index == 
diff_ratio_index and cell_data and cell_data > 1: + cell_format = self._field_format.get(index) + if index == self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data - self._work_sheet.write(f"{ExcelConfig.COL_IDS[data_index]}{self._row_id}", cell_data, cell_format) - order_id += 1 + self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) self._row_id += 1 diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index 3df7fc038609c1a4568d4591b96d8122f34f33b1..678f5d5a8f7be8c45d6c4935f2941bd716d77a78 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -137,7 +137,7 @@ def merge_timeline_custom(args): timeline_files = natural_sort(os.listdir(args.input)) timeline_files_dict = {} for idx, timeline_file in enumerate(timeline_files): - timeline_files_dict[idx] = (os.path.join(args.input, timeline_file),0) + timeline_files_dict[idx] = os.path.join(args.input, timeline_file) # 合并部分profiling items process_list = args.items.split(",") if args.items else None merge_timeline_events(timeline_files_dict, process_list) @@ -183,8 +183,8 @@ def merge_timeline_events(timeline_file_dict, process_list): continue # convert tid to int - if isinstance(event.get("tid"), str): - event["tid"] = int(''.join(x for x in event["tid"] if x.isdigit())) + if not isinstance(event['tid'], int): + print(f"[WARNNING] {event['tid']} is not int type") # 进程名加上rank_id区分不同rank if event.get("name") == "process_name" and event.get("ph") == "M": @@ -204,7 +204,8 @@ def merge_timeline_events(timeline_file_dict, process_list): print(f"File {out_path} existed before and is now overwritten.") os.remove(out_path) try: - with open(out_path, 'w') as f: + # 设置文件权限为640,安全考虑 + with os.fdopen(os.open(out_path, os.O_WRONLY | os.O_CREAT, 0o640), 'w') as f: json.dump(new_events, f) except 
FileNotFoundError: print(f"Param -o (output path) is not exists, please check it.") diff --git a/profiler/test/resource/advisor/cluster_analysis_output/cluster_communication.json b/profiler/test/resource/advisor/cluster_analysis_output/cluster_communication.json new file mode 100644 index 0000000000000000000000000000000000000000..bb47a7c19807ffb70f9ac5e7bf7561fb6eecbc75 --- /dev/null +++ b/profiler/test/resource/advisor/cluster_analysis_output/cluster_communication.json @@ -0,0 +1 @@ +{"(0, 1)": {"step": {"hcom_broadcast__501_0@6297232067196334501": {"0": {"Communication Time Info": {"Start Timestamp(us)": 1695054582608213.0,"Elapse Time(ms)": 121.73694384375,"Transit Time(ms)": 0.0080599375,"Wait Time(ms)": 115.96064153125,"Synchronization Time(ms)": 102.699834,"Idle Time(ms)": 5.768242375,"Wait Time Ratio": 0.9999,"Synchronization Time Ratio": 0.9999},"Communication Bandwidth Info": {"RDMA": {"Transit Size(MB)": 0,"Transit Time(ms)": 0,"Bandwidth(GB/s)": 0,"Large Packet Ratio": 0,"Size Distribution": {}},"HCCS": {"Transit Size(MB)": 0.06393599999999998,"Transit Time(ms)": 0.02366978125,"Bandwidth(GB/s)": 2.7012,"Large Packet Ratio": 0.0,"Size Distribution": {"0.003072": [18,0.0202498125],"0.002944": [2,0.00230996875],"0.002752": [1,0.00111]}},"PCIE": {"Transit Size(MB)": 0.021376,"Transit Time(ms)": 0.011499890625,"Bandwidth(GB/s)": 1.8588,"Large Packet Ratio": 0.0,"Size Distribution": {"0.003072": [6,0.009869906250000001],"0.002944": [1,0.001629984375]}},"SDMA": {"Transit Size(MB)": 0.08531199999999997,"Transit Time(ms)": 0.035169671875,"Bandwidth(GB/s)": 2.4257,"Large Packet Ratio": 0,"Size Distribution": {}}}},"1": {"Communication Time Info": {"Start Timestamp(us)": 1695054582707330.2,"Elapse Time(ms)": 20.798642234375,"Transit Time(ms)": 0.01630984375,"Wait Time(ms)": 0.00015000000000000001,"Synchronization Time(ms)": 1e-05,"Idle Time(ms)": 20.782182390624996,"Wait Time Ratio": 0.0091,"Synchronization Time Ratio": 0.0006},"Communication Bandwidth Info": 
{"RDMA": {"Transit Size(MB)": 0,"Transit Time(ms)": 0,"Bandwidth(GB/s)": 0,"Large Packet Ratio": 0,"Size Distribution": {}},"HCCS": {"Transit Size(MB)": 0.10380800000000005,"Transit Time(ms)": 0.03860956250000001,"Bandwidth(GB/s)": 2.6887,"Large Packet Ratio": 0.0,"Size Distribution": {"0.003072": [29,0.033139625000000006],"0.002944": [5,0.005469937500000001]}},"PCIE": {"Transit Size(MB)": 0.030399999999999996,"Transit Time(ms)": 0.01630984375,"Bandwidth(GB/s)": 1.8639,"Large Packet Ratio": 0.0,"Size Distribution": {"0.003072": [9,0.014699859374999999],"0.002752": [1,0.001609984375]}},"SDMA": {"Transit Size(MB)": 0.13420800000000005,"Transit Time(ms)": 0.05491940625000001,"Bandwidth(GB/s)": 2.4437,"Large Packet Ratio": 0,"Size Distribution": {}}}}},"hcom_broadcast__501_1@6297232067196334501": {"0": {"Communication Time Info": {"Start Timestamp(us)": 1695054582728707.2,"Elapse Time(ms)": 27.107999203125,"Transit Time(ms)": 0,"Wait Time(ms)": 27.096639281249992,"Synchronization Time(ms)": 27.096639281249992,"Idle Time(ms)": 0.011359921875008894,"Wait Time Ratio": 1.0,"Synchronization Time Ratio": 1.0},"Communication Bandwidth Info": {"RDMA": {"Transit Size(MB)": 0,"Transit Time(ms)": 0,"Bandwidth(GB/s)": 0,"Large Packet Ratio": 0,"Size Distribution": {}},"HCCS": {"Transit Size(MB)": 5.6e-05,"Transit Time(ms)": 0.001129984375,"Bandwidth(GB/s)": 0.0496,"Large Packet Ratio": 0.0,"Size Distribution": {"5.6e-05": [1,0.001129984375]}},"PCIE": {"Transit Size(MB)": 0,"Transit Time(ms)": 0,"Bandwidth(GB/s)": 0,"Large Packet Ratio": 0,"Size Distribution": {}},"SDMA": {"Transit Size(MB)": 5.6e-05,"Transit Time(ms)": 0.001129984375,"Bandwidth(GB/s)": 0.0496,"Large Packet Ratio": 0,"Size Distribution": {}}}},"1": {"Communication Time Info": {"Start Timestamp(us)": 1695054582738906.0,"Elapse Time(ms)": 17.367596484375,"Transit Time(ms)": 0,"Wait Time(ms)": 0.0002,"Synchronization Time(ms)": 0.0002,"Idle Time(ms)": 17.367396484375,"Wait Time Ratio": 1.0,"Synchronization Time 
Ratio": 1.0},"Communication Bandwidth Info": {"RDMA": {"Transit Size(MB)": 0,"Transit Time(ms)": 0,"Bandwidth(GB/s)": 0,"Large Packet Ratio": 0,"Size Distribution": {}},"HCCS": {"Transit Size(MB)": 0.000184,"Transit Time(ms)": 0.0020599843749999997,"Bandwidth(GB/s)": 0.0893,"Large Packet Ratio": 0.0,"Size Distribution": {"5.6e-05": [1,0.00114],"0.000128": [1,0.000919984375]}},"PCIE": {"Transit Size(MB)": 0,"Transit Time(ms)": 0,"Bandwidth(GB/s)": 0,"Large Packet Ratio": 0,"Size Distribution": {}},"SDMA": {"Transit Size(MB)": 0.000184,"Transit Time(ms)": 0.0020599843749999997,"Bandwidth(GB/s)": 0.0893,"Large Packet Ratio": 0,"Size Distribution": {}}}}}}}} \ No newline at end of file diff --git a/profiler/test/resource/advisor/cluster_analysis_output/cluster_step_trace_time.csv b/profiler/test/resource/advisor/cluster_analysis_output/cluster_step_trace_time.csv new file mode 100644 index 0000000000000000000000000000000000000000..964f26b3c08d941814d81253c334d5a56bc3f559 --- /dev/null +++ b/profiler/test/resource/advisor/cluster_analysis_output/cluster_step_trace_time.csv @@ -0,0 +1,10 @@ +Step,Type,Index,Computing,Communication(Not Overlapped),Overlapped,Communication,Free,Stage,Bubble,Communication(Not Overlapped and Exclude Receive) +,rank,0,80309.68717187493,683731.2897031249,5524.544625000097,689255.834328125,990605.1042031233,1754645.75,0.0,683731.2897031249 +,rank,1,80435.74650000008,133385.97745312497,76.47925000000396,133462.45670312498,1488610.0587500026,1702431.75,0.0,133385.97745312497 +,rank,2,80335.81743750002,530279.2325156251,2056.8394374999916,532336.0719531251,1110332.1177812554,1720947.0,0.0,530279.2325156251 +,rank,3,80077.26998437475,265574.66,1775.3800000000629,267350.04000000004,1366552.5200156313,1712204.5,0.0,265574.66 +,rank,4,81662.19999999994,478477.3900000001,2224.999999999942,480702.39,1130793.2500000068,1690932.75,0.0,478477.3900000001 
+,rank,5,81367.62001562494,756374.86,7225.910000000033,763600.77,892836.8999843737,1730579.25,0.0,756374.86 +,rank,6,81359.02999999997,244123.84000000003,1568.109999999986,245691.95,1390633.03,1716115.75,0.0,244123.84000000003 +,rank,7,81416.40999999992,539037.92,2378.3699999999953,541416.29,1061577.0900000043,1682031.25,0.0,539037.92 +,stage,"(0, 1, 2, 3, 4, 5, 6, 7)",81662.19999999994,756374.86,7225.910000000033,763600.77,1488610.0587500026,1754645.75,0,756374.86 diff --git a/profiler/test/resource/event_list.json b/profiler/test/resource/event_list.json new file mode 100644 index 0000000000000000000000000000000000000000..653c847f1cf060ce18c5dacee212947b0c00d0f8 --- /dev/null +++ b/profiler/test/resource/event_list.json @@ -0,0 +1,26 @@ +[ + { + "pid": 0, + "tid": 0, + "args": { + "Input Dims": [[1,1], [1,1]], + "name": 0 + }, + "ts": 0, + "dur": 1, + "ph": "M", + "name": "process_name" + }, + { + "pid": 1, + "tid": 1, + "args": { + "Input Dims": [[1,1], [1,1]], + "name": 1 + }, + "ts": 1, + "dur": 1, + "ph": "M", + "name": "process_name" + } +] diff --git a/profiler/test/resource/test.csv b/profiler/test/resource/test.csv new file mode 100644 index 0000000000000000000000000000000000000000..33a231187a1b22ded34343c29a16ff5d358cd025 --- /dev/null +++ b/profiler/test/resource/test.csv @@ -0,0 +1,9 @@ +Name,Type,Accelerator Core,Start Time(us),Duration(us),Wait Time(us),Block Dim +Cast16,N/A,N/A,1702866902796675,932.0498,0,0 +trans_Cast_19,N/A,N/A,1702866902996859,5.86,199251.9502,0 +trans_TransData_20,N/A,N/A,1702866902996917,7.77,52.14,0 +trans_Cast_21,N/A,N/A,1702866902996929,4.35,4.23,0 +Conv2D17,N/A,N/A,1702866902996956.8,24.26,23.4,0 +trans_TransData_22,N/A,N/A,1702866902996981,23.98,0,0 +Add18,N/A,N/A,1702866903139579,1.22,142574.02,0 +atomic_memset-1_18_186609_1,N/A,N/A,1702866903316761.5,1.36,177181.28,0 diff --git a/profiler/test/run_ut.py b/profiler/test/run_ut.py index 50cc98a643f47173b1a48ee6d353ac942f2a0e7d..ee27abaace177c7eab9021bcd3fcf51f0368c832 
100644 --- a/profiler/test/run_ut.py +++ b/profiler/test/run_ut.py @@ -5,14 +5,23 @@ import sys def set_python_path(): - cluster_analyse_root = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse") - compare_tools_root = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "compare_tools") + cluster_analyse_root = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse") + compare_tools_root = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "compare_tools") + advisor_root = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "advisor") + advisor_backend_root = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "advisor", "advisor_backend") + # Update PYTHONPATH python_path = os.environ.get("PYTHONPATH", "") if not python_path: python_path += cluster_analyse_root else: python_path += f":{cluster_analyse_root}" python_path += f":{compare_tools_root}" + python_path += f":{advisor_root}" + python_path += f":{advisor_backend_root}" os.environ["PYTHONPATH"] = python_path diff --git a/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_cluster_advice_base.py b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_cluster_advice_base.py new file mode 100644 index 0000000000000000000000000000000000000000..6235c06efb37a553ddb61a60a7a705fdb603edea --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_cluster_advice_base.py @@ -0,0 +1,72 @@ +import os +import shutil +import unittest +from unittest import mock +from unittest.mock import MagicMock + +from advisor_backend.cluster_advice.cluster_advice_base import ClusterAdviceBase + + +class MockChildClusterAdvice(ClusterAdviceBase): + + def __init__(self, collection_path: str): + super().__init__(collection_path) + + def run(self): + return True + + def output(self): + return True + + +class 
TestClusterAdviceBase(unittest.TestCase): + + def setUp(self): + self.tmp_dir = "./tmp_dir" + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + + def test_compute_max_gap_ratio_with_zero_mean(self): + result = ClusterAdviceBase.compute_max_gap_ratio([1, 2], 0) + self.assertEqual(0, result) + + def test_compute_max_gap_ratio_with_normal_input(self): + result = ClusterAdviceBase.compute_max_gap_ratio([3, 1], 2.0) + self.assertEqual(1.0, result) + + def test_compute_max_gap_ratio_with_abnormal_input(self): + with self.assertRaises(TypeError): + ClusterAdviceBase.compute_max_gap_ratio(["name", "age"], 2.0) + + def test_path_check_with_output_path(self): + analysis_output = os.path.join(self.tmp_dir, "cluster_analysis_output") + os.makedirs(analysis_output) + mock_inst = MockChildClusterAdvice(self.tmp_dir) + mock_inst.cluster_analyze = MagicMock(name="cluster_analyze") + mock_inst.path_check() + mock_inst.cluster_analyze.assert_not_called() + + def test_path_check_without_output_path(self): + mock_inst = MockChildClusterAdvice(self.tmp_dir) + mock_inst.cluster_analyze = MagicMock(name="cluster_analyze") + mock_inst.path_check() + mock_inst.cluster_analyze.assert_called_once() + + def test_cluster_analyze_normal(self): + mock_inst = MockChildClusterAdvice(self.tmp_dir) + with mock.patch("advisor_backend.cluster_advice.cluster_advice_base.Interface") as mock_if: + mock_if_inst = mock_if.return_value + mock_if_inst.run = MagicMock(name="run") + mock_inst.cluster_analyze() + mock_if_inst.run.assert_called_once() + + def test_cluster_analyze_abnormal(self): + mock_inst = MockChildClusterAdvice(self.tmp_dir) + with self.assertRaises(ValueError): + with mock.patch("advisor_backend.cluster_advice.cluster_advice_base.Interface") as mock_if: + mock_if_inst = mock_if.return_value + mock_if_inst.run = mock.Mock(name="run", side_effect=Exception('Error!')) + mock_inst.cluster_analyze() + mock_if_inst.run.assert_called_once() diff --git 
a/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_kernel_cluster_advice.py b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_kernel_cluster_advice.py new file mode 100644 index 0000000000000000000000000000000000000000..0509b197cafae23e4f53ed0b300c1934a66b1197 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_kernel_cluster_advice.py @@ -0,0 +1,104 @@ +import os +import stat +import shutil +import unittest +from unittest import mock +from unittest.mock import MagicMock + +from common_func.constant import Constant +from advisor_backend.cluster_advice.kernel_cluster_advice import KernelClusterAdvice + + +class TestClusterAdviceBase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.tmp_dir = './tmp_dir' + cls.data_map_normal = { + 0: os.path.join(cls.tmp_dir, 'rank_0'), + 1: os.path.join(cls.tmp_dir, 'rank_1') + } + cls.data_map_abnormal = { + 2: os.path.join(cls.tmp_dir, 'rank_2') + } + ascend_output_0 = os.path.join(cls.tmp_dir, 'rank_0', Constant.SINGLE_OUTPUT) + os.makedirs(ascend_output_0) + ascend_output_1 = os.path.join(cls.tmp_dir, 'rank_1', Constant.SINGLE_OUTPUT) + os.makedirs(ascend_output_1) + ascend_output_2 = os.path.join(cls.tmp_dir, 'rank_2', Constant.SINGLE_OUTPUT) + os.makedirs(ascend_output_2) + # write data to csv file + flags = os.O_WRONLY | os.O_CREAT + mode = stat.S_IWUSR | stat.S_IRUSR + with os.fdopen(os.open( + os.path.join(ascend_output_0, Constant.KERNEL_DETAILS_CSV), flags, mode), 'w') as fp: + fp.write('Step Id,Name,Input Shapes,Input Data Types,Output Shapes,Duration(us)\n') + fp.write('0,ZerosLike46,"""544404480""",FLOAT16,"""544404480""",10.0\n') + fp.write('0,ZerosLike46,"""544404480""",FLOAT16,"""544404480""",20.0\n') + with os.fdopen(os.open( + os.path.join(ascend_output_1, Constant.KERNEL_DETAILS_CSV), flags, mode), 'w') as fp: + fp.write('Step Id,Name,Input Shapes,Input Data Types,Output Shapes,Duration(us)\n') + 
fp.write('0,Mul85,"""4,1024,12288;4,1024,1""",FLOAT16,"""4,1024,12288""",30.0\n') + fp.write('0,Mul85,"""4,1024,12288;4,1024,1""",FLOAT16,"""4,1024,12288""",40.0\n') + cls.all_kernel_data = { + 'rank id': {0: 0, 1: 0, 2: 1, 3: 1}, + 'Name': {0: 'ZerosLike46', 1: 'ZerosLike46', 2: 'Mul85', 3: 'Mul85'}, + 'Input Shapes': {0: '"544404480"', 1: '"544404480"', 2: '"4,1024,12288;4,1024,1"', 3: '"4,1024,12288;4,1024,1"'}, + 'Input Data Types': {0: 'FLOAT16', 1: 'FLOAT16', 2: 'FLOAT16', 3: 'FLOAT16'}, + 'Output Shapes': {0: '"544404480"', 1: '"544404480"', 2: '"4,1024,12288"', 3: '"4,1024,12288"'}, + 'Duration(us)': {0: 10.0, 1: 20.0, 2: 30.0, 3: 40.0} + } + cls.expect_result = { + 'rank id': {0: 0, 1: 1}, + 'Name': {0: 'ZerosLike46', 1: 'Mul85'}, + 'Input Shapes': {0: '"544404480"', 1: '"4,1024,12288;4,1024,1"'}, + 'Input Data Types': {0: 'FLOAT16', 1: 'FLOAT16'}, + 'Output Shapes': {0: '"544404480"', 1: '"4,1024,12288"'}, + 'Duration(us)_mean': {0: 15.0, 1: 35.0}, + 'Duration(us)_var': {0: 50.0, 1: 50.0}, + 'Duration(us)_max': {0: 20.0, 1: 40.0}, + 'Duration(us)_min': {0: 10.0, 1: 30.0}, + 'Duration(us)_count': {0: 2, 1: 2}, + 'Duration(us)_sum': {0: 30.0, 1: 70.0} + } + with os.fdopen(os.open( + os.path.join(ascend_output_2, Constant.KERNEL_DETAILS_CSV), flags, mode), 'w') as fp: + fp.write('Worng Title\n') + fp.write('0\n') + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tmp_dir) + + def test_run(self): + advice_inst = KernelClusterAdvice(self.tmp_dir) + advice_inst.load_kernel_details_data = MagicMock(name="load_kernel_details_data") + advice_inst.calculate_data = MagicMock(name="calculate_data") + advice_inst.run() + advice_inst.load_kernel_details_data.assert_called_once() + advice_inst.calculate_data.assert_called_once() + + def load_kernel_details_data_with_normal_data(self): + advice_inst = KernelClusterAdvice(self.tmp_dir) + with mock.patch("cluster_data_preprocess.pytorch_data_preprocessor.PytorchDataPreprocessor") as py_mock, \ + 
mock.patch("common_func.path_manager.PathManager.check_path_readable"): + py_mock_inst = py_mock.return_value + py_mock_inst.get_data_map.return_value = self.data_map_normal + advice_inst.load_kernel_details_data() + self.assertEqual(self.all_kernel_data, advice_inst.all_kernel_data.to_dict()) + + def load_kernel_details_data_with_abnormal_data(self): + advice_inst = KernelClusterAdvice(self.tmp_dir) + with self.assertRaises(RuntimeError): + with mock.patch("cluster_data_preprocess.pytorch_data_preprocessor.PytorchDataPreprocessor") as py_mock, \ + mock.patch("common_func.path_manager.PathManager.check_path_readable"): + py_mock_inst = py_mock.return_value + py_mock_inst.get_data_map.return_value = self.data_map_abnormal + advice_inst.load_kernel_details_data() + + def calculate_data(self): + advice_inst = KernelClusterAdvice(self.tmp_dir) + advice_inst.all_kernel_data = self.all_kernel_data + result = advice_inst.calculate_data() + self.assertEqual(self.expect_result, result.to_dict()) diff --git a/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_slow_link_advice.py b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_slow_link_advice.py new file mode 100644 index 0000000000000000000000000000000000000000..79307813f5b2d9b272148a8b29e79f571cf9db1b --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_slow_link_advice.py @@ -0,0 +1,61 @@ +import unittest + +from advisor_backend.cluster_advice.slow_link_advice import SlowLinkAdvice + + +class TestSlowLinkAdvice(unittest.TestCase): + + DATA = 'data' + BOTTLENECK = 'bottleneck' + ADVICE = 'advice' + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.prof_dir = './resource/advisor' + cls.expect_data = { + 0: { + 'RDMA time(ms)': 0, + 'RDMA size(mb)': 0, + 'SDMA time(ms)': 0.08536799999999997, + 'SDMA size(mb)': 0.03629965625, + 'RDMA bandwidth(GB/s)': 0, + 'SDMA bandwidth(GB/s)': 0.4252 + }, + 1: { + 'RDMA time(ms)': 0, + 'RDMA size(mb)': 0, + 'SDMA 
time(ms)': 0.13439200000000004, + 'SDMA size(mb)': 0.05697939062500001, + 'RDMA bandwidth(GB/s)': 0, + 'SDMA bandwidth(GB/s)': 0.424 + } + } + cls.expect_bottleneck = 'SDMA bandwidth(GB/s): \n' \ + 'The average is 0.425, ' \ + 'while the maximum is 0.425GB/s and ' \ + 'the minimum is 0.424GB/s. ' \ + 'the difference is 0.001GB/s. \n' + + def test_compute_ratio_abnormal(self): + result = SlowLinkAdvice.compute_ratio(19.0, 0) + self.assertEqual(0, result) + + def test_load_communication_json_abnormal(self): + slow_link_inst = SlowLinkAdvice("./tmp_dir") + with self.assertRaises(RuntimeError): + result = slow_link_inst.load_communication_json() + + def test_compute_bandwidth_abnormal(self): + slow_link_inst = SlowLinkAdvice("./tmp_dir") + op_dict = {"Name": "ZhangSan"} + with self.assertRaises(ValueError): + slow_link_inst.compute_bandwidth(op_dict) + + def test_run(self): + slow_link_inst = SlowLinkAdvice(self.prof_dir) + result = slow_link_inst.run() + data = dict(result[self.DATA]) + bottleneck = result[self.BOTTLENECK] + self.assertEqual(self.expect_data, data) + self.assertEqual(self.expect_bottleneck, bottleneck) diff --git a/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_slow_rank_advice.py b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_slow_rank_advice.py new file mode 100644 index 0000000000000000000000000000000000000000..6a45553e1eb9522acc778575e104eb8903b0f7be --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/cluster_advice/test_slow_rank_advice.py @@ -0,0 +1,42 @@ +import unittest + +from advisor_backend.cluster_advice.slow_rank_advice import SlowRankAdvice + + +class TestSlowRankAdvice(unittest.TestCase): + + DATA = 'data' + BOTTLENECK = 'bottleneck' + ADVICE = 'advice' + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.prof_dir = './resource/advisor' + cls.expect_data = { + 0: [80309.68717187493, 683731.2897031249, 990605.1042031233, 0], + 1: [80435.74650000008, 133385.97745312497, 
1488610.0587500026, 0], + 2: [80335.81743750002, 530279.2325156251, 1110332.1177812554, 0], + 3: [80077.26998437475, 265574.66, 1366552.5200156313, 0], + 4: [81662.19999999994, 478477.3900000001, 1130793.2500000068, 0], + 5: [81367.62001562494, 756374.86, 892836.8999843737, 0], + 6: [81359.02999999997, 244123.84000000003, 1390633.03, 0], + 7: [81416.40999999992, 539037.92, 1061577.0900000043, 0] + } + cls.expect_bottleneck = 'Communication has some issues in the cluster, ' \ + 'because the max difference of Communication time has reached 622.989ms. \n' \ + 'Free has some issues in the cluster, ' \ + 'because the max difference of Free time has reached 595.773ms. \n' + + def test_run(self): + slow_rank_inst = SlowRankAdvice(self.prof_dir) + result = slow_rank_inst.run() + data = dict(result[self.DATA]) + bottleneck = result[self.BOTTLENECK] + self.assertEqual(self.expect_data, data) + self.assertEqual(self.expect_bottleneck, bottleneck) + + def test_load_step_time_abnormal(self): + slow_rank_inst = SlowRankAdvice("./tmp_dir") + with self.assertRaises(RuntimeError): + slow_rank_inst.load_step_time() diff --git a/profiler/test/ut/advisor/advisor_backend/compute_advice/kernel_details.csv b/profiler/test/ut/advisor/advisor_backend/compute_advice/kernel_details.csv new file mode 100644 index 0000000000000000000000000000000000000000..fad4a9e0b12daf1d897961cf34b6a679bf5ba23b --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/compute_advice/kernel_details.csv @@ -0,0 +1,592 @@ +Step Id,Model ID,Task ID,Stream ID,Name,Type,Accelerator Core,Start Time(us),Duration(us),Wait Time(us),Block Dim,Mix Block Dim,Input Shapes,Input Data Types,Input Formats,Output Shapes,Output Data Types,Output Formats,Context ID,aicore_time(us),aic_total_cycles,aic_mac_fp16_ratio,aic_mac_int8_ratio,aic_cube_fops,aic_vector_fops,aiv_time(us),aiv_total_cycles,aiv_vec_fp32_ratio,aiv_vec_fp16_ratio,aiv_vec_int32_ratio,aiv_vec_misc_ratio,aiv_cube_fops,aiv_vector_fops 
+1,4294967295,1259,16,ZerosLike46,ZerosLike,AI_VECTOR_CORE,1699529622790614.8,607.98,0,48,0,"""544404480""",FLOAT16,FORMAT_ND,"""544404480""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,587.05,52130051,0,0,0.0822,0,0,137176320 +1,4294967295,N/A,N/A,hcom_broadcast__445_0,hcom_broadcast_,HCCL,1699529622793969.5,308926.68,2746.77,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,N/A,N/A,hcom_broadcast__445_1,hcom_broadcast_,HCCL,1699529623106430.8,62.94,3534.57,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1265,16,Cast66,Cast,AI_VECTOR_CORE,1699529623106755.2,3.14,261.56,9,0,"""4,1025""",INT64,FORMAT_ND,"""4,1025""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.77,29508,0,0,0.0062,0,0,5856 +1,4294967295,1266,16,Slice67,Slice,AI_VECTOR_CORE,1699529623107140.5,1.84,382.11,4,0,"""4,1025;2;2""",INT32;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4,1024""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.13,8379,0,0,0.001,0,0,256 +1,4294967295,1267,16,Slice68,Slice,AI_VECTOR_CORE,1699529623107503.5,1.84,361.16,4,0,"""4,1025;2;2""",INT32;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4,1024""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.09,8072,0,0,0.001,0,0,256 +1,4294967295,1268,16,OnesLike69,OnesLike,AI_VECTOR_CORE,1699529623107787.8,7.96,282.41,48,0,"""1,1024,1024""",FLOAT,FORMAT_ND,"""1,1024,1024""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,5.9,524112,0,0,0.032,0,0,536576 +1,4294967295,1269,16,atomic_memset-1_69_2208226_1_0,MemSet,AI_VECTOR_CORE,1699529623107900.8,2.1,105.04,4,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,1.37,10118,0,0,0.0783,0,0,25344 +1,4294967295,1270,16,Tril70,Tril,AI_VECTOR_CORE,1699529623107989.5,16.88,86.65,24,0,"""1,1024,1024""",FLOAT,FORMAT_ND,"""1,1024,1024""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,13.92,617852,0,0,0.0276,0,0,545280 
+1,4294967295,1271,16,OnesLike71,OnesLike,AI_VECTOR_CORE,1699529623108118.8,1.54,112.37,4,0,"""4,1024""",FLOAT,FORMAT_ND,"""4,1024""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,6648,0,0,0.0132,0,0,2816 +1,4294967295,1273,16,Less73,Less,AI_VECTOR_CORE,1699529623109147.5,10.04,1027.21,48,0,"""1,1,1024,1024;""",FLOAT;FLOAT,NCHW;NCHW,"""1,1,1024,1024""",BOOL,NCHW,N/A,0,0,0,0,0,0,8.15,723441,0.034,0,0.0027,0.0226,0,3731456 +1,4294967295,1274,16,Cast74,Cast,AI_VECTOR_CORE,1699529623109835,1.86,677.46,4,0,"""4,1024""",INT32,FORMAT_ND,"""4,1024""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.17,8641,0.0074,0,0.0028,0,0,4864 +1,4294967295,1275,16,Less75,Less,AI_VECTOR_CORE,1699529623110211,2.22,374.14,4,0,"""4,1024;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""4,1024""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,1.5,11084,0.0087,0,0.0047,0.0058,0,16000 +1,4294967295,1276,16,Cast74,Cast,AI_VECTOR_CORE,1699529623110441.5,1.54,228.28,4,0,"""4,1024""",INT32,FORMAT_ND,"""4,1024""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.85,6323,0.0101,0,0.0038,0,0,4864 +1,4294967295,1277,16,GreaterEqual76,GreaterEqual,AI_VECTOR_CORE,1699529623110656.8,1.88,213.71,4,0,"""4,1024;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""4,1024""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,1.18,8719,0.011,0,0.006,0.0073,0,16000 +1,4294967295,1278,16,LogicalOr77,LogicalOr,AI_VECTOR_CORE,1699529623110872,1.52,213.37,2,0,"""4,1024;4,1024""",BOOL;BOOL,FORMAT_ND;FORMAT_ND,"""4,1024""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.97,3572,0.009,0,0.0034,0.0358,0,18816 +1,4294967295,1280,16,Sub78,Sub,AI_VECTOR_CORE,1699529623111416,2.38,542.48,4,0,"""4,1024;""",INT32;INT32,FORMAT_ND;FORMAT_ND,"""4,1024""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.54,11415,0,0.0056,0.0032,0,0,5248 +1,4294967295,1281,16,LogicalNot79,LogicalNot,AI_VECTOR_CORE,1699529623111630.5,1.38,212.12,2,0,"""4,1024""",BOOL,FORMAT_ND,"""4,1024""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,3175,0.0403,0,0.0069,0.0605,0,33472 
+1,4294967295,1282,16,Cast80,Cast,AI_VECTOR_CORE,1699529623111879.8,1.86,247.87,4,0,"""4,1024""",BOOL,FORMAT_ND,"""4,1024""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.06,7855,0,0.0081,0.0031,0.0041,0,8960 +1,4294967295,1283,16,Mul81,Mul,AI_VECTOR_CORE,1699529623111989,1.62,107.39,4,0,"""4,1024;4,1024""",INT32;INT32,FORMAT_ND;FORMAT_ND,"""4,1024""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,0.84,6209,0,0.0103,0.0039,0,0,4864 +1,4294967295,1284,16,GatherV282,GatherV2,AI_VECTOR_CORE,1699529623112316.2,115.52,325.63,48,0,"""6400,12288;4,1024;1""",FLOAT16;INT32;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4,1024,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,107.04,9504958,0,0,0,0,0,3072 +1,4294967295,1285,16,LogicalNot83,LogicalNot,AI_VECTOR_CORE,1699529623112759.2,1.68,327.48,2,0,"""4,1024,1""",BOOL,FORMAT_ND,"""4,1024,1""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,1.05,3875,0.033,0,0.0057,0.0495,0,33472 +1,4294967295,1286,16,Cast84,Cast,AI_VECTOR_CORE,1699529623112902.5,1.62,141.57,2,0,"""4,1024,1""",BOOL,FORMAT_ND,"""4,1024,1""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.99,3663,0,0,0.0033,0.0087,0,4480 +1,4294967295,1287,16,Mul85,Mul,AI_VECTOR_CORE,1699529623113004.8,47.06,100.63,48,0,"""4,1024,12288;4,1024,1""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4,1024,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,44.04,3910748,0.1005,0,0.308,0.1005,0,114038784 +1,4294967295,N/A,N/A,hcom_allReduce__445_2,hcom_allReduce_,HCCL,1699529623114016.8,1470.84,964.94,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1290,16,BroadcastTo86,BroadcastTo,AI_VECTOR_CORE,1699529623115493.5,5.84,5.91,32,0,"""1,1024;2""",INT64;INT64,FORMAT_ND;FORMAT_ND,"""4,1024""",INT64,FORMAT_ND,N/A,0,0,0,0,0,0,2.93,173392,0,0,0.0004,0,0,2048 +1,4294967295,1291,16,GatherV287,GatherV2,AI_VECTOR_CORE,1699529623115584,48.38,84.66,48,0,"""1024,12288;4,1024;1""",FLOAT16;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4,1024,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,42.6,3783265,0,0,0.0022,0,0,271296 
+1,4294967295,1292,16,Add88,Add,AI_VECTOR_CORE,1699529623115719,237.48,86.62,48,0,"""4,1024,12288;4,1024,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4,1024,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,229.47,20377297,0.0193,0,0.0004,0.0193,0,75767808 +1,4294967295,1293,16,Transpose89,Transpose,AI_VECTOR_CORE,1699529623116041.2,87.74,84.77,48,0,"""4,1024,12288;3""",FLOAT16;INT32,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,79.7,7077727,0,0,0.0001,0,0,12288 +1,4294967295,122,17,trans_Cast_5,Cast,AI_CPU,1699529623116055.2,197.54,0,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,124,17,StatelessDropOutGenMask23/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623116253.8,395.5,0.96,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6291456""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1295,16,DropOutDoMask24,DropOutDoMask,AI_VECTOR_CORE,1699529623116650.2,63.12,1,48,0,"""1024,4,12288;6291456;""",FLOAT16;UINT8;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,57.45,5101337,0.1544,0,0.0784,0.1542,0,163866624 +1,4294967295,1296,16,LayerNorm90,LayerNormV3,AI_VECTOR_CORE,1699529623117336.5,359.52,623.13,48,0,"""1024,4,12288;12288;12288""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288;1024,4,1;1024,4,1""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,353.97,31432740,0.3014,0,0.0528,0,0,659432448 +1,4294967295,1297,16,MatMul91,MatMul,AI_CORE,1699529623118495.2,1478.18,799.23,24,0,"""4096,12288;4608,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,4608""",FLOAT16,FORMAT_ND,N/A,1456.97,64689637,0.893,0,473250242560,0,0,0,0,0,0,0,0,0 
+1,4294967295,1298,16,Add92,Add,AI_VECTOR_CORE,1699529623120058.2,29.38,84.82,48,0,"""1024,4,4608;4608""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,4608""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,21.48,1907688,0.0773,0,0.0015,0.0773,0,28405120 +1,4294967295,1299,16,AsStrided93,AsStrided,AI_VECTOR_CORE,1699529623120820,12.9,732.37,33,0,"""1024,4,4608;3;3;""",FLOAT16;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12,1024,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,9.82,599632,0,0,0.0001,0,0,2112 +1,4294967295,1300,16,AsStrided94,AsStrided,AI_VECTOR_CORE,1699529623121385.8,11.46,552.85,33,0,"""1024,4,4608;3;3;""",FLOAT16;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,12,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,9.11,556256,0,0,0.0001,0,0,2112 +1,4294967295,1301,16,Transpose95,Transpose,AI_VECTOR_CORE,1699529623121608.8,32.94,211.54,48,0,"""1024,12,512;3""",FLOAT16;INT32,FORMAT_ND;FORMAT_ND,"""12,512,1024""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,19.88,1765240,0,0,0.0281,0,0,1585152 +1,4294967295,1302,16,BatchMatMul96,BatchMatMul,AI_CORE,1699529623122090.8,45.5,449.06,24,0,"""12,1024,512;12,512,1024""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""12,1024,1024""",FLOAT16,FORMAT_ND,N/A,43.74,1942091,0.8787,0,13979443200,0,0,0,0,0,0,0,0,0 +1,4294967295,1303,16,Mul97,Mul,AI_VECTOR_CORE,1699529623122796.5,15.14,660.25,48,0,"""12,1024,1024;""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""12,1024,1024""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,13.3,1180873,0.0832,0,0.002,0.0832,0,18948096 +1,4294967295,1304,16,ScaledMaskedSoftmax98,ScaledMaskedSoftmax,AI_VECTOR_CORE,1699529623123188,91.84,376.36,48,0,"""4,3,1024,1024;1,1,1024,1024""",FLOAT16;BOOL,NCHW;NCHW,"""4,3,1024,1024""",FLOAT16,NCHW,N/A,0,0,0,0,0,0,88.46,7855202,0.2329,0,0.0508,0.1161,0,246528000 +1,4294967295,129,17,trans_Cast_18,Cast,AI_CPU,1699529623123908.5,112.88,628.66,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A 
+1,4294967295,131,17,StatelessDropOutGenMask99/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623124022.2,99.52,0.87,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1572864""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1306,16,DropOutDoMask100,DropOutDoMask,AI_VECTOR_CORE,1699529623124211.2,26.34,89.48,48,0,"""4,3,1024,1024;1572864;""",FLOAT16;UINT8;FLOAT16,NCHW;NCHW;NCHW,"""4,3,1024,1024""",FLOAT16,NCHW,N/A,0,0,0,0,0,0,23.04,2045647,0.0963,0,0.049,0.0961,0,40974336 +1,4294967295,1308,16,AsStrided93,AsStrided,AI_VECTOR_CORE,1699529623125352.8,12.52,1115.16,33,0,"""1024,4,4608;3;3;""",FLOAT16;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12,1024,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,10.54,643173,0,0,0.0001,0,0,2112 +1,4294967295,1309,16,BatchMatMul101,BatchMatMul,AI_CORE,1699529623125453.5,46,88.23,24,0,"""12,1024,1024;12,1024,512""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""12,1024,512""",FLOAT16,FORMAT_ND,N/A,44.58,1979427,0.8303,0,13462937600,0,0,0,0,0,0,0,0,0 +1,4294967295,1310,16,Transpose102,Transpose,AI_VECTOR_CORE,1699529623126024.2,18.18,524.75,48,0,"""4,3,1024,512;4""",FLOAT16;INT32,FORMAT_ND;FORMAT_ND,"""1024,4,3,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,16.31,1448672,0,0,0.0003,0,0,12288 +1,4294967295,1311,16,MatMul103,MatMul,AI_CORE,1699529623126984,496.7,941.57,24,0,"""4096,1536;12288,1536""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,12288""",FLOAT16,FORMAT_ND,N/A,493.96,21932026,0.872,0,156661923840,0,0,0,0,0,0,0,0,0 +1,4294967295,N/A,N/A,hcom_allReduce__445_3,hcom_allReduce_,HCCL,1699529623128091.8,3360.4,611.05,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,136,17,trans_Cast_5,Cast,AI_CPU,1699529623129610.5,101.04,0,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A 
+1,4294967295,138,17,StatelessDropOutGenMask23/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623129712.2,395.42,0.71,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6291456""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1314,16,BroadcastTo21,BroadcastTo,AI_VECTOR_CORE,1699529623131457.8,29.76,1350.08,48,0,"""1,1,12288;3""",FLOAT16;INT64,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,27.13,2408851,0,0,0.1645,0,0,12676480 +1,4294967295,1315,16,Add22,Add,AI_VECTOR_CORE,1699529623131572.5,229.24,84.99,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,224.13,19902329,0.0198,0,0.0004,0.0198,0,75767808 +1,4294967295,1317,16,DropOutDoMask24,DropOutDoMask,AI_VECTOR_CORE,1699529623131887,108.22,85.26,48,0,"""1024,4,12288;6291456;""",FLOAT16;UINT8;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,95.29,8462180,0.0931,0,0.0473,0.0929,0,163866624 +1,4294967295,1319,16,Add22,Add,AI_VECTOR_CORE,1699529623132098.2,215.62,103.03,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,194.29,17253082,0.0228,0,0.0005,0.0228,0,75767808 +1,4294967295,1320,16,LayerNorm90,LayerNormV3,AI_VECTOR_CORE,1699529623132398.8,326.14,84.88,48,0,"""1024,4,12288;12288;12288""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288;1024,4,1;1024,4,1""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,313.77,27862997,0.34,0,0.0595,0,0,659432448 +1,4294967295,1321,16,MatMul104,MatMul,AI_CORE,1699529623132809.8,2051.28,84.86,24,0,"""4096,12288;6144,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,6144""",FLOAT16,FORMAT_ND,N/A,2020.55,89712465,0.8638,0,634864066560,0,0,0,0,0,0,0,0,0 
+1,4294967295,1322,16,Add105,Add,AI_VECTOR_CORE,1699529623134946.8,41.96,85.72,48,0,"""1024,4,6144;6144""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,6144""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,33.35,2961301,0.0664,0,0.0014,0.0664,0,37885952 +1,4294967295,1323,16,FastGelu106,FastGelu,AI_VECTOR_CORE,1699529623135073.5,86.52,84.79,48,0,"""1024,4,6144""",FLOAT16,FORMAT_ND,"""1024,4,6144""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,84.59,7511618,0.6282,0,0.0007,0.4188,0,704815104 +1,4294967295,1324,16,MatMul107,MatMul,AI_CORE,1699529623135245,2014.06,84.98,24,0,"""4096,6144;12288,6144""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,12288""",FLOAT16,FORMAT_ND,N/A,1961.33,87082961,0.8866,0,632460369920,0,0,0,0,0,0,0,0,0 +1,4294967295,143,17,trans_Cast_5,Cast,AI_CPU,1699529623135336.5,105.6,0,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,145,17,StatelessDropOutGenMask23/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623135442.8,395.74,0.65,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6291456""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,N/A,N/A,hcom_allReduce__445_4,hcom_allReduce_,HCCL,1699529623137347,2833.82,1508.51,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1327,16,BroadcastTo21,BroadcastTo,AI_VECTOR_CORE,1699529623140184.5,29.64,3.68,48,0,"""1,1,12288;3""",FLOAT16;INT64,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,26.69,2370382,0,0,0.1671,0,0,12676480 +1,4294967295,1328,16,Add22,Add,AI_VECTOR_CORE,1699529623140300.5,188.62,86.36,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,182.89,16240937,0.0242,0,0.0005,0.0242,0,75767808 
+1,4294967295,1330,16,DropOutDoMask24,DropOutDoMask,AI_VECTOR_CORE,1699529623140593.5,111.4,104.38,48,0,"""1024,4,12288;6291456;""",FLOAT16;UINT8;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,100.98,8966743,0.0878,0,0.0446,0.0877,0,163866624 +1,4294967295,1332,16,Add22,Add,AI_VECTOR_CORE,1699529623140792.2,225.58,87.35,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,214.4,19038435,0.0207,0,0.0004,0.0207,0,75767808 +1,4294967295,1333,16,LayerNorm90,LayerNormV3,AI_VECTOR_CORE,1699529623141102.5,315.64,84.67,48,0,"""1024,4,12288;12288;12288""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288;1024,4,1;1024,4,1""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,306.41,27209492,0.3482,0,0.061,0,0,659432448 +1,4294967295,1334,16,MatMul91,MatMul,AI_CORE,1699529623141504.2,1470.9,86.11,24,0,"""4096,12288;4608,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,4608""",FLOAT16,FORMAT_ND,N/A,1445.06,64160812,0.896,0,470947020800,0,0,0,0,0,0,0,0,0 +1,4294967295,150,17,trans_Cast_18,Cast,AI_CPU,1699529623142063.8,92.7,0,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,152,17,StatelessDropOutGenMask99/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623142157.2,99.54,0.8,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1572864""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1335,16,Add92,Add,AI_VECTOR_CORE,1699529623143060,26.16,803.21,48,0,"""1024,4,4608;4608""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,4608""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,19.82,1760345,0.0838,0,0.0017,0.0838,0,28405120 
+1,4294967295,1336,16,AsStrided93,AsStrided,AI_VECTOR_CORE,1699529623143171,12.48,84.84,33,0,"""1024,4,4608;3;3;""",FLOAT16;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12,1024,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,9.89,603856,0,0,0.0001,0,0,2112 +1,4294967295,1337,16,AsStrided94,AsStrided,AI_VECTOR_CORE,1699529623143268.2,11.18,84.77,33,0,"""1024,4,4608;3;3;""",FLOAT16;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,12,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,8.98,548191,0,0,0.0001,0,0,2112 +1,4294967295,1338,16,Transpose95,Transpose,AI_VECTOR_CORE,1699529623143364.2,30.78,84.82,48,0,"""1024,12,512;3""",FLOAT16;INT32,FORMAT_ND;FORMAT_ND,"""12,512,1024""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,19.86,1763138,0,0,0.0281,0,0,1585152 +1,4294967295,1339,16,BatchMatMul96,BatchMatMul,AI_CORE,1699529623143480,44.46,84.97,24,0,"""12,1024,512;12,512,1024""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""12,1024,1024""",FLOAT16,FORMAT_ND,N/A,43.03,1910502,0.8899,0,13926973440,0,0,0,0,0,0,0,0,0 +1,4294967295,1340,16,Mul97,Mul,AI_VECTOR_CORE,1699529623143610.5,15.52,86.04,48,0,"""12,1024,1024;""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""12,1024,1024""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,13.55,1202923,0.0817,0,0.0019,0.0817,0,18948096 +1,4294967295,1341,16,ScaledMaskedSoftmax108,ScaledMaskedSoftmax,AI_VECTOR_CORE,1699529623143711.5,91.12,85.48,48,0,"""4,3,1024,1024;1,1,1024,1024""",FLOAT16;BOOL,NCHW;NCHW,"""4,3,1024,1024""",FLOAT16,NCHW,N/A,0,0,0,0,0,0,87.13,7736853,0.2364,0,0.0515,0.1178,0,246528000 +1,4294967295,1343,16,DropOutDoMask100,DropOutDoMask,AI_VECTOR_CORE,1699529623143887.2,27.72,84.63,48,0,"""4,3,1024,1024;1572864;""",FLOAT16;UINT8;FLOAT16,NCHW;NCHW;NCHW,"""4,3,1024,1024""",FLOAT16,NCHW,N/A,0,0,0,0,0,0,24.04,2134584,0.0922,0,0.047,0.0921,0,40974336 
+1,4294967295,1345,16,AsStrided93,AsStrided,AI_VECTOR_CORE,1699529623144000,11.94,85.03,33,0,"""1024,4,4608;3;3;""",FLOAT16;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12,1024,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,10.54,643703,0,0,0.0001,0,0,2112 +1,4294967295,1346,16,BatchMatMul101,BatchMatMul,AI_CORE,1699529623144096.8,46.2,84.81,24,0,"""12,1024,1024;12,1024,512""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""12,1024,512""",FLOAT16,FORMAT_ND,N/A,44.6,1980419,0.8295,0,13457612800,0,0,0,0,0,0,0,0,0 +1,4294967295,1347,16,Transpose102,Transpose,AI_VECTOR_CORE,1699529623144228,16.7,85.05,48,0,"""4,3,1024,512;4""",FLOAT16;INT32,FORMAT_ND;FORMAT_ND,"""1024,4,3,512""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,14.12,1253432,0,0,0.0003,0,0,12288 +1,4294967295,1348,16,MatMul103,MatMul,AI_CORE,1699529623145012.2,494.96,767.55,24,0,"""4096,1536;12288,1536""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,12288""",FLOAT16,FORMAT_ND,N/A,492.05,21846850,0.8754,0,156677406720,0,0,0,0,0,0,0,0,0 +1,4294967295,N/A,N/A,hcom_allReduce__445_5,hcom_allReduce_,HCCL,1699529623146317.5,6328.78,810.29,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,157,17,trans_Cast_5,Cast,AI_CPU,1699529623148194,101.46,0,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,159,17,StatelessDropOutGenMask23/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623148296,395.42,0.54,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6291456""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1351,16,BroadcastTo21,BroadcastTo,AI_VECTOR_CORE,1699529623152651,31.16,3959.58,48,0,"""1,1,12288;3""",FLOAT16;INT64,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,27.77,2465930,0,0,0.1606,0,0,12676480 
+1,4294967295,1352,16,Add22,Add,AI_VECTOR_CORE,1699529623152769.2,232.58,87.09,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,223.43,19840621,0.0198,0,0.0004,0.0198,0,75767808 +1,4294967295,1354,16,DropOutDoMask24,DropOutDoMask,AI_VECTOR_CORE,1699529623153086.5,115.7,84.67,48,0,"""1024,4,12288;6291456;""",FLOAT16;UINT8;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,102.46,9098884,0.0866,0,0.044,0.0864,0,163866624 +1,4294967295,1356,16,Add22,Add,AI_VECTOR_CORE,1699529623153304.8,228.26,102.55,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,208.99,18558713,0.0212,0,0.0005,0.0212,0,75767808 +1,4294967295,1357,16,LayerNorm90,LayerNormV3,AI_VECTOR_CORE,1699529623153618,320.62,84.99,48,0,"""1024,4,12288;12288;12288""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288;1024,4,1;1024,4,1""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,308.47,27392439,0.3459,0,0.0606,0,0,659432448 +1,4294967295,1358,16,MatMul104,MatMul,AI_CORE,1699529623154023.5,2045.16,84.88,24,0,"""4096,12288;6144,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,6144""",FLOAT16,FORMAT_ND,N/A,2027.04,90000538,0.8616,0,635270758400,0,0,0,0,0,0,0,0,0 +1,4294967295,164,17,trans_Cast_5,Cast,AI_CPU,1699529623155039.2,73.96,0,1,0,"""""",INT32,FORMAT_ND,"""""",UINT64,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,166,17,StatelessDropOutGenMask23/DSASTATELESSGENBITMASK,DSAStatelessGenBitMask,DSA_SQE,1699529623155113.8,395.44,0.54,1,0,"""1;;1;2""",INT64;UINT64;FLOAT16;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6291456""",UINT8,FORMAT_ND,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A 
+1,4294967295,1359,16,Add105,Add,AI_VECTOR_CORE,1699529623156154,43,644.81,48,0,"""1024,4,6144;6144""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,6144""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,35.06,3113390,0.0631,0,0.0014,0.0631,0,37885952 +1,4294967295,1360,16,FastGelu106,FastGelu,AI_VECTOR_CORE,1699529623156282.8,86.54,85.75,48,0,"""1024,4,6144""",FLOAT16,FORMAT_ND,"""1024,4,6144""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,84.62,7514601,0.6279,0,0.0007,0.4186,0,704815104 +1,4294967295,1361,16,MatMul107,MatMul,AI_CORE,1699529623156454.2,1947.02,84.96,24,0,"""4096,6144;12288,6144""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,12288""",FLOAT16,FORMAT_ND,N/A,1935.34,85929293,0.8966,0,631139573760,0,0,0,0,0,0,0,0,0 +1,4294967295,N/A,N/A,hcom_allReduce__445_6,hcom_allReduce_,HCCL,1699529623158489.2,2804.14,87.98,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1364,16,BroadcastTo21,BroadcastTo,AI_VECTOR_CORE,1699529623161298.8,33.42,5.36,48,0,"""1,1,12288;3""",FLOAT16;INT64,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,30.16,2677771,0,0,0.1479,0,0,12676480 +1,4294967295,1365,16,Add22,Add,AI_VECTOR_CORE,1699529623161418.5,189.2,86.33,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,181.83,16146320,0.0244,0,0.0005,0.0244,0,75767808 +1,4294967295,1367,16,DropOutDoMask24,DropOutDoMask,AI_VECTOR_CORE,1699529623161702.5,128.72,94.8,48,0,"""1024,4,12288;6291456;""",FLOAT16;UINT8;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,120.71,10719001,0.0735,0,0.0373,0.0734,0,163866624 +1,4294967295,1369,16,Add22,Add,AI_VECTOR_CORE,1699529623161917.8,205.02,86.53,48,0,"""1024,4,12288;1024,4,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""1024,4,12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,187.44,16644507,0.0236,0,0.0005,0.0236,0,75767808 
+1,4294967295,1370,16,LayerNorm90,LayerNormV3,AI_VECTOR_CORE,1699529623162207.8,318.1,84.98,48,0,"""1024,4,12288;12288;12288""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,"""1024,4,12288;1024,4,1;1024,4,1""",FLOAT16;FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,304.5,27039923,0.3504,0,0.0614,0,0,659432448 +1,4294967295,1371,16,MatMul109,MatMul,AI_CORE,1699529623162611,2072.3,85.15,24,0,"""4096,12288;6400,12288""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""4096,6400""",FLOAT16,FORMAT_ND,N/A,2063.09,91600983,0.8868,0,665458606080,0,0,0,0,0,0,0,0,0 +1,4294967295,1372,16,Transpose110,Transpose,AI_VECTOR_CORE,1699529623164770,11,86.7,48,0,"""4,1024;2""",INT32;INT32,FORMAT_ND;FORMAT_ND,"""1024,4""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,6.56,582111,0,0,0.0112,0,0,208896 +1,4294967295,1373,16,Cast111,Cast,AI_VECTOR_CORE,1699529623164866.2,40.96,85.25,48,0,"""1024,4,6400""",FLOAT16,FORMAT_ND,"""1024,4,6400""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,35.76,3175474,0.129,0,0.0001,0,0,26226688 +1,4294967295,1374,16,ArgMaxWithValue112,ArgMaxWithValue,AI_VECTOR_CORE,1699529623164993.5,77.78,86.29,48,0,"""1024,4,6400""",FLOAT,FORMAT_ND,"""1024,4;1024,4""",INT32;FLOAT,FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,73.46,6522988,0.0666,0,0.0069,0,0,29235200 +1,4294967295,1375,16,Cast113,Cast,AI_VECTOR_CORE,1699529623165157.5,2.48,86.22,8,0,"""1024,4""",INT32,FORMAT_ND,"""1024,4""",INT64,FORMAT_ND,N/A,0,0,0,0,0,0,1.37,20336,0,0,0.0087,0,0,5632 +1,4294967295,N/A,N/A,hcom_allReduce__445_7,hcom_allReduce_,HCCL,1699529623165249.2,93.32,89.27,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1378,16,Sub114,Sub,AI_VECTOR_CORE,1699529623165345,76.28,2.43,48,0,"""1024,4,6400;1024,4,1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1024,4,6400""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,71.25,6327424,0.0647,0,0.202,0,0,67115008 
+1,4294967295,1379,16,Cast115,Cast,AI_VECTOR_CORE,1699529623165506.5,1.9,85.22,4,0,"""1024,4""",INT32,FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.2,8897,0.0072,0,0.0027,0,0,4864 +1,4294967295,1380,16,Less116,Less,AI_VECTOR_CORE,1699529623165595,1.88,86.6,4,0,"""1024,4;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1024,4""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,1.17,8684,0.0111,0,0.006,0.0074,0,16000 +1,4294967295,1381,16,Cast115,Cast,AI_VECTOR_CORE,1699529623165683.2,1.56,86.37,4,0,"""1024,4""",INT32,FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.8,5932,0.0108,0,0.004,0,0,4864 +1,4294967295,1382,16,GreaterEqual117,GreaterEqual,AI_VECTOR_CORE,1699529623165772,2.2,87.19,4,0,"""1024,4;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1024,4""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,1.41,10401,0.0092,0,0.005,0.0062,0,16000 +1,4294967295,1383,16,LogicalOr118,LogicalOr,AI_VECTOR_CORE,1699529623165859.5,1.36,85.3,2,0,"""1024,4;1024,4""",BOOL;BOOL,FORMAT_ND;FORMAT_ND,"""1024,4""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,3247,0.0099,0,0.0037,0.0394,0,18816 +1,4294967295,1385,16,Sub119,Sub,AI_VECTOR_CORE,1699529623165961.5,2.12,100.64,4,0,"""1024,4;""",INT32;INT32,FORMAT_ND;FORMAT_ND,"""1024,4""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.27,9425,0,0.0068,0.0038,0,0,5248 +1,4294967295,1386,16,LogicalNot120,LogicalNot,AI_VECTOR_CORE,1699529623166066.8,1.44,103.13,2,0,"""1024,4""",BOOL,FORMAT_ND,"""1024,4""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,3263,0.0392,0,0.0067,0.0588,0,33472 +1,4294967295,1387,16,Cast121,Cast,AI_VECTOR_CORE,1699529623166162.5,1.92,94.31,4,0,"""1024,4""",BOOL,FORMAT_ND,"""1024,4""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.12,8297,0,0.0077,0.0029,0.0039,0,8960 +1,4294967295,1388,16,Mul122,Mul,AI_VECTOR_CORE,1699529623166250,1.76,85.58,4,0,"""1024,4;1024,4""",INT32;INT32,FORMAT_ND;FORMAT_ND,"""1024,4""",INT32,FORMAT_ND,N/A,0,0,0,0,0,0,1.01,7476,0,0.0086,0.0032,0,0,4864 
+1,4294967295,1390,16,Cast124,Cast,AI_VECTOR_CORE,1699529623166339.5,2.82,87.74,8,0,"""4096""",INT32,FORMAT_ND,"""4096""",INT64,FORMAT_ND,N/A,0,0,0,0,0,0,1.62,23990,0,0,0.0073,0,0,5632 +1,4294967295,1391,16,Index125,Index,AI_VECTOR_CORE,1699529623166427.5,26.5,85.18,48,0,"""4096,6400;2;1;4096;4096""",FLOAT;INT64;INT64;INT64;INT64,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4096""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,16.92,1502562,0,0,0.0001,0,0,3072 +1,4294967295,1393,16,LogicalNot120,LogicalNot,AI_VECTOR_CORE,1699529623166539.8,1.5,85.75,2,0,"""1024,4""",BOOL,FORMAT_ND,"""1024,4""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.94,3460,0.037,0,0.0064,0.0555,0,33472 +1,4294967295,1394,16,Cast126,Cast,AI_VECTOR_CORE,1699529623166627.5,1.62,86.25,4,0,"""1024,4""",BOOL,FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.89,6583,0.0097,0,0.0036,0.0049,0,8960 +1,4294967295,1396,16,Mul127,Mul,AI_VECTOR_CORE,1699529623166716.8,2.14,87.63,4,0,"""1024,4;1024,4""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.38,10179,0.0063,0,0.0024,0,0,4864 +1,4294967295,N/A,N/A,hcom_allReduce__445_8,hcom_allReduce_,HCCL,1699529623166808.8,2890.44,89.86,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1401,16,Exp128,Exp,AI_VECTOR_CORE,1699529623169833.5,65.76,134.31,48,0,"""1024,4,6400""",FLOAT,FORMAT_ND,"""1024,4,6400""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,62.67,5565430,0.1472,0,0.0001,0,0,52441088 +1,4294967295,1403,16,ReduceSum129,ReduceSumD,AI_VECTOR_CORE,1699529623170148.8,95.3,249.49,48,0,"""1024,4,6400""",FLOAT,FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,89.25,7925675,0.0522,0,0.0011,0,0,26744832 +1,4294967295,N/A,N/A,hcom_allReduce__445_9,hcom_allReduce_,HCCL,1699529623170348.5,1147.68,104.45,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A 
+1,4294967295,1406,16,Log130,Log,AI_VECTOR_CORE,1699529623171499.2,1.96,3.07,4,0,"""1024,4""",FLOAT,FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.15,8495,0.0151,0,0.0028,0,0,8960 +1,4294967295,1407,16,Sub131,Sub,AI_VECTOR_CORE,1699529623171586.5,2.1,85.29,4,0,"""1024,4;1024,4""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1024,4""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.3,9613,0.0067,0,0.0025,0,0,4864 +1,4294967295,1408,16,RealDiv132,RealDiv,AI_VECTOR_CORE,1699529623171673.5,72.34,84.9,48,0,"""1024,4,6400;1024,4,1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1024,4,6400""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,66.8,5931711,0.1381,0,0.2155,0,0,93329408 +1,4294967295,1409,16,Transpose133,Transpose,AI_VECTOR_CORE,1699529623171831,10.62,85.16,48,0,"""1024,4;2""",FLOAT;INT32,FORMAT_ND;FORMAT_ND,"""4,1024""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,6.51,577893,0,0,0.0113,0,0,208896 +1,4294967295,1410,16,Mul134,Mul,AI_VECTOR_CORE,1699529623171926.5,1.96,84.88,4,0,"""4096;4096""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""4096""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.07,7933,0.0081,0,0.003,0,0,4864 +1,4294967295,1411,16,atomic_memset-1_134_2208226_2_0,MemSet,AI_VECTOR_CORE,1699529623172014.8,1.3,86.29,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.74,1374,0,0,0.1441,0,0,6336 +1,4294967295,1412,16,ReduceSum135,ReduceSumD,AI_VECTOR_CORE,1699529623172101.2,9.36,85.2,52,0,"""4096""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,9.07,436422,0.0004,0,0.0013,0,0,28000 +1,4294967295,1413,16,atomic_memset-1_134_2208226_2_0,MemSet,AI_VECTOR_CORE,1699529623172195.8,1.06,85.14,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.55,1013,0,0,0.1955,0,0,6336 +1,4294967295,1414,16,ReduceSum135,ReduceSumD,AI_VECTOR_CORE,1699529623172282,3.92,85.19,52,0,"""4096""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,3.41,164076,0.0009,0,0.0035,0,0,28000 
+1,4294967295,1415,16,RealDiv136,RealDiv,AI_VECTOR_CORE,1699529623172371,1.4,85.08,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,1589,0.0006,0,0.0038,0,0,256 +1,4294967295,N/A,N/A,hcom_allReduce__328_0,hcom_allReduce_,HCCL,1699529623172480.2,5.84,107.85,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1420,16,RealDiv137,RealDiv,AI_VECTOR_CORE,1699529623172489.2,1.48,3.16,1,0,"""1;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.92,1699,0.0006,0,0.0035,0,0,256 +1,4294967295,1421,16,RealDiv138,RealDiv,AI_VECTOR_CORE,1699529623172597,1.48,106.27,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.92,1703,0.0006,0,0.0035,0,0,256 +1,4294967295,1423,16,RealDiv139,RealDiv,AI_VECTOR_CORE,1699529623172990,1714.62,391.52,48,0,"""544404480;""",FLOAT16;FLOAT16,FORMAT_ND;FORMAT_ND,"""544404480""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1707.97,151667661,0.1683,0,0.0048,0,0,1656400608 +1,4294967295,N/A,N/A,hcom_reduceScatter__328_1,hcom_reduceScatter_,HCCL,1699529623174793.8,4614.42,89.13,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1426,16,Cast47,Cast,AI_VECTOR_CORE,1699529623179411,242.74,2.83,48,0,"""78643200""",FLOAT16,FORMAT_ND,"""78643200""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,238.72,21198472,0.058,0,0,0,0,78655488 +1,4294967295,1427,16,Cast48,Cast,AI_VECTOR_CORE,1699529623179739.5,61.6,85.76,48,0,"""12582912""",FLOAT16,FORMAT_ND,"""12582912""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,58.75,5216944,0.0377,0,0.0004,0,0,12656640 +1,4294967295,1428,16,Cast49,Cast,AI_VECTOR_CORE,1699529623179886.2,255.18,85.15,48,0,"""56623104""",FLOAT16,FORMAT_ND,"""56623104""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,251.3,22315385,0.0396,0,0.0004,0,0,56930304 
+1,4294967295,1429,16,Cast50,Cast,AI_VECTOR_CORE,1699529623180226.5,90.98,85.07,48,0,"""18874368""",FLOAT16,FORMAT_ND,"""18874368""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,88.76,7881820,0.0374,0,0,0,0,18886656 +1,4294967295,1430,16,Cast51,Cast,AI_VECTOR_CORE,1699529623180402.5,335.76,85.02,48,0,"""75497472""",FLOAT16,FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,332.55,29530127,0.0399,0,0.0004,0,0,75902976 +1,4294967295,1431,16,Cast51,Cast,AI_VECTOR_CORE,1699529623180823.5,334.22,85.24,48,0,"""75497472""",FLOAT16,FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,330.97,29390281,0.0401,0,0.0004,0,0,75902976 +1,4294967295,1432,16,Cast49,Cast,AI_VECTOR_CORE,1699529623181244,251.32,86.28,48,0,"""56623104""",FLOAT16,FORMAT_ND,"""56623104""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,247.98,22020403,0.0402,0,0.0004,0,0,56930304 +1,4294967295,1433,16,Cast50,Cast,AI_VECTOR_CORE,1699529623181580.8,85.8,85.43,48,0,"""18874368""",FLOAT16,FORMAT_ND,"""18874368""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,83.32,7398912,0.0399,0,0.0001,0,0,18886656 +1,4294967295,1434,16,Cast51,Cast,AI_VECTOR_CORE,1699529623181753.2,338.12,86.7,48,0,"""75497472""",FLOAT16,FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,335.44,29787285,0.0396,0,0.0004,0,0,75902976 +1,4294967295,1435,16,Cast51,Cast,AI_VECTOR_CORE,1699529623182176.5,334.7,85.13,48,0,"""75497472""",FLOAT16,FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,330.6,29357247,0.0402,0,0.0004,0,0,75902976 +1,4294967295,1436,16,Cast52,Cast,AI_VECTOR_CORE,1699529623182596.2,3.22,85.05,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.68,37316,0.0051,0,0.0019,0,0,14592 +1,4294967295,1437,16,Cast52,Cast,AI_VECTOR_CORE,1699529623182686,2.98,86.53,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.48,32933,0.0058,0,0.0022,0,0,14592 
+1,4294967295,1438,16,Cast53,Cast,AI_VECTOR_CORE,1699529623182774.2,2.28,85.27,5,0,"""4608""",FLOAT16,FORMAT_ND,"""4608""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.54,14275,0.005,0,0.0021,0,0,5568 +1,4294967295,1439,16,Cast52,Cast,AI_VECTOR_CORE,1699529623182862.8,2.68,86.22,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.5,33388,0.0058,0,0.0022,0,0,14592 +1,4294967295,1440,16,Cast52,Cast,AI_VECTOR_CORE,1699529623182950.8,2.24,85.32,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.18,26115,0.0074,0,0.0028,0,0,14592 +1,4294967295,1441,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183040.2,1.58,87.26,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,18449,0.0104,0,0.0039,0,0,14592 +1,4294967295,1442,16,Cast54,Cast,AI_VECTOR_CORE,1699529623183147.5,2.8,105.67,6,0,"""6144""",FLOAT16,FORMAT_ND,"""6144""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.73,19171,0.005,0,0.0019,0,0,7296 +1,4294967295,1443,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183256.2,1.98,105.95,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,20605,0.0093,0,0.0035,0,0,14592 +1,4294967295,1444,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183343.5,1.68,85.27,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,18109,0.0106,0,0.004,0,0,14592 +1,4294967295,1445,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183430.2,1.9,85.07,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.98,21651,0.0089,0,0.0033,0,0,14592 +1,4294967295,1446,16,Cast53,Cast,AI_VECTOR_CORE,1699529623183517,1.68,84.85,5,0,"""4608""",FLOAT16,FORMAT_ND,"""4608""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.96,8835,0.0081,0,0.0034,0,0,5568 +1,4294967295,1447,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183603.8,1.82,85.07,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,19128,0.01,0,0.0038,0,0,14592 
+1,4294967295,1448,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183690.8,1.7,85.18,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.85,18958,0.0101,0,0.0038,0,0,14592 +1,4294967295,1449,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183777.8,1.78,85.3,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,20640,0.0093,0,0.0035,0,0,14592 +1,4294967295,1450,16,Cast54,Cast,AI_VECTOR_CORE,1699529623183864.8,2.2,85.22,6,0,"""6144""",FLOAT16,FORMAT_ND,"""6144""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.36,15122,0.0063,0,0.0024,0,0,7296 +1,4294967295,1451,16,Cast52,Cast,AI_VECTOR_CORE,1699529623183952,1.98,85.05,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.09,24139,0.008,0,0.003,0,0,14592 +1,4294967295,1452,16,Cast52,Cast,AI_VECTOR_CORE,1699529623184039.2,1.84,85.27,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.97,21436,0.009,0,0.0034,0,0,14592 +1,4294967295,1453,16,Cast52,Cast,AI_VECTOR_CORE,1699529623184126.2,1.84,85.16,12,0,"""12288""",FLOAT16,FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.01,22510,0.0085,0,0.0032,0,0,14592 +1,4294967295,1454,16,Fill140,Fill,AI_VECTOR_CORE,1699529623184213.5,1.38,85.41,1,0,"""1;""",INT64;FLOAT,FORMAT_ND;FORMAT_ND,"""1""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,1715,0,0,0.0012,0,0,64 +1,4294967295,1456,16,Reciprocal141,Reciprocal,AI_VECTOR_CORE,1699529623184301,1.4,86.12,1,0,"""1""",FLOAT,FORMAT_ND,"""1""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.97,1802,0.0006,0,0.005,0,0,352 +1,4294967295,1457,16,atomic_memset-1_141_2208226_3_0,MemSet,AI_VECTOR_CORE,1699529623184387.5,1.2,85.1,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.73,1346,0,0,0.1471,0,0,6336 +1,4294967295,1458,16,ReduceSum142,ReduceSumD,AI_VECTOR_CORE,1699529623184473.8,348.64,85.05,48,0,"""78643200""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,344.77,30615668,0.0403,0,0.0002,0,0,79228416 
+1,4294967295,1459,16,atomic_memset-1_142_2208226_4_0,MemSet,AI_VECTOR_CORE,1699529623185175.8,1.2,353.36,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.76,1397,0,0,0.1417,0,0,6336 +1,4294967295,1460,16,ReduceSum143,ReduceSumD,AI_VECTOR_CORE,1699529623185266,38.78,89.05,48,0,"""12582912""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,36.33,3225709,0.0628,0,0.0019,0,0,13168128 +1,4294967295,1461,16,atomic_memset-1_143_2208226_5_0,MemSet,AI_VECTOR_CORE,1699529623185646,1.2,341.22,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.76,1402,0,0,0.1412,0,0,6336 +1,4294967295,1462,16,ReduceSum144,ReduceSumD,AI_VECTOR_CORE,1699529623185736.2,141.04,89.05,48,0,"""56623104""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,138.06,12260024,0.0727,0,0.0005,0,0,57208320 +1,4294967295,1463,16,atomic_memset-1_144_2208226_6_0,MemSet,AI_VECTOR_CORE,1699529623186198.5,1.16,321.21,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.62,1138,0,0,0.174,0,0,6336 +1,4294967295,1464,16,ReduceSum145,ReduceSumD,AI_VECTOR_CORE,1699529623186286.8,52.02,87.09,48,0,"""18874368""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,49.82,4424430,0.068,0,0.0014,0,0,19459584 +1,4294967295,1465,16,atomic_memset-1_145_2208226_7_0,MemSet,AI_VECTOR_CORE,1699529623186653,1.42,314.23,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.87,1617,0,0,0.1224,0,0,6336 +1,4294967295,1466,16,ReduceSum146,ReduceSumD,AI_VECTOR_CORE,1699529623186740.8,185.76,86.33,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,182.92,16243624,0.073,0,0.0004,0,0,76082688 +1,4294967295,1467,16,atomic_memset-1_145_2208226_7_0,MemSet,AI_VECTOR_CORE,1699529623187242.5,1.18,315.99,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.63,1165,0,0,0.17,0,0,6336 
+1,4294967295,1468,16,ReduceSum146,ReduceSumD,AI_VECTOR_CORE,1699529623187328.8,180.26,85.07,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,177.44,15756756,0.0752,0,0.0004,0,0,76082688 +1,4294967295,1469,16,atomic_memset-1_143_2208226_5_0,MemSet,AI_VECTOR_CORE,1699529623187818.8,1.34,309.74,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.8,1485,0,0,0.1333,0,0,6336 +1,4294967295,1470,16,ReduceSum144,ReduceSumD,AI_VECTOR_CORE,1699529623187905.8,136.4,85.66,48,0,"""56623104""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,132.62,11776356,0.0756,0,0.0005,0,0,57208320 +1,4294967295,1471,16,atomic_memset-1_144_2208226_6_0,MemSet,AI_VECTOR_CORE,1699529623188343.8,1.24,301.6,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.75,1387,0,0,0.1428,0,0,6336 +1,4294967295,1472,16,ReduceSum145,ReduceSumD,AI_VECTOR_CORE,1699529623188430,47.76,85.01,48,0,"""18874368""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,45.6,4048926,0.0743,0,0.0016,0,0,19459584 +1,4294967295,1473,16,atomic_memset-1_145_2208226_7_0,MemSet,AI_VECTOR_CORE,1699529623188778.8,1.1,300.99,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.58,1077,0,0,0.1838,0,0,6336 +1,4294967295,1474,16,ReduceSum146,ReduceSumD,AI_VECTOR_CORE,1699529623188866.8,183.42,86.9,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,179.91,15975952,0.0742,0,0.0004,0,0,76082688 +1,4294967295,1475,16,atomic_memset-1_145_2208226_7_0,MemSet,AI_VECTOR_CORE,1699529623189365,1.32,314.83,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.81,1499,0,0,0.1321,0,0,6336 +1,4294967295,1476,16,ReduceSum146,ReduceSumD,AI_VECTOR_CORE,1699529623189451.5,181.34,85.18,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,178.08,15813688,0.075,0,0.0004,0,0,76082688 
+1,4294967295,1477,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623190010.5,1.38,377.66,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.88,1634,0,0,0.1212,0,0,6336 +1,4294967295,1478,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623190098.8,9.2,86.87,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,4.78,424757,0.0006,0,0.0008,0,0,26112 +1,4294967295,1479,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623190418.5,1.16,310.55,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.67,1235,0,0,0.1603,0,0,6336 +1,4294967295,1480,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623190505.2,3.68,85.59,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,150972,0.0016,0,0.0022,0,0,26112 +1,4294967295,1481,16,atomic_memset-1_147_2208226_9_0,MemSet,AI_VECTOR_CORE,1699529623191021.5,1.22,512.57,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.72,1324,0,0,0.1495,0,0,6336 +1,4294967295,1482,16,ReduceSum148,ReduceSumD,AI_VECTOR_CORE,1699529623191110,8.9,87.28,48,0,"""4608""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,4.58,407096,0.0004,0,0.0013,0,0,26112 +1,4294967295,1483,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623191474.8,1,355.85,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.52,971,0,0,0.2039,0,0,6336 +1,4294967295,1484,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623191560.8,3.64,85,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.65,146305,0.0016,0,0.0023,0,0,26112 +1,4294967295,1485,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623191966,0.98,401.61,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.5,926,0,0,0.2138,0,0,6336 +1,4294967295,1486,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623192052.8,3.68,85.77,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.71,151585,0.0016,0,0.0022,0,0,26112 
+1,4294967295,1487,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623192549.5,0.96,493.07,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,906,0,0,0.2185,0,0,6336 +1,4294967295,1488,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623192636.5,3.62,86.04,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.68,149619,0.0016,0,0.0022,0,0,26112 +1,4294967295,1489,16,atomic_memset-1_148_2208226_10_0,MemSet,AI_VECTOR_CORE,1699529623192955.2,1.2,315.13,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.74,1364,0,0,0.1452,0,0,6336 +1,4294967295,1490,16,ReduceSum149,ReduceSumD,AI_VECTOR_CORE,1699529623193043.5,7.96,87.05,48,0,"""6144""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,4.08,361934,0.0004,0,0.0009,0,0,19968 +1,4294967295,1491,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623193346,1.08,294.54,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,909,0,0,0.2178,0,0,6336 +1,4294967295,1492,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623193433.8,3.68,86.67,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.67,148446,0.0016,0,0.0023,0,0,26112 +1,4294967295,1493,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623193748.8,1.1,311.32,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.53,988,0,0,0.2004,0,0,6336 +1,4294967295,1494,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623193845.5,3.66,95.65,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.68,149231,0.0016,0,0.0023,0,0,26112 +1,4294967295,1495,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623194171,1.04,321.84,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,901,0,0,0.2198,0,0,6336 +1,4294967295,1496,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623194256.2,3.7,84.21,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,151114,0.0016,0,0.0022,0,0,26112 
+1,4294967295,1497,16,atomic_memset-1_147_2208226_9_0,MemSet,AI_VECTOR_CORE,1699529623194561.5,1.12,301.55,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.56,1036,0,0,0.1911,0,0,6336 +1,4294967295,1498,16,ReduceSum148,ReduceSumD,AI_VECTOR_CORE,1699529623194647.2,3.66,84.63,48,0,"""4608""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,150921,0.001,0,0.0035,0,0,26112 +1,4294967295,1499,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623194957,0.98,306.09,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.45,833,0,0,0.2377,0,0,6336 +1,4294967295,1500,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623195042.5,3.68,84.52,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,151064,0.0016,0,0.0022,0,0,26112 +1,4294967295,1501,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623195343.5,1.06,297.32,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.51,945,0,0,0.2095,0,0,6336 +1,4294967295,1502,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623195429.2,3.68,84.69,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.75,155236,0.0015,0,0.0022,0,0,26112 +1,4294967295,1503,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623195735,0.98,302.07,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.47,864,0,0,0.2292,0,0,6336 +1,4294967295,1504,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623195820.2,3.62,84.27,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.67,148598,0.0016,0,0.0023,0,0,26112 +1,4294967295,1505,16,atomic_memset-1_148_2208226_10_0,MemSet,AI_VECTOR_CORE,1699529623196138.8,1.04,314.88,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.52,971,0,0,0.2039,0,0,6336 +1,4294967295,1506,16,ReduceSum149,ReduceSumD,AI_VECTOR_CORE,1699529623196224.2,3.62,84.46,48,0,"""6144""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.71,151579,0.0009,0,0.0022,0,0,19968 
+1,4294967295,1507,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623196532.5,1.02,304.63,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.51,940,0,0,0.2106,0,0,6336 +1,4294967295,1508,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623196618,3.6,84.48,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.66,147605,0.0016,0,0.0023,0,0,26112 +1,4294967295,1509,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623196928.8,0.92,307.15,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.44,823,0,0,0.2406,0,0,6336 +1,4294967295,1510,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623197014.2,3.66,84.58,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.66,147811,0.0016,0,0.0023,0,0,26112 +1,4294967295,1511,16,atomic_memset-1_146_2208226_8_0,MemSet,AI_VECTOR_CORE,1699529623197322.8,1.12,304.84,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.63,1163,0,0,0.1702,0,0,6336 +1,4294967295,1512,16,ReduceSum147,ReduceSumD,AI_VECTOR_CORE,1699529623197408.2,3.68,84.38,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.73,154016,0.0016,0,0.0022,0,0,26112 +1,4294967295,1513,16,Mul150,Mul,AI_VECTOR_CORE,1699529623197673.2,314.42,261.32,48,0,"""78643200;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""78643200""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,309.99,27526929,0.0446,0,0,0,0,78655488 +1,4294967295,1514,16,Mul151,Mul,AI_VECTOR_CORE,1699529623198073.2,89.28,85.58,48,0,"""12582912;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12582912""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,87.16,7740167,0.0254,0,0.0006,0,0,12724224 +1,4294967295,1515,16,Mul152,Mul,AI_VECTOR_CORE,1699529623198263,357.46,100.47,48,0,"""56623104;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""56623104""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,354.95,31519921,0.0281,0,0,0,0,56635392 
+1,4294967295,1516,16,Mul153,Mul,AI_VECTOR_CORE,1699529623198706.8,124.98,86.29,48,0,"""18874368;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""18874368""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,123.16,10936729,0.027,0,0.0006,0,0,19083264 +1,4294967295,1517,16,Mul154,Mul,AI_VECTOR_CORE,1699529623198917.5,474.8,85.77,48,0,"""75497472;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,471.45,41864403,0.0282,0,0.0006,0,0,76308480 +1,4294967295,1518,16,Mul154,Mul,AI_VECTOR_CORE,1699529623199477.8,470.96,85.45,48,0,"""75497472;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,467.36,41501789,0.0284,0,0.0006,0,0,76308480 +1,4294967295,1519,16,Mul152,Mul,AI_VECTOR_CORE,1699529623200034.2,354.46,85.54,48,0,"""56623104;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""56623104""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,350.97,31166296,0.0284,0,0,0,0,56635392 +1,4294967295,1520,16,Mul153,Mul,AI_VECTOR_CORE,1699529623200474.2,119.28,85.54,48,0,"""18874368;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""18874368""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,116.55,10349250,0.0285,0,0.0006,0,0,19083264 +1,4294967295,1521,16,Mul154,Mul,AI_VECTOR_CORE,1699529623200679,469.36,85.47,48,0,"""75497472;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,465.77,41360501,0.0285,0,0.0006,0,0,76308480 +1,4294967295,1522,16,Mul154,Mul,AI_VECTOR_CORE,1699529623201235.5,471.8,87.14,48,0,"""75497472;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""75497472""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,467.92,41551557,0.0284,0,0.0006,0,0,76308480 +1,4294967295,1523,16,Mul155,Mul,AI_VECTOR_CORE,1699529623201792.2,3.42,84.95,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.76,39093,0.0049,0,0.0018,0,0,14592 +1,4294967295,1524,16,Mul155,Mul,AI_VECTOR_CORE,1699529623201880,3.06,84.33,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.62,35963,0.0053,0,0.002,0,0,14592 
+1,4294967295,1525,16,Mul156,Mul,AI_VECTOR_CORE,1699529623201967.5,2.12,84.44,5,0,"""4608;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""4608""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.28,11817,0.0061,0,0.0025,0,0,5568 +1,4294967295,1526,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202054,3,84.38,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.76,39153,0.0049,0,0.0018,0,0,14592 +1,4294967295,1527,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202141.5,2.6,84.5,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.35,29960,0.0064,0,0.0024,0,0,14592 +1,4294967295,1528,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202228.5,1.62,84.4,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,19582,0.0098,0,0.0037,0,0,14592 +1,4294967295,1529,16,Mul157,Mul,AI_VECTOR_CORE,1699529623202314.5,2.24,84.38,6,0,"""6144;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""6144""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.34,14830,0.0065,0,0.0024,0,0,7296 +1,4294967295,1530,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202415.5,1.94,98.76,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.95,20982,0.0092,0,0.0034,0,0,14592 +1,4294967295,1531,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202518.8,1.96,101.31,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.07,23659,0.0081,0,0.003,0,0,14592 +1,4294967295,1532,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202615.5,1.58,94.79,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,17930,0.0107,0,0.004,0,0,14592 +1,4294967295,1533,16,Mul156,Mul,AI_VECTOR_CORE,1699529623202701.8,1.82,84.67,5,0,"""4608;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""4608""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.14,10562,0.0068,0,0.0028,0,0,5568 
+1,4294967295,1534,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202788.5,1.96,84.93,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.1,24360,0.0079,0,0.003,0,0,14592 +1,4294967295,1535,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202875,1.86,84.54,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.89,19664,0.0098,0,0.0037,0,0,14592 +1,4294967295,1536,16,Mul155,Mul,AI_VECTOR_CORE,1699529623202961,1.78,84.14,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.84,18582,0.0103,0,0.0039,0,0,14592 +1,4294967295,1537,16,Mul157,Mul,AI_VECTOR_CORE,1699529623203047.2,2.12,84.47,6,0,"""6144;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""6144""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.22,13499,0.0071,0,0.0027,0,0,7296 +1,4294967295,1538,16,Mul155,Mul,AI_VECTOR_CORE,1699529623203134,1.76,84.63,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.89,19704,0.0097,0,0.0037,0,0,14592 +1,4294967295,1539,16,Mul155,Mul,AI_VECTOR_CORE,1699529623203220.5,1.86,84.74,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,20574,0.0093,0,0.0035,0,0,14592 +1,4294967295,1540,16,Mul155,Mul,AI_VECTOR_CORE,1699529623203306.5,1.86,84.14,12,0,"""12288;1""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""12288""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,20673,0.0093,0,0.0035,0,0,14592 +1,4294967295,N/A,N/A,hcom_allReduce__935_0,hcom_allReduce_,HCCL,1699529623203395.5,3585.86,87.14,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,N/A,N/A,hcom_allReduce__328_2,hcom_allReduce_,HCCL,1699529623206988.5,5.78,7.14,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A 
+1,4294967295,1545,16,atomic_memset-1_157_2208226_11_0,MemSet,AI_VECTOR_CORE,1699529623208680.2,1.46,1685.97,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.97,1796,0,0,0.1102,0,0,6336 +1,4294967295,1546,16,LpNormReduceV2158,LpNormReduceV2,AI_VECTOR_CORE,1699529623208768.5,396.14,86.79,48,0,"""78643200""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,392.58,34861469,0.1059,0,0.002,0,0,238434304 +1,4294967295,1547,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623209249.2,1.24,84.61,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.73,1345,0.0015,0,0.0045,0,0,320 +1,4294967295,1549,16,Square160,Square,AI_VECTOR_CORE,1699529623209335.5,1.34,85.01,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,1501,0.0007,0,0.004,0,0,256 +1,4294967295,1550,16,Add161,Add,AI_VECTOR_CORE,1699529623209457.2,1.46,120.41,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.91,1678,0.0006,0,0.0036,0,0,256 +1,4294967295,1551,16,atomic_memset-1_161_2208226_12_0,MemSet,AI_VECTOR_CORE,1699529623209829.5,1.3,370.79,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.76,1398,0,0,0.1416,0,0,6336 +1,4294967295,1552,16,LpNormReduceV2162,LpNormReduceV2,AI_VECTOR_CORE,1699529623209916.5,39.88,85.7,48,0,"""12582912""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,36.88,3274660,0.1826,0,0.0045,0,0,38736384 +1,4294967295,1553,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623210042,1.3,85.62,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.76,1403,0.0014,0,0.0043,0,0,320 +1,4294967295,1555,16,Square160,Square,AI_VECTOR_CORE,1699529623210229.2,1.5,185.95,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.95,1758,0.0006,0,0.0034,0,0,256 +1,4294967295,1556,16,Add163,Add,AI_VECTOR_CORE,1699529623210343,1.24,112.25,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.71,1315,0.0008,0,0.0046,0,0,256 
+1,4294967295,1557,16,atomic_memset-1_163_2208226_13_0,MemSet,AI_VECTOR_CORE,1699529623210579.8,1.42,235.51,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.87,1601,0,0,0.1237,0,0,6336 +1,4294967295,1558,16,LpNormReduceV2164,LpNormReduceV2,AI_VECTOR_CORE,1699529623210668.2,141.96,87.08,48,0,"""56623104""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,138.82,12327282,0.2161,0,0.0042,0,0,172169216 +1,4294967295,1559,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623210894.8,1.3,84.54,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.77,1416,0.0014,0,0.0042,0,0,320 +1,4294967295,1561,16,Square160,Square,AI_VECTOR_CORE,1699529623211002.5,1.36,106.45,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1520,0.0007,0,0.0039,0,0,256 +1,4294967295,1562,16,Add163,Add,AI_VECTOR_CORE,1699529623211109,1.44,105.14,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1662,0.0006,0,0.0036,0,0,256 +1,4294967295,1563,16,atomic_memset-1_164_2208226_14_0,MemSet,AI_VECTOR_CORE,1699529623211348.8,1.22,238.31,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.78,1434,0,0,0.1381,0,0,6336 +1,4294967295,1564,16,LpNormReduceV2165,LpNormReduceV2,AI_VECTOR_CORE,1699529623211444.2,53.92,94.28,48,0,"""18874368""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,51.65,4586599,0.1952,0,0.0044,0,0,57927168 +1,4294967295,1565,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623211602.2,1.34,104.08,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.79,1470,0.0014,0,0.0041,0,0,320 +1,4294967295,1567,16,Square160,Square,AI_VECTOR_CORE,1699529623211806.2,1.14,202.66,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.7,1304,0.0008,0,0.0046,0,0,256 
+1,4294967295,1568,16,Add163,Add,AI_VECTOR_CORE,1699529623211913,1.1,105.61,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.63,1172,0.0009,0,0.0051,0,0,256 +1,4294967295,1569,16,atomic_memset-1_165_2208226_15_0,MemSet,AI_VECTOR_CORE,1699529623212153.5,1.22,239.4,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.75,1380,0,0,0.1435,0,0,6336 +1,4294967295,1570,16,LpNormReduceV2166,LpNormReduceV2,AI_VECTOR_CORE,1699529623212242.5,187.98,87.78,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,182.68,16222215,0.2188,0,0.0042,0,0,229290240 +1,4294967295,1571,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623212515.2,1.18,84.77,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.71,1306,0.0015,0,0.0046,0,0,320 +1,4294967295,1573,16,Square160,Square,AI_VECTOR_CORE,1699529623212618.8,1.28,102.32,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.8,1471,0.0007,0,0.0041,0,0,256 +1,4294967295,1574,16,Add163,Add,AI_VECTOR_CORE,1699529623212727,1.46,106.97,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.99,1832,0.0005,0,0.0033,0,0,256 +1,4294967295,1575,16,atomic_memset-1_165_2208226_15_0,MemSet,AI_VECTOR_CORE,1699529623212965.5,1.08,237.04,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.58,1082,0,0,0.183,0,0,6336 +1,4294967295,1576,16,LpNormReduceV2166,LpNormReduceV2,AI_VECTOR_CORE,1699529623213052,181.48,85.42,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,177.84,15791890,0.2247,0,0.0043,0,0,229290240 +1,4294967295,1577,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623213318,1.22,84.52,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.74,1373,0.0015,0,0.0044,0,0,320 +1,4294967295,1579,16,Square160,Square,AI_VECTOR_CORE,1699529623213417.8,1.46,98.53,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.97,1794,0.0006,0,0.0033,0,0,256 
+1,4294967295,1580,16,Add163,Add,AI_VECTOR_CORE,1699529623213534,1.5,114.79,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1,1851,0.0005,0,0.0032,0,0,256 +1,4294967295,1581,16,atomic_memset-1_163_2208226_13_0,MemSet,AI_VECTOR_CORE,1699529623213771.5,1.26,236,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.76,1407,0,0,0.1407,0,0,6336 +1,4294967295,1582,16,LpNormReduceV2164,LpNormReduceV2,AI_VECTOR_CORE,1699529623213858.5,138.68,85.74,48,0,"""56623104""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,134.71,11962518,0.2227,0,0.0043,0,0,172169216 +1,4294967295,1583,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623214081.8,1.22,84.57,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.7,1304,0.0015,0,0.0046,0,0,320 +1,4294967295,1585,16,Square160,Square,AI_VECTOR_CORE,1699529623214222.8,1.46,139.78,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.98,1812,0.0006,0,0.0033,0,0,256 +1,4294967295,1586,16,Add163,Add,AI_VECTOR_CORE,1699529623214330.2,1.44,106.04,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.95,1761,0.0006,0,0.0034,0,0,256 +1,4294967295,1587,16,atomic_memset-1_164_2208226_14_0,MemSet,AI_VECTOR_CORE,1699529623214552,1.08,220.31,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.58,1080,0,0,0.1833,0,0,6336 +1,4294967295,1588,16,LpNormReduceV2165,LpNormReduceV2,AI_VECTOR_CORE,1699529623214638.8,47.92,85.67,48,0,"""18874368""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,45.66,4054876,0.2207,0,0.0049,0,0,57927168 +1,4294967295,1589,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623214771.8,1.12,85.08,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.62,1143,0.0017,0,0.0052,0,0,320 +1,4294967295,1591,16,Square160,Square,AI_VECTOR_CORE,1699529623215020,1.22,247.13,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.72,1335,0.0007,0,0.0045,0,0,256 
+1,4294967295,1592,16,Add163,Add,AI_VECTOR_CORE,1699529623215145.5,1.12,124.28,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.6,1116,0.0009,0,0.0054,0,0,256 +1,4294967295,1593,16,atomic_memset-1_165_2208226_15_0,MemSet,AI_VECTOR_CORE,1699529623215388.5,1.34,241.88,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.82,1522,0,0,0.1301,0,0,6336 +1,4294967295,1594,16,LpNormReduceV2166,LpNormReduceV2,AI_VECTOR_CORE,1699529623215475.8,182.24,85.91,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,178.91,15887314,0.2234,0,0.0043,0,0,229290240 +1,4294967295,1595,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623215748.2,1.2,90.26,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1282,0.0016,0,0.0047,0,0,320 +1,4294967295,1597,16,Square160,Square,AI_VECTOR_CORE,1699529623215836.5,1.7,87.05,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.17,2173,0.0005,0,0.0028,0,0,256 +1,4294967295,1598,16,Add163,Add,AI_VECTOR_CORE,1699529623215924.8,1.5,86.55,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.97,1800,0.0006,0,0.0033,0,0,256 +1,4294967295,1599,16,atomic_memset-1_165_2208226_15_0,MemSet,AI_VECTOR_CORE,1699529623216141.8,1.14,215.5,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.63,1172,0,0,0.1689,0,0,6336 +1,4294967295,1600,16,LpNormReduceV2166,LpNormReduceV2,AI_VECTOR_CORE,1699529623216230.2,181.3,87.36,48,0,"""75497472""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,176.99,15716480,0.2258,0,0.0043,0,0,229290240 +1,4294967295,1601,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623216496,1.6,84.45,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.04,1929,0.001,0,0.0031,0,0,320 +1,4294967295,1603,16,Square160,Square,AI_VECTOR_CORE,1699529623216641.5,1.42,143.9,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,1713,0.0006,0,0.0035,0,0,256 
+1,4294967295,1604,16,Add163,Add,AI_VECTOR_CORE,1699529623216748.2,1.48,105.33,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.99,1834,0.0005,0,0.0033,0,0,256 +1,4294967295,1605,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623216996.2,1.08,246.52,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.59,1093,0,0,0.1812,0,0,6336 +1,4294967295,1606,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623217085,9.32,87.67,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,4.81,426930,0.0015,0,0.001,0,0,53760 +1,4294967295,1607,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623217179,1.1,84.68,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.63,1164,0.0017,0,0.0052,0,0,320 +1,4294967295,1609,16,Square160,Square,AI_VECTOR_CORE,1699529623217443.8,1.18,263.65,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1269,0.0008,0,0.0047,0,0,256 +1,4294967295,1610,16,Add163,Add,AI_VECTOR_CORE,1699529623217550.8,1.08,105.82,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.6,1101,0.0009,0,0.0054,0,0,256 +1,4294967295,1611,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623217759,1,207.17,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,903,0,0,0.2193,0,0,6336 +1,4294967295,1612,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623217845.8,3.78,85.75,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.82,161523,0.0039,0,0.0027,0,0,53760 +1,4294967295,1613,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623217934,1.3,84.47,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.79,1456,0.0014,0,0.0041,0,0,320 +1,4294967295,1615,16,Square160,Square,AI_VECTOR_CORE,1699529623218210.8,1.2,275.45,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.68,1260,0.0008,0,0.0048,0,0,256 
+1,4294967295,1616,16,Add163,Add,AI_VECTOR_CORE,1699529623218311.2,1.14,99.3,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1129,0.0009,0,0.0053,0,0,256 +1,4294967295,1617,16,atomic_memset-1_167_2208226_17_0,MemSet,AI_VECTOR_CORE,1699529623218559.5,1.32,247.11,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.79,1468,0,0,0.1349,0,0,6336 +1,4294967295,1618,16,LpNormReduceV2168,LpNormReduceV2,AI_VECTOR_CORE,1699529623218646.5,8.42,85.68,48,0,"""4608""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,4.19,372346,0.0009,0,0.0017,0,0,41472 +1,4294967295,1619,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623218739.8,1.14,84.83,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1136,0.0018,0,0.0053,0,0,320 +1,4294967295,1621,16,Square160,Square,AI_VECTOR_CORE,1699529623218983.5,1.22,242.61,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.68,1266,0.0008,0,0.0047,0,0,256 +1,4294967295,1622,16,Add163,Add,AI_VECTOR_CORE,1699529623219100.5,1.14,115.78,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.59,1093,0.0009,0,0.0055,0,0,256 +1,4294967295,1623,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623219311.8,1.06,210.11,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.53,979,0,0,0.2022,0,0,6336 +1,4294967295,1624,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623219399,3.68,86.19,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.71,151624,0.0041,0,0.0028,0,0,53760 +1,4294967295,1625,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623219501,1.1,98.32,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.6,1105,0.0018,0,0.0054,0,0,320 +1,4294967295,1627,16,Square160,Square,AI_VECTOR_CORE,1699529623219745.8,1.2,243.65,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1272,0.0008,0,0.0047,0,0,256 
+1,4294967295,1628,16,Add163,Add,AI_VECTOR_CORE,1699529623219941.2,1.14,194.3,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.6,1104,0.0009,0,0.0054,0,0,256 +1,4294967295,1629,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623220174,1.04,231.61,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.48,886,0,0,0.2235,0,0,6336 +1,4294967295,1630,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623220260.2,3.66,85.21,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.69,150508,0.0041,0,0.0029,0,0,53760 +1,4294967295,1631,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623220350.8,1.14,86.84,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.62,1145,0.0017,0,0.0052,0,0,320 +1,4294967295,1633,16,Square160,Square,AI_VECTOR_CORE,1699529623220613.2,1.26,261.36,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.71,1306,0.0008,0,0.0046,0,0,256 +1,4294967295,1634,16,Add163,Add,AI_VECTOR_CORE,1699529623220741.2,1.36,126.74,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,1496,0.0007,0,0.004,0,0,256 +1,4294967295,1635,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623220911.2,1.04,168.64,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.54,992,0,0,0.1996,0,0,6336 +1,4294967295,1636,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623220997,3.62,84.71,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.65,146389,0.0043,0,0.003,0,0,53760 +1,4294967295,1637,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623221087.2,1.18,86.63,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1123,0.0018,0,0.0053,0,0,320 +1,4294967295,1639,16,Square160,Square,AI_VECTOR_CORE,1699529623221302.2,1.2,213.82,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1268,0.0008,0,0.0047,0,0,256 
+1,4294967295,1640,16,Add163,Add,AI_VECTOR_CORE,1699529623221419.2,1.12,115.8,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1134,0.0009,0,0.0053,0,0,256 +1,4294967295,1641,16,atomic_memset-1_168_2208226_18_0,MemSet,AI_VECTOR_CORE,1699529623221614.5,1.22,194.13,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.7,1303,0,0,0.152,0,0,6336 +1,4294967295,1642,16,LpNormReduceV2169,LpNormReduceV2,AI_VECTOR_CORE,1699529623221721.8,8.48,106.03,48,0,"""6144""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,4.5,399590,0.0008,0,0.0011,0,0,35328 +1,4294967295,1643,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623221835.8,1.16,105.52,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.66,1213,0.0016,0,0.0049,0,0,320 +1,4294967295,1645,16,Square160,Square,AI_VECTOR_CORE,1699529623221974.8,1.16,137.84,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.65,1194,0.0008,0,0.005,0,0,256 +1,4294967295,1646,16,Add163,Add,AI_VECTOR_CORE,1699529623222062,1.12,86.09,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1126,0.0009,0,0.0053,0,0,256 +1,4294967295,1647,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623222239.8,1.04,176.63,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.51,946,0,0,0.2093,0,0,6336 +1,4294967295,1648,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623222328.2,3.62,87.46,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,151241,0.0041,0,0.0029,0,0,53760 +1,4294967295,1649,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623222417.8,1.22,85.88,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.73,1354,0.0015,0,0.0044,0,0,320 +1,4294967295,1651,16,Square160,Square,AI_VECTOR_CORE,1699529623222606,1.22,187.03,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.68,1264,0.0008,0,0.0047,0,0,256 
+1,4294967295,1652,16,Add163,Add,AI_VECTOR_CORE,1699529623222722,1.16,114.78,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1120,0.0009,0,0.0054,0,0,256 +1,4294967295,1653,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623222909.2,1,186.09,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.44,823,0,0,0.2406,0,0,6336 +1,4294967295,1654,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623222998,3.72,87.75,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,151377,0.0041,0,0.0029,0,0,53760 +1,4294967295,1655,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623223086.8,1.36,85.03,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.8,1480,0.0014,0,0.0041,0,0,320 +1,4294967295,1657,16,Square160,Square,AI_VECTOR_CORE,1699529623223315.5,1.38,227.39,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,1535,0.0007,0,0.0039,0,0,256 +1,4294967295,1658,16,Add163,Add,AI_VECTOR_CORE,1699529623223427.5,1.16,110.62,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1125,0.0009,0,0.0053,0,0,256 +1,4294967295,1659,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623223662.8,1.06,234.09,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.48,892,0,0,0.222,0,0,6336 +1,4294967295,1660,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623223751,3.72,87.19,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.72,152772,0.0041,0,0.0028,0,0,53760 +1,4294967295,1661,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623223839.8,1.12,85.03,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.59,1086,0.0018,0,0.0055,0,0,320 +1,4294967295,1663,16,Square160,Square,AI_VECTOR_CORE,1699529623224117,1.24,276.13,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.7,1299,0.0008,0,0.0046,0,0,256 
+1,4294967295,1664,16,Add163,Add,AI_VECTOR_CORE,1699529623224204.8,1.18,86.51,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.62,1144,0.0009,0,0.0052,0,0,256 +1,4294967295,1665,16,atomic_memset-1_167_2208226_17_0,MemSet,AI_VECTOR_CORE,1699529623224377,1.06,171.07,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,906,0,0,0.2185,0,0,6336 +1,4294967295,1666,16,LpNormReduceV2168,LpNormReduceV2,AI_VECTOR_CORE,1699529623224464.5,3.64,86.44,48,0,"""4608""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.7,150536,0.0022,0,0.0041,0,0,41472 +1,4294967295,1667,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623224556,1.32,87.86,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.78,1446,0.0014,0,0.0041,0,0,320 +1,4294967295,1669,16,Square160,Square,AI_VECTOR_CORE,1699529623224745,1.42,187.68,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.87,1611,0.0006,0,0.0037,0,0,256 +1,4294967295,1670,16,Add163,Add,AI_VECTOR_CORE,1699529623224868.5,1.22,122.08,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.67,1235,0.0008,0,0.0049,0,0,256 +1,4294967295,1671,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623225069,1,199.28,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.51,952,0,0,0.208,0,0,6336 +1,4294967295,1672,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623225156.5,3.7,86.5,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.72,152522,0.0041,0,0.0028,0,0,53760 +1,4294967295,1673,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623225246.2,1.22,86.05,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.63,1171,0.0017,0,0.0051,0,0,320 +1,4294967295,1675,16,Square160,Square,AI_VECTOR_CORE,1699529623225400.2,1.34,152.78,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,1598,0.0006,0,0.0038,0,0,256 
+1,4294967295,1676,16,Add163,Add,AI_VECTOR_CORE,1699529623225515.2,1.1,113.66,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1184,0.0008,0,0.0051,0,0,256 +1,4294967295,1677,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623225713,1,196.65,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.51,941,0,0,0.2104,0,0,6336 +1,4294967295,1678,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623225801.5,3.6,87.5,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.66,147832,0.0042,0,0.0029,0,0,53760 +1,4294967295,1679,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623225890,1.12,84.9,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1181,0.0017,0,0.0051,0,0,320 +1,4294967295,1681,16,Square160,Square,AI_VECTOR_CORE,1699529623226110.5,1.36,219.38,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.89,1649,0.0006,0,0.0036,0,0,256 +1,4294967295,1682,16,Add163,Add,AI_VECTOR_CORE,1699529623226199,1.14,87.14,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.66,1230,0.0008,0,0.0049,0,0,256 +1,4294967295,1683,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623226382,1,181.86,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.51,939,0,0,0.2109,0,0,6336 +1,4294967295,1684,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623226488.5,3.66,105.5,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.69,150313,0.0042,0,0.0029,0,0,53760 +1,4294967295,1685,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623226580,1.1,87.84,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.61,1128,0.0018,0,0.0053,0,0,320 +1,4294967295,1687,16,Square160,Square,AI_VECTOR_CORE,1699529623226869.8,1.4,288.65,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1670,0.0006,0,0.0036,0,0,256 
+1,4294967295,1688,16,Add163,Add,AI_VECTOR_CORE,1699529623226957.8,1.1,86.6,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.62,1150,0.0009,0,0.0052,0,0,256 +1,4294967295,1689,16,atomic_memset-1_168_2208226_18_0,MemSet,AI_VECTOR_CORE,1699529623227129.2,1.16,170.4,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.67,1246,0,0,0.1589,0,0,6336 +1,4294967295,1690,16,LpNormReduceV2169,LpNormReduceV2,AI_VECTOR_CORE,1699529623227216.8,3.98,86.34,48,0,"""6144""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.98,175705,0.0019,0,0.0025,0,0,35328 +1,4294967295,1691,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623227307.2,1.14,86.52,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.63,1165,0.0017,0,0.0052,0,0,320 +1,4294967295,1693,16,Square160,Square,AI_VECTOR_CORE,1699529623227522.2,1.24,213.86,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.72,1337,0.0007,0,0.0045,0,0,256 +1,4294967295,1694,16,Add163,Add,AI_VECTOR_CORE,1699529623227653.5,1.32,130.01,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.79,1456,0.0007,0,0.0041,0,0,256 +1,4294967295,1695,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623227865.5,1.04,210.68,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,907,0,0,0.2183,0,0,6336 +1,4294967295,1696,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623227954.2,3.66,87.71,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.71,151772,0.0041,0,0.0028,0,0,53760 +1,4294967295,1697,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623228044,1.16,86.09,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.65,1200,0.0017,0,0.005,0,0,320 +1,4294967295,1699,16,Square160,Square,AI_VECTOR_CORE,1699529623228246,1.4,200.84,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.87,1609,0.0006,0,0.0037,0,0,256 
+1,4294967295,1700,16,Add163,Add,AI_VECTOR_CORE,1699529623228338.5,1.32,91.1,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,1505,0.0007,0,0.004,0,0,256 +1,4294967295,1701,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623228530.2,1.12,190.43,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.58,1082,0,0,0.183,0,0,6336 +1,4294967295,1702,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623228617.8,3.68,86.38,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.71,152055,0.0041,0,0.0028,0,0,53760 +1,4294967295,1703,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623228707.5,1.08,86.07,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.58,1065,0.0019,0,0.0056,0,0,320 +1,4294967295,1705,16,Square160,Square,AI_VECTOR_CORE,1699529623228935.5,1.26,226.92,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.74,1374,0.0007,0,0.0044,0,0,256 +1,4294967295,1706,16,Add163,Add,AI_VECTOR_CORE,1699529623229032.8,1.12,95.99,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.6,1118,0.0009,0,0.0054,0,0,256 +1,4294967295,1707,16,atomic_memset-1_166_2208226_16_0,MemSet,AI_VECTOR_CORE,1699529623229217.2,1.04,183.38,1,0,"""""",UNDEFINED,NULL,"""""",UNDEFINED,NULL,N/A,0,0,0,0,0,0,0.49,909,0,0,0.2178,0,0,6336 +1,4294967295,1708,16,LpNormReduceV2167,LpNormReduceV2,AI_VECTOR_CORE,1699529623229306,3.8,87.71,48,0,"""12288""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.85,164313,0.0038,0,0.0026,0,0,53760 +1,4294967295,1709,16,LpNormUpdateV2159,LpNormUpdateV2,AI_VECTOR_CORE,1699529623229396,1.18,86.2,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.63,1159,0.0017,0,0.0052,0,0,320 +1,4294967295,1711,16,Square160,Square,AI_VECTOR_CORE,1699529623229570.8,1.28,173.57,1,0,"""""",FLOAT,FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.72,1328,0.0008,0,0.0045,0,0,256 
+1,4294967295,1712,16,Add163,Add,AI_VECTOR_CORE,1699529623229666.2,1.2,94.22,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1184,0.0008,0,0.0051,0,0,256 +1,4294967295,N/A,N/A,hcom_allReduce__935_1,hcom_allReduce_,HCCL,1699529623230256,32.2,588.55,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1715,16,ApplyAdamW175,ApplyAdamW,AI_VECTOR_CORE,1699529623231117,1724.88,828.8,48,0,"""78643200;78643200;78643200;;;;;;;;78643200""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""78643200;78643200;78643200""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1713.82,152187435,0.2099,0,0.0257,0,0,2170011648 +1,4294967295,1716,16,ApplyAdamW176,ApplyAdamW,AI_VECTOR_CORE,1699529623232927.2,322.4,85.37,48,0,"""12582912;12582912;12582912;;;;;;;;12582912""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12582912;12582912;12582912""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,315.97,28058356,0.1822,0,0.0226,0,0,347452416 +1,4294967295,1717,16,ApplyAdamW177,ApplyAdamW,AI_VECTOR_CORE,1699529623233335.8,1367.04,86.1,48,0,"""56623104;56623104;56623104;;;;;;;;56623104""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""56623104;56623104;56623104""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1353.77,120215198,0.1914,0,0.0237,0,0,1563460608 
+1,4294967295,1718,16,ApplyAdamW178,ApplyAdamW,AI_VECTOR_CORE,1699529623234789.8,464.14,86.96,48,0,"""18874368;18874368;18874368;;;;;;;;18874368""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""18874368;18874368;18874368""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,457.45,40621166,0.1888,0,0.0234,0,0,521167872 +1,4294967295,1719,16,ApplyAdamW179,ApplyAdamW,AI_VECTOR_CORE,1699529623235339.2,1781.98,85.36,48,0,"""75497472;75497472;75497472;;;;;;;;75497472""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""75497472;75497472;75497472""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1770.41,157212631,0.1951,0,0.0242,0,0,2084606976 +1,4294967295,1720,16,ApplyAdamW179,ApplyAdamW,AI_VECTOR_CORE,1699529623237206.2,1788.54,85.02,48,0,"""75497472;75497472;75497472;;;;;;;;75497472""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""75497472;75497472;75497472""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1777.44,157836297,0.1943,0,0.0241,0,0,2084606976 +1,4294967295,1721,16,ApplyAdamW177,ApplyAdamW,AI_VECTOR_CORE,1699529623239080.2,1356.64,85.46,48,0,"""56623104;56623104;56623104;;;;;;;;56623104""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""56623104;56623104;56623104""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1345.74,119502114,0.1925,0,0.0239,0,0,1563460608 
+1,4294967295,1722,16,ApplyAdamW178,ApplyAdamW,AI_VECTOR_CORE,1699529623240522.2,474.82,85.36,48,0,"""18874368;18874368;18874368;;;;;;;;18874368""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""18874368;18874368;18874368""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,467.92,41551533,0.1845,0,0.0229,0,0,521167872 +1,4294967295,1723,16,ApplyAdamW179,ApplyAdamW,AI_VECTOR_CORE,1699529623241082.8,1793.06,85.68,48,0,"""75497472;75497472;75497472;;;;;;;;75497472""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""75497472;75497472;75497472""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1777.43,157835996,0.1943,0,0.0241,0,0,2084606976 +1,4294967295,1724,16,ApplyAdamW179,ApplyAdamW,AI_VECTOR_CORE,1699529623242981.2,1763.92,105.44,48,0,"""75497472;75497472;75497472;;;;;;;;75497472""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""75497472;75497472;75497472""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,1750.36,155431826,0.1973,0,0.0245,0,0,2084606976 +1,4294967295,1725,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623244831.8,6.36,86.58,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,4.4,97646,0.0516,0,0.0081,0,0,347904 
+1,4294967295,1726,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623244924.5,6.74,86.39,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,4.79,106353,0.0474,0,0.0074,0,0,347904 +1,4294967295,1727,16,ApplyAdamW184,ApplyAdamW,AI_VECTOR_CORE,1699529623245016.2,4.2,85.01,5,0,"""4608;4608;4608;;;;;;;;4608""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4608;4608;4608""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,2.97,27479,0.0689,0,0.0111,0,0,130880 +1,4294967295,1728,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245122.5,5.58,102.05,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.97,88157,0.0572,0,0.009,0,0,347904 +1,4294967295,1729,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245232.5,5.72,104.42,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,4.16,92449,0.0545,0,0.0086,0,0,347904 
+1,4294967295,1730,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245325.5,6.4,87.28,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.88,86092,0.0585,0,0.0092,0,0,347904 +1,4294967295,1731,16,ApplyAdamW185,ApplyAdamW,AI_VECTOR_CORE,1699529623245417,4.28,85.1,6,0,"""6144;6144;6144;;;;;;;;6144""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6144;6144;6144""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.25,36023,0.07,0,0.011,0,0,173952 +1,4294967295,1732,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245507.5,5.38,86.22,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,4.16,92297,0.0546,0,0.0086,0,0,347904 +1,4294967295,1733,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245599.2,5.78,86.37,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.65,81035,0.0622,0,0.0098,0,0,347904 
+1,4294967295,1734,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245690,6.38,84.97,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.92,86966,0.058,0,0.0091,0,0,347904 +1,4294967295,1735,16,ApplyAdamW184,ApplyAdamW,AI_VECTOR_CORE,1699529623245783.5,3.68,87.12,5,0,"""4608;4608;4608;;;;;;;;4608""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""4608;4608;4608""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,2.7,24996,0.0757,0,0.0122,0,0,130880 +1,4294967295,1736,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245874,5.8,86.82,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.65,81135,0.0621,0,0.0098,0,0,347904 +1,4294967295,1737,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623245966.5,6,86.7,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.54,78674,0.0641,0,0.0101,0,0,347904 
+1,4294967295,1738,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623246057.8,6.48,85.25,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.94,87462,0.0576,0,0.0091,0,0,347904 +1,4294967295,1739,16,ApplyAdamW185,ApplyAdamW,AI_VECTOR_CORE,1699529623246150.5,4.48,86.27,6,0,"""6144;6144;6144;;;;;;;;6144""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""6144;6144;6144""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.11,34482,0.0731,0,0.0115,0,0,173952 +1,4294967295,1740,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623246241.5,6.14,86.52,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.9,86595,0.0582,0,0.0091,0,0,347904 +1,4294967295,1741,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623246333,6.04,85.36,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,3.63,80544,0.0626,0,0.0098,0,0,347904 
+1,4294967295,1742,16,ApplyAdamW183,ApplyAdamW,AI_VECTOR_CORE,1699529623246424,6.52,84.96,12,0,"""12288;12288;12288;;;;;;;;12288""",FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND;FORMAT_ND,"""12288;12288;12288""",FLOAT;FLOAT;FLOAT,FORMAT_ND;FORMAT_ND;FORMAT_ND,N/A,0,0,0,0,0,0,4.19,93050,0.0542,0,0.0085,0,0,347904 +1,4294967295,1743,16,Cast186,Cast,AI_VECTOR_CORE,1699529623246517,433.2,86.48,48,0,"""78643200""",FLOAT,FORMAT_ND,"""78643200""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,429.72,38158727,0,0,0,0,0,12288 +1,4294967295,1745,16,Cast187,Cast,AI_VECTOR_CORE,1699529623247253,62.82,302.8,48,0,"""12582912""",FLOAT,FORMAT_ND,"""12582912""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,60.21,5346817,0,0,0.0004,0,0,73728 +1,4294967295,1747,16,Cast188,Cast,AI_VECTOR_CORE,1699529623247454.2,269.44,138.43,48,0,"""56623104""",FLOAT,FORMAT_ND,"""56623104""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,266.37,23654007,0,0,0.0004,0,0,307200 +1,4294967295,1749,16,Cast189,Cast,AI_VECTOR_CORE,1699529623247966.2,94.76,242.56,48,0,"""18874368""",FLOAT,FORMAT_ND,"""18874368""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,92.17,8184321,0,0,0,0,0,12288 +1,4294967295,1751,16,Cast190,Cast,AI_VECTOR_CORE,1699529623248196.5,363.14,135.49,48,0,"""75497472""",FLOAT,FORMAT_ND,"""75497472""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,360.53,32014876,0,0,0.0004,0,0,405504 +1,4294967295,1753,16,Cast190,Cast,AI_VECTOR_CORE,1699529623248850.5,358.56,290.86,48,0,"""75497472""",FLOAT,FORMAT_ND,"""75497472""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,355.24,31545668,0,0,0.0004,0,0,405504 +1,4294967295,1755,16,Cast188,Cast,AI_VECTOR_CORE,1699529623249499,262.76,289.94,48,0,"""56623104""",FLOAT,FORMAT_ND,"""56623104""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,259.14,23011775,0,0,0.0004,0,0,307200 
+1,4294967295,1757,16,Cast189,Cast,AI_VECTOR_CORE,1699529623250000.5,89.78,238.74,48,0,"""18874368""",FLOAT,FORMAT_ND,"""18874368""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,86.54,7684758,0,0,0,0,0,12288 +1,4294967295,1759,16,Cast190,Cast,AI_VECTOR_CORE,1699529623250225,356.14,134.72,48,0,"""75497472""",FLOAT,FORMAT_ND,"""75497472""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,352.76,31325528,0,0,0.0004,0,0,405504 +1,4294967295,1761,16,Cast190,Cast,AI_VECTOR_CORE,1699529623250875,359.52,293.86,48,0,"""75497472""",FLOAT,FORMAT_ND,"""75497472""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,355.56,31573754,0,0,0.0004,0,0,405504 +1,4294967295,1763,16,Cast45,Cast,AI_VECTOR_CORE,1699529623251528,2.86,293.48,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.5,33339,0,0,0.0022,0,0,2304 +1,4294967295,1765,16,Cast45,Cast,AI_VECTOR_CORE,1699529623251618,2.66,87.14,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.52,33635,0,0,0.0021,0,0,2304 +1,4294967295,1767,16,Cast191,Cast,AI_VECTOR_CORE,1699529623251708,2.36,87.34,5,0,"""4608""",FLOAT,FORMAT_ND,"""4608""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.46,13459,0,0,0.0022,0,0,960 +1,4294967295,1769,16,Cast45,Cast,AI_VECTOR_CORE,1699529623251797,2.76,86.64,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.53,34030,0,0,0.0021,0,0,2304 +1,4294967295,1771,16,Cast45,Cast,AI_VECTOR_CORE,1699529623251885.8,2.32,85.99,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.18,26184,0,0,0.0027,0,0,2304 +1,4294967295,1773,16,Cast45,Cast,AI_VECTOR_CORE,1699529623251979,1.6,90.93,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,17964,0,0,0.004,0,0,2304 +1,4294967295,1775,16,Cast192,Cast,AI_VECTOR_CORE,1699529623252085,2.34,104.4,6,0,"""6144""",FLOAT,FORMAT_ND,"""6144""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.31,14579,0,0,0.0025,0,0,1152 
+1,4294967295,1777,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252192.8,2.04,105.41,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,19948,0,0,0.0036,0,0,2304 +1,4294967295,1779,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252283,1.86,88.21,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.91,20169,0,0,0.0036,0,0,2304 +1,4294967295,1781,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252371.8,1.7,86.89,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,19430,0,0,0.0037,0,0,2304 +1,4294967295,1783,16,Cast191,Cast,AI_VECTOR_CORE,1699529623252460.5,1.88,87.05,5,0,"""4608""",FLOAT,FORMAT_ND,"""4608""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.09,10064,0,0,0.003,0,0,960 +1,4294967295,1785,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252548,1.64,85.62,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,18361,0,0,0.0039,0,0,2304 +1,4294967295,1787,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252635.5,1.8,85.86,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,19038,0,0,0.0038,0,0,2304 +1,4294967295,1789,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252724.5,1.72,87.2,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.84,18701,0,0,0.0039,0,0,2304 +1,4294967295,1791,16,Cast192,Cast,AI_VECTOR_CORE,1699529623252813.2,2.02,87.03,6,0,"""6144""",FLOAT,FORMAT_ND,"""6144""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,1.28,14252,0,0,0.0025,0,0,1152 +1,4294967295,1793,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252901,1.72,85.73,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.85,18955,0,0,0.0038,0,0,2304 +1,4294967295,1795,16,Cast45,Cast,AI_VECTOR_CORE,1699529623252988.5,1.76,85.78,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,20064,0,0,0.0036,0,0,2304 
+1,4294967295,1797,16,Cast45,Cast,AI_VECTOR_CORE,1699529623253077,1.8,86.74,12,0,"""12288""",FLOAT,FORMAT_ND,"""12288""",FLOAT16,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,19638,0,0,0.0037,0,0,2304 +1,4294967295,N/A,N/A,hcom_allGather__328_3,hcom_allGather_,HCCL,1699529623253167.5,4628.6,88.7,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1829,16,Add161,Add,AI_VECTOR_CORE,1699529623259334.8,1.42,1538.65,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,1718,0.0006,0,0.0035,0,0,256 +1,4294967295,1830,16,RealDiv138,RealDiv,AI_VECTOR_CORE,1699529623259421.2,1.36,85.08,1,0,""";""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,1620,0.0006,0,0.0037,0,0,256 +1,4294967295,1831,16,Add193,Add,AI_VECTOR_CORE,1699529623259957.2,1.42,534.64,1,0,"""1;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""1""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1673,0.0006,0,0.0036,0,0,256 +1,4294967295,1832,16,OnesLike25,OnesLike,AI_VECTOR_CORE,1699529623260105.8,1.28,147.08,1,0,"""1""",FLOAT,FORMAT_ND,"""1""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.77,1416,0,0,0.0049,0,0,224 +1,4294967295,N/A,N/A,hcom_allReduce__935_2,hcom_allReduce_,HCCL,1699529623260552.2,3132.44,445.22,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1835,16,OnesLike25,OnesLike,AI_VECTOR_CORE,1699529623264254,1.14,569.31,1,0,"""1""",FLOAT,FORMAT_ND,"""1""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.62,1145,0,0,0.0061,0,0,224 +1,4294967295,N/A,N/A,hcom_allReduce__935_3,hcom_allReduce_,HCCL,1699529623264711.2,176.04,456.11,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1838,16,ZerosLike194,ZerosLike,AI_VECTOR_CORE,1699529623265798.2,1.24,910.96,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8,25""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.72,1328,0,0,0.0075,0,0,320 
+1,4294967295,N/A,N/A,hcom_allGather__935_4,hcom_allGather_,HCCL,1699529623266568.2,298.52,768.76,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A +1,4294967295,1841,16,StridedSlice195,StridedSliceD,AI_VECTOR_CORE,1699529623267078.5,1.52,211.73,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.99,1837,0,0,0.0071,0,0,416 +1,4294967295,1842,16,Greater56,Greater,AI_VECTOR_CORE,1699529623267251.8,1.56,171.73,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,1.03,1911,0.001,0,0.0068,0.001,0,800 +1,4294967295,1844,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623268645.5,8.02,1392.19,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,4.19,371651,0,0,0.0005,0,0,6144 +1,4294967295,1844,16,NonZero57,NonZero,MIX_AIV,1699529623268654.8,4.74,1.23,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,4.07,60293,0.0003,0.0003,0.0027,0.0001,0,8192 +1,4294967295,1845,16,StridedSlice195,StridedSliceD,AI_VECTOR_CORE,1699529623269174.8,1.3,515.26,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,1533,0,0,0.0085,0,0,416 +1,4294967295,1846,16,StridedSlice196,StridedSliceD,AI_VECTOR_CORE,1699529623269938.8,1.48,762.7,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.99,1826,0,0,0.0071,0,0,416 +1,4294967295,1847,16,Greater56,Greater,AI_VECTOR_CORE,1699529623270193.5,1.28,253.27,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,1502,0.0013,0,0.0087,0.0013,0,800 +1,4294967295,1849,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623270414.2,4.08,219.47,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.29,203347,0,0,0.0009,0,0,6144 +1,4294967295,1849,16,NonZero57,NonZero,MIX_AIV,1699529623270419.5,4.2,1.17,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.52,52026,0.0003,0.0003,0.0031,0.0002,0,8192 
+1,4294967295,1850,16,StridedSlice196,StridedSliceD,AI_VECTOR_CORE,1699529623271010,1.32,586.3,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,1527,0,0,0.0085,0,0,416 +1,4294967295,1851,16,StridedSlice197,StridedSliceD,AI_VECTOR_CORE,1699529623271703.5,1.58,692.18,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.07,1979,0,0,0.0066,0,0,416 +1,4294967295,1852,16,Greater56,Greater,AI_VECTOR_CORE,1699529623271869,1.16,163.92,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.66,1226,0.0016,0,0.0106,0.0016,0,800 +1,4294967295,1854,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623272041.8,4,171.59,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.29,203329,0,0,0.0009,0,0,6144 +1,4294967295,1854,16,NonZero57,NonZero,MIX_AIV,1699529623272047,4.36,1.25,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.67,54349,0.0003,0.0003,0.0029,0.0001,0,8192 +1,4294967295,1855,16,StridedSlice197,StridedSliceD,AI_VECTOR_CORE,1699529623272594.5,1.46,543.14,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.89,1648,0,0,0.0079,0,0,416 +1,4294967295,1856,16,StridedSlice198,StridedSliceD,AI_VECTOR_CORE,1699529623273302.2,1.54,706.29,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.98,1812,0,0,0.0072,0,0,416 +1,4294967295,1857,16,Greater56,Greater,AI_VECTOR_CORE,1699529623273481.5,1.46,177.71,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.96,1774,0.0011,0,0.0073,0.0011,0,800 +1,4294967295,1859,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623273671.2,4,188.29,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.31,205408,0,0,0.0009,0,0,6144 +1,4294967295,1859,16,NonZero57,NonZero,MIX_AIV,1699529623273676.8,4.42,1.5,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.67,54312,0.0003,0.0003,0.0029,0.0001,0,8192 
+1,4294967295,1860,16,StridedSlice198,StridedSliceD,AI_VECTOR_CORE,1699529623274177.2,1.36,496.08,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1523,0,0,0.0085,0,0,416 +1,4294967295,1861,16,StridedSlice199,StridedSliceD,AI_VECTOR_CORE,1699529623274846.5,1.56,667.89,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.03,1901,0,0,0.0068,0,0,416 +1,4294967295,1862,16,Greater56,Greater,AI_VECTOR_CORE,1699529623275013.2,1.36,165.19,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1518,0.0013,0,0.0086,0.0013,0,800 +1,4294967295,1864,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623275223.5,3.98,208.89,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.26,200599,0,0,0.001,0,0,6144 +1,4294967295,1864,16,NonZero57,NonZero,MIX_AIV,1699529623275228.8,4.32,1.27,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.58,53033,0.0003,0.0003,0.003,0.0002,0,8192 +1,4294967295,1865,16,StridedSlice199,StridedSliceD,AI_VECTOR_CORE,1699529623275745,1.4,511.93,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1672,0,0,0.0078,0,0,416 +1,4294967295,1866,16,StridedSlice200,StridedSliceD,AI_VECTOR_CORE,1699529623276394.8,1.52,648.35,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.02,1887,0,0,0.0069,0,0,416 +1,4294967295,1867,16,Greater56,Greater,AI_VECTOR_CORE,1699529623276577.8,1.38,181.48,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,1589,0.0013,0,0.0082,0.0013,0,800 +1,4294967295,1869,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623276775.8,3.94,196.62,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.26,200277,0,0,0.001,0,0,6144 +1,4294967295,1869,16,NonZero57,NonZero,MIX_AIV,1699529623276781,4,1.31,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.42,50650,0.0003,0.0003,0.0032,0.0002,0,8192 
+1,4294967295,1870,16,StridedSlice200,StridedSliceD,AI_VECTOR_CORE,1699529623277226.8,1.36,441.75,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1518,0,0,0.0086,0,0,416 +1,4294967295,1871,16,StridedSlice201,StridedSliceD,AI_VECTOR_CORE,1699529623277963.2,1.36,735.14,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.92,1701,0,0,0.0076,0,0,416 +1,4294967295,1872,16,Greater56,Greater,AI_VECTOR_CORE,1699529623278145.5,1.14,180.89,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1280,0.0016,0,0.0102,0.0016,0,800 +1,4294967295,1874,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623278362,3.92,215.36,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.29,203562,0,0,0.0009,0,0,6144 +1,4294967295,1874,16,NonZero57,NonZero,MIX_AIV,1699529623278367,4.08,1.08,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.32,49108,0.0003,0.0003,0.0033,0.0002,0,8192 +1,4294967295,1875,16,StridedSlice201,StridedSliceD,AI_VECTOR_CORE,1699529623278816.8,1.32,445.67,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1509,0,0,0.0086,0,0,416 +1,4294967295,1876,16,StridedSlice202,StridedSliceD,AI_VECTOR_CORE,1699529623279497,1.36,678.93,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.88,1620,0,0,0.008,0,0,416 +1,4294967295,1877,16,Greater56,Greater,AI_VECTOR_CORE,1699529623279681,1.34,182.64,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,1532,0.0013,0,0.0085,0.0013,0,800 +1,4294967295,1879,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623279928.2,3.96,245.91,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.33,206940,0,0,0.0009,0,0,6144 +1,4294967295,1879,16,NonZero57,NonZero,MIX_AIV,1699529623279933.5,3.98,1.29,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.35,49551,0.0003,0.0003,0.0032,0.0002,0,8192 
+1,4294967295,1880,16,StridedSlice202,StridedSliceD,AI_VECTOR_CORE,1699529623280444.8,1.38,507.27,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.85,1580,0,0,0.0082,0,0,416 +1,4294967295,1881,16,StridedSlice203,StridedSliceD,AI_VECTOR_CORE,1699529623281126.2,1.44,680.12,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.91,1684,0,0,0.0077,0,0,416 +1,4294967295,1882,16,Greater56,Greater,AI_VECTOR_CORE,1699529623281305,1.18,177.31,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1185,0.0017,0,0.011,0.0017,0,800 +1,4294967295,1884,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623281517.5,3.98,211.32,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.33,207279,0,0,0.0009,0,0,6144 +1,4294967295,1884,16,NonZero57,NonZero,MIX_AIV,1699529623281522.8,4.48,1.27,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.53,52266,0.0003,0.0003,0.0031,0.0002,0,8192 +1,4294967295,1885,16,StridedSlice203,StridedSliceD,AI_VECTOR_CORE,1699529623282085,1.4,557.77,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.89,1640,0,0,0.0079,0,0,416 +1,4294967295,1886,16,StridedSlice204,StridedSliceD,AI_VECTOR_CORE,1699529623282743,1.42,656.6,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1672,0,0,0.0078,0,0,416 +1,4294967295,1887,16,Greater56,Greater,AI_VECTOR_CORE,1699529623282910,1.2,165.58,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.66,1212,0.0017,0,0.0107,0.0017,0,800 +1,4294967295,1889,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623283101.2,3.98,190.05,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.24,198547,0,0,0.001,0,0,6144 +1,4294967295,1889,16,NonZero57,NonZero,MIX_AIV,1699529623283106.5,4.54,1.27,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.64,53938,0.0003,0.0003,0.003,0.0001,0,8192 
+1,4294967295,1890,16,StridedSlice204,StridedSliceD,AI_VECTOR_CORE,1699529623283649,1.46,537.96,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.87,1615,0,0,0.008,0,0,416 +1,4294967295,1891,16,StridedSlice205,StridedSliceD,AI_VECTOR_CORE,1699529623284292.2,1.44,641.79,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.98,1806,0,0,0.0072,0,0,416 +1,4294967295,1892,16,Greater56,Greater,AI_VECTOR_CORE,1699529623284494,1.16,200.31,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1274,0.0016,0,0.0102,0.0016,0,800 +1,4294967295,1894,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623284690.8,3.94,195.59,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.29,203327,0,0,0.0009,0,0,6144 +1,4294967295,1894,16,NonZero57,NonZero,MIX_AIV,1699529623284696,3.9,1.31,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.31,49043,0.0003,0.0003,0.0033,0.0002,0,8192 +1,4294967295,1895,16,StridedSlice205,StridedSliceD,AI_VECTOR_CORE,1699529623285177.2,1.28,477.35,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.77,1426,0,0,0.0091,0,0,416 +1,4294967295,1896,16,StridedSlice206,StridedSliceD,AI_VECTOR_CORE,1699529623285865.8,1.44,687.22,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.91,1677,0,0,0.0078,0,0,416 +1,4294967295,1897,16,Greater56,Greater,AI_VECTOR_CORE,1699529623286049.8,1.38,182.56,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,1540,0.0013,0,0.0084,0.0013,0,800 +1,4294967295,1899,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623286233.5,3.98,182.37,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.26,200314,0,0,0.001,0,0,6144 +1,4294967295,1899,16,NonZero57,NonZero,MIX_AIV,1699529623286239,3.86,1.52,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.13,46309,0.0003,0.0003,0.0035,0.0002,0,8192 
+1,4294967295,1900,16,StridedSlice206,StridedSliceD,AI_VECTOR_CORE,1699529623286752.8,1.3,509.89,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.85,1569,0,0,0.0083,0,0,416 +1,4294967295,1901,16,StridedSlice207,StridedSliceD,AI_VECTOR_CORE,1699529623287424.2,1.42,670.2,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.93,1718,0,0,0.0076,0,0,416 +1,4294967295,1902,16,Greater56,Greater,AI_VECTOR_CORE,1699529623287616.8,1.12,191.08,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1193,0.0017,0,0.0109,0.0017,0,800 +1,4294967295,1904,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623287801.8,3.94,183.88,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.31,205216,0,0,0.0009,0,0,6144 +1,4294967295,1904,16,NonZero57,NonZero,MIX_AIV,1699529623287807,3.84,1.31,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.16,46839,0.0003,0.0003,0.0034,0.0002,0,8192 +1,4294967295,1905,16,StridedSlice207,StridedSliceD,AI_VECTOR_CORE,1699529623288310,1.36,499.16,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1660,0,0,0.0078,0,0,416 +1,4294967295,1906,16,StridedSlice208,StridedSliceD,AI_VECTOR_CORE,1699529623289027.2,1.46,715.89,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.98,1820,0,0,0.0071,0,0,416 +1,4294967295,1907,16,Greater56,Greater,AI_VECTOR_CORE,1699529623289208,1.16,179.29,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.66,1225,0.0016,0,0.0106,0.0016,0,800 +1,4294967295,1909,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623289420.8,4,211.59,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.35,208574,0,0,0.0009,0,0,6144 +1,4294967295,1909,16,NonZero57,NonZero,MIX_AIV,1699529623289426,3.8,1.25,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.2,47300,0.0003,0.0003,0.0034,0.0002,0,8192 
+1,4294967295,1910,16,StridedSlice208,StridedSliceD,AI_VECTOR_CORE,1699529623289980.5,1.38,550.7,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.85,1569,0,0,0.0083,0,0,416 +1,4294967295,1911,16,StridedSlice209,StridedSliceD,AI_VECTOR_CORE,1699529623290672.5,1.48,690.62,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.94,1743,0,0,0.0075,0,0,416 +1,4294967295,1912,16,Greater56,Greater,AI_VECTOR_CORE,1699529623290875.8,1.2,201.77,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1186,0.0017,0,0.011,0.0017,0,800 +1,4294967295,1914,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623291028.5,4.1,151.55,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.1,186647,0,0,0.001,0,0,6144 +1,4294967295,1914,16,NonZero57,NonZero,MIX_AIV,1699529623291033.8,4.02,1.15,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.25,48152,0.0003,0.0003,0.0033,0.0002,0,8192 +1,4294967295,1915,16,StridedSlice209,StridedSliceD,AI_VECTOR_CORE,1699529623291713.5,1.38,675.73,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.84,1554,0,0,0.0084,0,0,416 +1,4294967295,1916,16,StridedSlice210,StridedSliceD,AI_VECTOR_CORE,1699529623292419.5,1.5,704.62,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.97,1786,0,0,0.0073,0,0,416 +1,4294967295,1917,16,Greater56,Greater,AI_VECTOR_CORE,1699529623292622.8,1.2,201.75,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.64,1192,0.0017,0,0.0109,0.0017,0,800 +1,4294967295,1919,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623292810,4,186.05,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.25,199606,0,0,0.001,0,0,6144 +1,4294967295,1919,16,NonZero57,NonZero,MIX_AIV,1699529623292815.2,3.92,1.25,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.21,47493,0.0003,0.0003,0.0034,0.0002,0,8192 
+1,4294967295,1920,16,StridedSlice210,StridedSliceD,AI_VECTOR_CORE,1699529623293296.8,1.3,477.58,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1522,0,0,0.0085,0,0,416 +1,4294967295,1921,16,StridedSlice211,StridedSliceD,AI_VECTOR_CORE,1699529623293997.2,1.4,699.2,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.91,1678,0,0,0.0077,0,0,416 +1,4294967295,1922,16,Greater56,Greater,AI_VECTOR_CORE,1699529623294187.2,1.22,188.6,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.72,1328,0.0015,0,0.0098,0.0015,0,800 +1,4294967295,1924,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623294369.8,3.92,181.28,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.27,201272,0,0,0.001,0,0,6144 +1,4294967295,1924,16,NonZero57,NonZero,MIX_AIV,1699529623294375,3.72,1.33,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.13,46275,0.0003,0.0003,0.0035,0.0002,0,8192 +1,4294967295,1925,16,StridedSlice211,StridedSliceD,AI_VECTOR_CORE,1699529623294863.5,1.34,484.78,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.78,1439,0,0,0.009,0,0,416 +1,4294967295,1926,16,StridedSlice212,StridedSliceD,AI_VECTOR_CORE,1699529623295544,1.52,679.16,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.96,1779,0,0,0.0073,0,0,416 +1,4294967295,1927,16,Greater56,Greater,AI_VECTOR_CORE,1699529623295744.5,1.32,198.98,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.87,1603,0.0012,0,0.0081,0.0012,0,800 +1,4294967295,1929,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623295931.2,3.94,185.43,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.31,204739,0,0,0.0009,0,0,6144 +1,4294967295,1929,16,NonZero57,NonZero,MIX_AIV,1699529623295936.5,3.96,1.31,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.21,47442,0.0003,0.0003,0.0034,0.0002,0,8192 
+1,4294967295,1930,16,StridedSlice212,StridedSliceD,AI_VECTOR_CORE,1699529623296440.8,1.28,500.29,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.8,1473,0,0,0.0088,0,0,416 +1,4294967295,1931,16,StridedSlice213,StridedSliceD,AI_VECTOR_CORE,1699529623297141.8,1.42,699.72,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1668,0,0,0.0078,0,0,416 +1,4294967295,1932,16,Greater56,Greater,AI_VECTOR_CORE,1699529623297313,1.3,169.83,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.79,1469,0.0014,0,0.0088,0.0014,0,800 +1,4294967295,1934,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623297496.8,4.02,182.45,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.29,203464,0,0,0.0009,0,0,6144 +1,4294967295,1934,16,NonZero57,NonZero,MIX_AIV,1699529623297502,3.86,1.23,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.16,46709,0.0003,0.0003,0.0034,0.0002,0,8192 +1,4294967295,1935,16,StridedSlice213,StridedSliceD,AI_VECTOR_CORE,1699529623298036,1.42,530.14,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.9,1672,0,0,0.0078,0,0,416 +1,4294967295,1936,16,StridedSlice214,StridedSliceD,AI_VECTOR_CORE,1699529623298739.2,1.42,701.83,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.91,1684,0,0,0.0077,0,0,416 +1,4294967295,1937,16,Greater56,Greater,AI_VECTOR_CORE,1699529623298925,1.22,184.33,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.68,1253,0.0016,0,0.0104,0.0016,0,800 +1,4294967295,1939,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623299105,3.98,178.78,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.35,208494,0,0,0.0009,0,0,6144 +1,4294967295,1939,16,NonZero57,NonZero,MIX_AIV,1699529623299110.2,3.94,1.27,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.28,48602,0.0003,0.0003,0.0033,0.0002,0,8192 
+1,4294967295,1940,16,StridedSlice214,StridedSliceD,AI_VECTOR_CORE,1699529623299638,1.46,523.81,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.94,1738,0,0,0.0075,0,0,416 +1,4294967295,1941,16,StridedSlice215,StridedSliceD,AI_VECTOR_CORE,1699529623300441,1.48,801.54,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.95,1751,0,0,0.0074,0,0,416 +1,4294967295,1942,16,Greater56,Greater,AI_VECTOR_CORE,1699529623300600.8,1.18,158.27,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.67,1238,0.0016,0,0.0105,0.0016,0,800 +1,4294967295,1944,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623300783,4.04,181.07,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.26,200811,0,0,0.001,0,0,6144 +1,4294967295,1944,16,NonZero57,NonZero,MIX_AIV,1699529623300788.2,4.28,1.21,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.35,49572,0.0003,0.0003,0.0032,0.0002,0,8192 +1,4294967295,1945,16,StridedSlice215,StridedSliceD,AI_VECTOR_CORE,1699529623301234.8,1.4,442.22,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.83,1533,0,0,0.0085,0,0,416 +1,4294967295,1946,16,StridedSlice216,StridedSliceD,AI_VECTOR_CORE,1699529623301961.2,1.6,725.1,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.03,1911,0,0,0.0068,0,0,416 +1,4294967295,1947,16,Greater56,Greater,AI_VECTOR_CORE,1699529623302139.2,1.16,176.4,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.7,1286,0.0016,0,0.0101,0.0016,0,800 +1,4294967295,1949,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623302330.5,3.94,190.09,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.28,202189,0,0,0.0009,0,0,6144 +1,4294967295,1949,16,NonZero57,NonZero,MIX_AIV,1699529623302335.8,3.98,1.31,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.21,47517,0.0003,0.0003,0.0034,0.0002,0,8192 
+1,4294967295,1950,16,StridedSlice216,StridedSliceD,AI_VECTOR_CORE,1699529623302855.8,1.32,516.02,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.82,1521,0,0,0.0085,0,0,416 +1,4294967295,1951,16,StridedSlice217,StridedSliceD,AI_VECTOR_CORE,1699529623303479.5,1.38,622.43,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.87,1616,0,0,0.008,0,0,416 +1,4294967295,1952,16,Greater56,Greater,AI_VECTOR_CORE,1699529623303653.2,1.22,172.37,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.7,1296,0.0015,0,0.01,0.0015,0,800 +1,4294967295,1954,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623303826.8,3.96,172.28,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.25,200010,0,0,0.001,0,0,6144 +1,4294967295,1954,16,NonZero57,NonZero,MIX_AIV,1699529623303832,4.08,1.29,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.26,48308,0.0003,0.0003,0.0033,0.0002,0,8192 +1,4294967295,1955,16,StridedSlice217,StridedSliceD,AI_VECTOR_CORE,1699529623304371.5,1.32,535.42,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.86,1593,0,0,0.0082,0,0,416 +1,4294967295,1956,16,StridedSlice218,StridedSliceD,AI_VECTOR_CORE,1699529623305056.8,1.7,683.93,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1.24,2292,0,0,0.0057,0,0,416 +1,4294967295,1957,16,Greater56,Greater,AI_VECTOR_CORE,1699529623305234.2,1.16,175.8,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.67,1231,0.0016,0,0.0106,0.0016,0,800 +1,4294967295,1959,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623305412.8,3.94,177.34,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.31,205297,0,0,0.0009,0,0,6144 +1,4294967295,1959,16,NonZero57,NonZero,MIX_AIV,1699529623305418,3.84,1.31,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.16,46705,0.0003,0.0003,0.0034,0.0002,0,8192 
+1,4294967295,1960,16,StridedSlice218,StridedSliceD,AI_VECTOR_CORE,1699529623305962.2,1.32,540.41,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.81,1493,0,0,0.0087,0,0,416 +1,4294967295,1961,16,StridedSlice219,StridedSliceD,AI_VECTOR_CORE,1699529623306615,1.48,651.43,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,1,1845,0,0,0.007,0,0,416 +1,4294967295,1962,16,Greater56,Greater,AI_VECTOR_CORE,1699529623306800.2,1.16,183.77,1,0,"""8;""",FLOAT;FLOAT,FORMAT_ND;FORMAT_ND,"""8""",BOOL,FORMAT_ND,N/A,0,0,0,0,0,0,0.69,1268,0.0016,0,0.0103,0.0016,0,800 +1,4294967295,1964,16,NonZero57_MemSet,MemSet,AI_VECTOR_CORE,1699529623306995,4.16,193.59,48,0,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0,0,0,0,2.1,186316,0,0,0.001,0,0,6144 +1,4294967295,1964,16,NonZero57,NonZero,MIX_AIV,1699529623307000.2,3.9,1.09,8,0,"""8""",BOOL,FORMAT_ND,"""1,8""",INT32,FORMAT_ND,1,0,0,0,0,0,0,3.24,47960,0.0003,0.0003,0.0033,0.0002,0,8192 +1,4294967295,1965,16,StridedSlice219,StridedSliceD,AI_VECTOR_CORE,1699529623307499,1.34,494.85,1,0,"""8,25""",FLOAT,FORMAT_ND,"""8""",FLOAT,FORMAT_ND,N/A,0,0,0,0,0,0,0.8,1475,0,0,0.0088,0,0,416 diff --git a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npufused_advice.py b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npufused_advice.py new file mode 100644 index 0000000000000000000000000000000000000000..bfefbdc7b12b1e1b704b2cfa95d43e9a4aec3b77 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npufused_advice.py @@ -0,0 +1,71 @@ +import os +import shutil +import stat +import csv +import unittest +import pytest + +from advisor_backend.interface import Interface + + +class TestComputeAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + interface = None + err_interface = None + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + if os.path.exists(TestComputeAdvice.TMP_DIR): + shutil.rmtree(TestComputeAdvice.TMP_DIR) + + @classmethod + def 
setUpClass(cls) -> None: + super().setUpClass() + if not os.path.exists(TestComputeAdvice.TMP_DIR): + os.makedirs(TestComputeAdvice.TMP_DIR) + # create csv files + csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core', 'Start Time(us)', + 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types', + 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID', 'aicore_time(us)', + 'aic_total_cycles', 'aic_mac_fp16_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops', 'aic_vector_fops', + 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio', 'aiv_vec_int32_ratio', + 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops'] + csv_row1 = [1, 4294967295, 1265, 16, 'Cast66', 'Cast', 'AI_VECTOR_CORE', 1699529623106750, 3.14, 261.56, 9, 0, '4,1025', + 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', 0, 0, 0, 0, 0, 0, 1.77, 29508, 0, 0, 0.0062, + 0, 0, 5856] + with os.fdopen(os.open(f"{TestComputeAdvice.TMP_DIR}/err_file.csv", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + csv_writer = csv.writer(fp) + csv_writer.writerow(csv_header) + csv_writer.writerow(csv_row1) + + TestComputeAdvice.err_interface = Interface(os.path.join(TestComputeAdvice.TMP_DIR, "err_file.csv")) + TestComputeAdvice.interface = Interface(os.path.join(os.path.dirname(os.path.abspath(__file__)), "kernel_details.csv")) + + + def test_run(self): + dataset = TestComputeAdvice.err_interface.get_data('compute', 'npu_fused') + case_advice = dataset.get('advice') + case_bottleneck = dataset.get('bottleneck') + case_data = dataset.get('data') + self.assertEqual(0, len(case_advice)) + self.assertEqual(0, len(case_bottleneck)) + self.assertEqual(0, len(case_data)) + + dataset = TestComputeAdvice.interface.get_data('compute', 'npu_fused') + case_advice = dataset.get('advice') + case_bottleneck = dataset.get('bottleneck') + self.assertEqual(110, 
len(case_advice)) + self.assertEqual(47, len(case_bottleneck)) + case_data = dataset.get('data') + + entry_data = case_data.iloc[0] + self.assertEqual('bias_dropout_add', entry_data.loc['pattern_name']) + self.assertEqual(3, entry_data.loc['len']) + self.assertEqual(4, entry_data.loc['count']) + + entry_data = case_data.iloc[1] + self.assertEqual('AddLayerNorm', entry_data.loc['pattern_name']) + self.assertEqual(2, entry_data.loc['len']) + self.assertEqual(4, entry_data.loc['count']) diff --git a/profiler/test/ut/advisor/advisor_backend/prof_bean_advisor/test_cluster_step_trace_time_bean.py b/profiler/test/ut/advisor/advisor_backend/prof_bean_advisor/test_cluster_step_trace_time_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..7b141ae08865c36af83ae65afd2dd713c9e473a6 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/prof_bean_advisor/test_cluster_step_trace_time_bean.py @@ -0,0 +1,44 @@ +import os +import stat +import shutil +import unittest +from unittest import mock +from unittest.mock import MagicMock + +from common_func.constant import Constant +from advisor_backend.prof_bean_advisor.cluster_step_trace_time_bean import ClusterStepTraceTimeBean + + +class TestClusterStepTraceTimeBean(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.normal_data = { + "Step": "0", "Type": "MockType", "Index": 9, "Computing": 123.6, + "Communication(Not Overlapped)": 12.3, "Free": 45.6 + } + cls.abnormal_data = { + "Step": "0", "Type": "MockType", "Index": "idx0", "Computing": "MockCommpute", + "Communication(Not Overlapped)": "MockCommunication", "Free": "MockFree" + } + + def test_property_normal(self): + bean_inst = ClusterStepTraceTimeBean(self.normal_data) + self.assertEqual(self.normal_data.get("Step"), bean_inst.step) + self.assertEqual(self.normal_data.get("Type"), bean_inst.type) + self.assertEqual(self.normal_data.get("Index"), bean_inst.index) + 
self.assertEqual(self.normal_data.get("Computing"), bean_inst.compute) + self.assertEqual(self.normal_data.get("Communication(Not Overlapped)"), bean_inst.communication) + self.assertEqual(self.normal_data.get("Free"), bean_inst.free) + + def test_property_abnormal(self): + bean_inst = ClusterStepTraceTimeBean(self.abnormal_data) + with self.assertRaises(ValueError): + _ = bean_inst.index + with self.assertRaises(ValueError): + _ = bean_inst.compute + with self.assertRaises(ValueError): + _ = bean_inst.communication + with self.assertRaises(ValueError): + _ = bean_inst.free diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_opsche_advice.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_opsche_advice.py new file mode 100644 index 0000000000000000000000000000000000000000..00024746ccbe1119730181ac92df5e181b29fa86 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_opsche_advice.py @@ -0,0 +1,32 @@ +import os +import shutil +import stat +import json +import unittest +import pytest + +from advisor_backend.interface import Interface + + +class TestOpScheAdvice(unittest.TestCase): + interface = None + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + TestOpScheAdvice.interface = Interface(os.path.join(os.path.dirname(os.path.abspath(__file__)), "trace_view.json")) + + def test_run(self): + dataset = TestOpScheAdvice.interface.get_data('timeline', 'op_schedule') + case_advice = dataset.get('advice') + case_bottleneck = dataset.get('bottleneck') + case_data = dataset.get('data') + self.assertEqual(201, len(case_advice)) + self.assertEqual(54, len(case_bottleneck)) + self.assertEqual(2, len(case_data)) + self.assertEqual(274, len(case_data[0])) + self.assertEqual(274, len(case_data[1])) diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_optimizer_advice.py 
b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_optimizer_advice.py new file mode 100644 index 0000000000000000000000000000000000000000..de9fbcb5ca9122d04c28299dc997c701a31e7962 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_optimizer_advice.py @@ -0,0 +1,66 @@ +import os +import shutil +import stat +import json +import unittest +import pytest + +from advisor_backend.interface import Interface + + +class TestOptimizerAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + interface = None + err_interface = None + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + if os.path.exists(TestOptimizerAdvice.TMP_DIR): + shutil.rmtree(TestOptimizerAdvice.TMP_DIR) + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + if not os.path.exists(TestOptimizerAdvice.TMP_DIR): + os.makedirs(TestOptimizerAdvice.TMP_DIR) + # create json files + json_data = [{ + "ph": "X", + "name": "Optimizer.step#Adam.step", + "pid": 2157254, + "tid": 2157254, + "ts":1700547697922669.8, + "dur": 5762.21, + "cat": "cpu_op", + "args": { + "Sequence number": -1, + "Fwd thread id": 0 + } + }] + json_str = json.dumps(json_data) + with os.fdopen(os.open(f"{TestOptimizerAdvice.TMP_DIR}/err_file.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json_str) + TestOptimizerAdvice.err_interface = Interface(os.path.join(TestOptimizerAdvice.TMP_DIR, "err_file.json")) + TestOptimizerAdvice.interface = Interface(os.path.join(os.path.dirname(os.path.abspath(__file__)), "trace_view.json")) + + + def test_run(self): + dataset = TestOptimizerAdvice.err_interface.get_data('timeline', 'optimizer') + case_advice = dataset.get('advice') + case_bottleneck = dataset.get('bottleneck') + case_data = dataset.get('data') + self.assertEqual(0, len(case_advice)) + self.assertEqual(0, len(case_bottleneck)) + self.assertEqual(0, len(case_data)) + + dataset = 
TestOptimizerAdvice.interface.get_data('timeline', 'optimizer') + real_advice = real_bottleneck = "You can choose torch_npu.optim.NpuFusedAdam to replace the current Optimizer: Optimizer.step#Adam.step." + real_data = ['Optimizer.step#Adam.step'] + case_advice = dataset.get('advice') + case_bottleneck = dataset.get('bottleneck') + case_data = dataset.get('data') + self.assertEqual(real_advice, case_advice) + self.assertEqual(real_bottleneck, case_bottleneck) + self.assertEqual(real_data, case_data) diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/trace_view.json b/profiler/test/ut/advisor/advisor_backend/timeline_advice/trace_view.json new file mode 100644 index 0000000000000000000000000000000000000000..f622e9ea36f7d2ab960ce29010d29c36a6238ffc --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/trace_view.json @@ -0,0 +1 @@ +[{"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511420306.491", "dur": 13.08, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511420439.502", "dur": 4.0, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 437675, "ts": "1704161511420514.763", "dur": 30.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 437675, "ts": "1704161511420569.933", "dur": 89.811, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 437675, "ts": "1704161511420489.413", "dur": 171.191, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": "1704161511420476.853", "dur": 184.121, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": 
"detach_", "pid": 437675, "tid": 437675, "ts": "1704161511420689.695", "dur": 3.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::detach_", "pid": 437675, "tid": 437675, "ts": "1704161511420687.045", "dur": 6.88, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511420741.285", "dur": 3.4, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 437675, "ts": "1704161511420751.095", "dur": 11.94, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 437675, "ts": "1704161511420764.115", "dur": 35.781, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 437675, "ts": "1704161511420748.265", "dur": 52.351, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": "1704161511420746.855", "dur": 53.991, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "detach_", "pid": 437675, "tid": 437675, "ts": "1704161511420804.436", "dur": 1.49, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::detach_", "pid": 437675, "tid": 437675, "ts": "1704161511420803.416", "dur": 2.74, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511420990.368", "dur": 3.69, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::random_", "pid": 437675, "tid": 437675, "ts": "1704161511421012.868", "dur": 25.08, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511421076.319", "dur": 3.13, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511421045.538", "dur": 34.401, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511421130.789", "dur": 2.55, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511421931.457", "dur": 2.51, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::resize_", "pid": 437675, "tid": 437675, "ts": "1704161511421944.087", "dur": 4.58, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511421982.858", "dur": 3.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::slice", "pid": 437675, "tid": 437675, "ts": "1704161511421954.837", "dur": 33.121, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::narrow", "pid": 437675, "tid": 437675, "ts": "1704161511421951.317", "dur": 36.881, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_cat", "pid": 437675, "tid": 437675, "ts": "1704161511421926.977", "dur": 223.222, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::cat", "pid": 437675, "tid": 437675, "ts": "1704161511421921.707", "dur": 229.902, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 437675, "ts": "1704161511422154.589", "dur": 4.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::stack", "pid": 437675, "tid": 437675, "ts": "1704161511421907.547", "dur": 256.452, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422191.510", "dur": 1.72, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422189.670", "dur": 4.52, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422195.410", "dur": 0.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422194.740", "dur": 1.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422198.150", "dur": 0.66, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422197.140", "dur": 1.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422200.180", "dur": 1.22, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422199.450", "dur": 2.18, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422202.670", "dur": 0.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422202.030", "dur": 1.76, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, 
{"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422204.780", "dur": 0.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422204.160", "dur": 10.11, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422215.720", "dur": 1.04, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422214.890", "dur": 2.16, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422218.100", "dur": 0.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422217.510", "dur": 1.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422220.320", "dur": 1.09, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422219.650", "dur": 1.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422222.590", "dur": 0.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422221.990", "dur": 1.62, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422224.730", "dur": 0.94, "cat": "cpu_op", "args": {"Sequence number": -1, 
"Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422224.100", "dur": 1.77, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422226.800", "dur": 0.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422226.250", "dur": 1.51, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422228.670", "dur": 0.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422228.110", "dur": 1.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422230.590", "dur": 1.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422229.940", "dur": 1.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422232.870", "dur": 0.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422232.180", "dur": 1.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422234.930", "dur": 0.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422234.360", "dur": 1.67, "cat": "cpu_op", "args": 
{"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422237.050", "dur": 0.93, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422236.400", "dur": 1.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422239.170", "dur": 1.23, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422238.580", "dur": 2.03, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422241.620", "dur": 0.96, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422240.970", "dur": 1.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422243.760", "dur": 0.85, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422243.170", "dur": 1.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422245.800", "dur": 0.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422245.180", "dur": 1.81, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422248.170", "dur": 0.86, "cat": 
"cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422247.380", "dur": 1.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422250.200", "dur": 0.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422249.620", "dur": 1.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422252.520", "dur": 0.85, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422251.730", "dur": 1.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422254.560", "dur": 1.12, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422253.940", "dur": 1.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422256.930", "dur": 0.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422256.300", "dur": 1.79, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422259.060", "dur": 0.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422258.450", 
"dur": 1.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422261.140", "dur": 0.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422260.530", "dur": 1.76, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422263.320", "dur": 0.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422262.680", "dur": 1.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422265.460", "dur": 0.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422264.900", "dur": 1.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422268.020", "dur": 0.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422266.990", "dur": 2.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422270.200", "dur": 0.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422269.580", "dur": 1.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": 
"1704161511422272.280", "dur": 1.27, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422271.670", "dur": 2.1, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422274.810", "dur": 0.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422274.140", "dur": 1.77, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422276.920", "dur": 0.841, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422276.300", "dur": 1.691, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422279.021", "dur": 1.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422278.351", "dur": 2.09, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422281.421", "dur": 0.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422280.801", "dur": 1.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422283.441", "dur": 0.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, 
"tid": 437675, "ts": "1704161511422282.841", "dur": 1.79, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422285.681", "dur": 0.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422285.021", "dur": 1.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422287.701", "dur": 0.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422287.111", "dur": 1.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422289.841", "dur": 1.17, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422289.191", "dur": 2.03, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422292.171", "dur": 1.14, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422291.601", "dur": 1.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422294.811", "dur": 0.93, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422293.911", "dur": 2.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422296.921", "dur": 0.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422296.311", "dur": 1.45, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422298.821", "dur": 0.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422298.151", "dur": 1.57, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422300.701", "dur": 0.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422300.081", "dur": 1.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422302.531", "dur": 0.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422301.911", "dur": 1.54, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422304.441", "dur": 1.08, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422303.821", "dur": 1.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422306.731", "dur": 1.19, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, 
{"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422306.111", "dur": 2.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422309.191", "dur": 0.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422308.561", "dur": 1.54, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422311.071", "dur": 0.63, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422310.481", "dur": 1.45, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422313.331", "dur": 1.19, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422312.311", "dur": 2.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422315.761", "dur": 0.93, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422315.121", "dur": 1.78, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422317.901", "dur": 0.85, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422317.291", "dur": 1.67, "cat": "cpu_op", "args": {"Sequence number": -1, 
"Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422319.991", "dur": 0.76, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422319.331", "dur": 1.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422321.911", "dur": 0.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422321.341", "dur": 1.61, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422324.021", "dur": 0.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422323.311", "dur": 1.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422326.061", "dur": 0.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422325.451", "dur": 1.72, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422328.221", "dur": 0.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422327.561", "dur": 1.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422330.271", "dur": 0.86, "cat": "cpu_op", "args": 
{"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422329.661", "dur": 1.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422332.421", "dur": 0.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422331.701", "dur": 1.69, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422334.611", "dur": 1.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422333.761", "dur": 2.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422336.691", "dur": 0.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422336.221", "dur": 1.59, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422338.841", "dur": 0.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::unsqueeze", "pid": 437675, "tid": 437675, "ts": "1704161511422338.191", "dur": 1.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511422345.381", "dur": 2.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::resize_", "pid": 437675, "tid": 437675, "ts": "1704161511422354.111", "dur": 1.83, "cat": 
"cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422359.891", "dur": 1.12, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::slice", "pid": 437675, "tid": 437675, "ts": "1704161511422358.641", "dur": 2.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::narrow", "pid": 437675, "tid": 437675, "ts": "1704161511422356.901", "dur": 4.76, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_cat", "pid": 437675, "tid": 437675, "ts": "1704161511422342.831", "dur": 31.48, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::cat", "pid": 437675, "tid": 437675, "ts": "1704161511422341.061", "dur": 33.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::stack", "pid": 437675, "tid": 437675, "ts": "1704161511422185.480", "dur": 197.022, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 437675, "tid": 437675, "ts": "1704161511421128.989", "dur": 1302.013, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422488.043", "dur": 15.06, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 437675, "ts": "1704161511422504.663", "dur": 114.901, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 437675, "ts": "1704161511422484.433", "dur": 136.051, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": 
"1704161511422480.413", "dur": 140.541, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 437675, "ts": "1704161511422659.624", "dur": 10.04, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 437675, "ts": "1704161511422670.674", "dur": 33.731, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 437675, "ts": "1704161511422657.094", "dur": 48.481, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": "1704161511422655.244", "dur": 50.611, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511422740.895", "dur": 2.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422855.116", "dur": 16.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422798.066", "dur": 76.61, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422892.817", "dur": 2.56, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422885.757", "dur": 10.62, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422907.207", "dur": 1.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, 
"ts": "1704161511422903.067", "dur": 6.77, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422920.807", "dur": 1.94, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422915.777", "dur": 7.59, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422933.197", "dur": 2.56, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422929.067", "dur": 7.65, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422945.917", "dur": 2.85, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422942.277", "dur": 7.08, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422958.477", "dur": 1.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422954.877", "dur": 5.96, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422970.767", "dur": 3.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422966.207", "dur": 9.07, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, 
"ts": "1704161511422984.268", "dur": 2.08, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422980.658", "dur": 6.27, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceZero", "pid": 437675, "tid": 437675, "ts": "1704161511422996.918", "dur": 2.39, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::zero_", "pid": 437675, "tid": 437675, "ts": "1704161511422992.538", "dur": 7.27, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "Optimizer.zero_grad#Adam.zero_grad", "pid": 437675, "tid": 437675, "ts": "1704161511422737.645", "dur": 268.043, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Conv2D", "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "dur": 9.18, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_convolution", "pid": 437675, "tid": 437675, "ts": "1704161511423143.989", "dur": 73.581, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::convolution", "pid": 437675, "tid": 437675, "ts": "1704161511423098.389", "dur": 123.431, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::conv2d", "pid": 437675, "tid": 437675, "ts": "1704161511423070.928", "dur": 151.712, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnRelu", "pid": 437675, "tid": 437675, "ts": "1704161511423257.470", "dur": 2.45, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::relu", "pid": 437675, "tid": 437675, "ts": "1704161511423234.760", "dur": 31.78, "cat": "cpu_op", "args": {"Sequence number": 18, "Fwd thread id": 0}}, {"ph": "X", "name": "MaxPoolWithArgmaxV1", "pid": 
437675, "tid": 437675, "ts": "1704161511423396.302", "dur": 3.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::max_pool2d_with_indices", "pid": 437675, "tid": 437675, "ts": "1704161511423348.621", "dur": 55.691, "cat": "cpu_op", "args": {"Sequence number": 19, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::max_pool2d", "pid": 437675, "tid": 437675, "ts": "1704161511423335.201", "dur": 69.641, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Conv2D", "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "dur": 2.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_convolution", "pid": 437675, "tid": 437675, "ts": "1704161511423441.942", "dur": 30.45, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::convolution", "pid": 437675, "tid": 437675, "ts": "1704161511423433.722", "dur": 40.22, "cat": "cpu_op", "args": {"Sequence number": 20, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::conv2d", "pid": 437675, "tid": 437675, "ts": "1704161511423431.762", "dur": 42.62, "cat": "cpu_op", "args": {"Sequence number": 20, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnRelu", "pid": 437675, "tid": 437675, "ts": "1704161511423495.183", "dur": 2.09, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::relu", "pid": 437675, "tid": 437675, "ts": "1704161511423480.703", "dur": 20.89, "cat": "cpu_op", "args": {"Sequence number": 21, "Fwd thread id": 0}}, {"ph": "X", "name": "MaxPoolWithArgmaxV1", "pid": 437675, "tid": 437675, "ts": "1704161511423553.623", "dur": 2.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::max_pool2d_with_indices", "pid": 437675, "tid": 437675, "ts": "1704161511423526.163", "dur": 32.86, "cat": "cpu_op", "args": {"Sequence number": 22, "Fwd thread id": 0}}, {"ph": 
"X", "name": "aten::max_pool2d", "pid": 437675, "tid": 437675, "ts": "1704161511423524.283", "dur": 35.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 437675, "ts": "1704161511423572.793", "dur": 16.891, "cat": "cpu_op", "args": {"Sequence number": 23, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511423624.774", "dur": 4.49, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 437675, "ts": "1704161511423622.474", "dur": 7.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 437675, "ts": "1704161511423615.164", "dur": 17.46, "cat": "cpu_op", "args": {"Sequence number": 24, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnAddmm", "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "dur": 2.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addmm", "pid": 437675, "tid": 437675, "ts": "1704161511423634.704", "dur": 39.8, "cat": "cpu_op", "args": {"Sequence number": 25, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::linear", "pid": 437675, "tid": 437675, "ts": "1704161511423612.274", "dur": 62.66, "cat": "cpu_op", "args": {"Sequence number": 24, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnRelu", "pid": 437675, "tid": 437675, "ts": "1704161511423691.065", "dur": 2.2, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::relu", "pid": 437675, "tid": 437675, "ts": "1704161511423680.365", "dur": 16.36, "cat": "cpu_op", "args": {"Sequence number": 26, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511423719.815", "dur": 2.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::transpose", "pid": 437675, "tid": 437675, "ts": "1704161511423718.335", "dur": 4.15, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 437675, "ts": "1704161511423715.015", "dur": 9.53, "cat": "cpu_op", "args": {"Sequence number": 27, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnAddmm", "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "dur": 2.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addmm", "pid": 437675, "tid": 437675, "ts": "1704161511423725.355", "dur": 21.78, "cat": "cpu_op", "args": {"Sequence number": 28, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::linear", "pid": 437675, "tid": 437675, "ts": "1704161511423713.815", "dur": 33.64, "cat": "cpu_op", "args": {"Sequence number": 27, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnRelu", "pid": 437675, "tid": 437675, "ts": "1704161511423760.635", "dur": 2.37, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::relu", "pid": 437675, "tid": 437675, "ts": "1704161511423751.965", "dur": 13.89, "cat": "cpu_op", "args": {"Sequence number": 29, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 437675, "ts": "1704161511423787.926", "dur": 1.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 437675, "ts": "1704161511423786.666", "dur": 3.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 437675, "ts": "1704161511423783.516", "dur": 8.91, "cat": "cpu_op", "args": {"Sequence number": 30, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnAddmm", "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "dur": 2.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addmm", "pid": 
437675, "tid": 437675, "ts": "1704161511423793.646", "dur": 20.37, "cat": "cpu_op", "args": {"Sequence number": 31, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::linear", "pid": 437675, "tid": 437675, "ts": "1704161511423782.416", "dur": 31.89, "cat": "cpu_op", "args": {"Sequence number": 30, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnLogSoftmax", "pid": 437675, "tid": 437675, "ts": "1704161511426449.252", "dur": 9.33, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_log_softmax", "pid": 437675, "tid": 437675, "ts": "1704161511426415.582", "dur": 49.78, "cat": "cpu_op", "args": {"Sequence number": 32, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::log_softmax", "pid": 437675, "tid": 437675, "ts": "1704161511426394.022", "dur": 72.14, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceOne", "pid": 437675, "tid": 437675, "ts": "1704161511426532.533", "dur": 6.39, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::ones", "pid": 437675, "tid": 437675, "ts": "1704161511426519.463", "dur": 20.13, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnNLLLoss", "pid": 437675, "tid": 437675, "ts": "1704161511426550.453", "dur": 2.32, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::nll_loss_forward", "pid": 437675, "tid": 437675, "ts": "1704161511426502.433", "dur": 55.71, "cat": "cpu_op", "args": {"Sequence number": 33, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::nll_loss", "pid": 437675, "tid": 437675, "ts": "1704161511426491.313", "dur": 67.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::nll_loss_nd", "pid": 437675, "tid": 437675, "ts": "1704161511426488.323", "dur": 70.85, "cat": "cpu_op", "args": {"Sequence number": 33, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::cross_entropy_loss", "pid": 437675, "tid": 437675, "ts": "1704161511426369.121", "dur": 190.602, "cat": "cpu_op", "args": {"Sequence number": 32, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceOne", "pid": 437675, "tid": 437675, "ts": "1704161511426617.294", "dur": 7.18, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::ones_like", "pid": 437675, "tid": 437675, "ts": "1704161511426604.814", "dur": 20.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceOne", "pid": 437675, "tid": 439228, "ts": "1704161511426816.966", "dur": 12.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::ones", "pid": 437675, "tid": 439228, "ts": "1704161511426785.246", "dur": 45.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnNLLLossBackward", "pid": 437675, "tid": 439228, "ts": "1704161511426846.736", "dur": 6.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::nll_loss_backward", "pid": 437675, "tid": 439228, "ts": "1704161511426756.445", "dur": 102.401, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "NllLossBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511426719.085", "dur": 140.901, "cat": "cpu_op", "args": {"Sequence number": 33, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: NllLossBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511426712.505", "dur": 157.371, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnLogSoftmaxBackward", "pid": 437675, "tid": 439228, "ts": "1704161511426924.287", "dur": 8.34, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_log_softmax_backward_data", "pid": 437675, "tid": 439228, "ts": 
"1704161511426905.107", "dur": 29.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "LogSoftmaxBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511426883.167", "dur": 52.65, "cat": "cpu_op", "args": {"Sequence number": 32, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: LogSoftmaxBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511426881.907", "dur": 60.15, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511426964.327", "dur": 5.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511426961.747", "dur": 8.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511426956.237", "dur": 17.78, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnMm", "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "dur": 7.2, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mm", "pid": 437675, "tid": 439228, "ts": "1704161511426976.137", "dur": 29.781, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427012.258", "dur": 2.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427011.048", "dur": 4.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427009.298", "dur": 7.94, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnMm", "pid": 
437675, "tid": 439228, "ts": "1704161511427030.778", "dur": 2.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mm", "pid": 437675, "tid": 439228, "ts": "1704161511427018.108", "dur": 15.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427037.468", "dur": 1.23, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427035.728", "dur": 3.29, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427034.588", "dur": 5.28, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "AddmmBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511426950.567", "dur": 90.181, "cat": "cpu_op", "args": {"Sequence number": 31, "Fwd thread id": 1}}, {"ph": "X", "name": "aclnnReduceSum", "pid": 437675, "tid": 439228, "ts": "1704161511427082.058", "dur": 3.501, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sum", "pid": 437675, "tid": 439228, "ts": "1704161511427066.188", "dur": 20.631, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 439228, "ts": "1704161511427088.509", "dur": 7.29, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "autograd::engine::evaluate_function: AddmmBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511426948.817", "dur": 159.982, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511427259.190", "dur": 7.57, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", 
"name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511427151.909", "dur": 118.171, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427126.039", "dur": 148.611, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427123.179", "dur": 152.681, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427292.441", "dur": 2.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427290.691", "dur": 5.1, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427287.481", "dur": 10.42, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "TBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427285.141", "dur": 13.09, "cat": "cpu_op", "args": {"Sequence number": 30, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427283.891", "dur": 17.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511427351.201", "dur": 8.27, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511427313.851", "dur": 46.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": 
"1704161511427310.581", "dur": 54.27, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427308.501", "dur": 57.55, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnThresholdBackward", "pid": 437675, "tid": 439228, "ts": "1704161511427415.592", "dur": 9.4, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::threshold_backward", "pid": 437675, "tid": 439228, "ts": "1704161511427395.782", "dur": 31.02, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427373.241", "dur": 54.731, "cat": "cpu_op", "args": {"Sequence number": 29, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427371.891", "dur": 62.291, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427449.542", "dur": 2.31, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427447.412", "dur": 5.15, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427444.652", "dur": 9.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnMm", "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "dur": 7.59, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mm", "pid": 437675, "tid": 439228, "ts": "1704161511427455.412", "dur": 24.67, "cat": "cpu_op", "args": {"Sequence number": 
-1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427484.723", "dur": 2.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427483.373", "dur": 4.52, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427481.962", "dur": 7.201, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnMm", "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "dur": 2.6, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mm", "pid": 437675, "tid": 439228, "ts": "1704161511427489.863", "dur": 14.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427508.723", "dur": 1.37, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427507.033", "dur": 3.38, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427505.693", "dur": 5.55, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "AddmmBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427440.652", "dur": 71.391, "cat": "cpu_op", "args": {"Sequence number": 28, "Fwd thread id": 1}}, {"ph": "X", "name": "aclnnReduceSum", "pid": 437675, "tid": 439228, "ts": "1704161511427522.133", "dur": 3.0, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sum", "pid": 437675, "tid": 439228, "ts": "1704161511427513.273", "dur": 12.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 
0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 439228, "ts": "1704161511427527.023", "dur": 6.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "autograd::engine::evaluate_function: AddmmBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427439.312", "dur": 104.601, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511427593.064", "dur": 7.41, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511427556.403", "dur": 45.311, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427554.133", "dur": 50.761, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427551.563", "dur": 54.281, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427618.334", "dur": 2.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427616.474", "dur": 4.45, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427614.614", "dur": 8.34, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "TBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427613.274", "dur": 9.97, "cat": "cpu_op", "args": {"Sequence number": 27, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: TBackward0", 
"pid": 437675, "tid": 439228, "ts": "1704161511427612.124", "dur": 14.39, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511427667.914", "dur": 8.03, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511427635.604", "dur": 41.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427633.654", "dur": 47.02, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427631.964", "dur": 49.66, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnThresholdBackward", "pid": 437675, "tid": 439228, "ts": "1704161511427704.725", "dur": 2.66, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::threshold_backward", "pid": 437675, "tid": 439228, "ts": "1704161511427691.945", "dur": 16.42, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427687.485", "dur": 21.74, "cat": "cpu_op", "args": {"Sequence number": 26, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427686.325", "dur": 28.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427726.845", "dur": 2.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": 
"1704161511427725.485", "dur": 4.41, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427723.555", "dur": 7.96, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnMm", "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "dur": 7.37, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mm", "pid": 437675, "tid": 439228, "ts": "1704161511427732.395", "dur": 22.28, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427758.545", "dur": 1.87, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427757.545", "dur": 3.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427756.275", "dur": 5.78, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnMm", "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "dur": 7.4, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mm", "pid": 437675, "tid": 439228, "ts": "1704161511427762.755", "dur": 107.641, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511427876.236", "dur": 2.04, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511427873.866", "dur": 5.02, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427871.806", "dur": 
8.62, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "AddmmBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427720.065", "dur": 161.751, "cat": "cpu_op", "args": {"Sequence number": 25, "Fwd thread id": 1}}, {"ph": "X", "name": "aclnnReduceSum", "pid": 437675, "tid": 439228, "ts": "1704161511427894.447", "dur": 2.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sum", "pid": 437675, "tid": 439228, "ts": "1704161511427883.337", "dur": 14.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 439228, "ts": "1704161511427899.567", "dur": 5.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "autograd::engine::evaluate_function: AddmmBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427719.175", "dur": 198.842, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511427972.097", "dur": 9.4, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511427934.447", "dur": 48.55, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427932.037", "dur": 54.381, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511427929.467", "dur": 58.111, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::as_strided", "pid": 437675, "tid": 439228, "ts": "1704161511428002.138", "dur": 1.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread 
id": 0}}, {"ph": "X", "name": "aten::transpose", "pid": 437675, "tid": 439228, "ts": "1704161511428000.428", "dur": 4.14, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::t", "pid": 437675, "tid": 439228, "ts": "1704161511427998.238", "dur": 7.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "TBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427996.438", "dur": 10.14, "cat": "cpu_op", "args": {"Sequence number": 24, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511427994.928", "dur": 14.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511428050.588", "dur": 7.87, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511428019.038", "dur": 40.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428017.138", "dur": 46.09, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428015.518", "dur": 48.69, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 439228, "ts": "1704161511428076.568", "dur": 6.49, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::reshape", "pid": 437675, "tid": 439228, "ts": "1704161511428073.448", "dur": 9.991, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ViewBackward0", "pid": 437675, "tid": 439228, "ts": 
"1704161511428070.708", "dur": 13.441, "cat": "cpu_op", "args": {"Sequence number": 23, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428069.748", "dur": 16.931, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "format_contiguousV2", "pid": 437675, "tid": 439228, "ts": "1704161511428141.569", "dur": 4.77, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "MaxPoolGradWithArgmaxV1", "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "dur": 7.33, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::max_pool2d_with_indices_backward", "pid": 437675, "tid": 439228, "ts": "1704161511428113.029", "dur": 62.16, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "MaxPool2DWithIndicesBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428092.079", "dur": 83.84, "cat": "cpu_op", "args": {"Sequence number": 22, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428090.999", "dur": 96.351, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnThresholdBackward", "pid": 437675, "tid": 439228, "ts": "1704161511428213.150", "dur": 2.63, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::threshold_backward", "pid": 437675, "tid": 439228, "ts": "1704161511428198.560", "dur": 18.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428193.900", "dur": 23.43, "cat": "cpu_op", "args": {"Sequence number": 21, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ReluBackward0", "pid": 
437675, "tid": 439228, "ts": "1704161511428192.750", "dur": 29.51, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 439228, "ts": "1704161511428283.661", "dur": 6.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 439228, "ts": "1704161511428297.951", "dur": 3.41, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 439228, "ts": "1704161511428302.681", "dur": 21.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 439228, "ts": "1704161511428295.221", "dur": 29.32, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 439228, "ts": "1704161511428292.131", "dur": 32.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Conv2DBackpropInput", "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "dur": 3.15, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 439228, "ts": "1704161511428357.691", "dur": 2.48, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 439228, "ts": "1704161511428363.201", "dur": 1.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 439228, "ts": "1704161511428365.621", "dur": 4.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 439228, "ts": "1704161511428361.881", "dur": 8.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 
437675, "tid": 439228, "ts": "1704161511428360.961", "dur": 9.56, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Conv2DBackpropFilter", "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "dur": 2.731, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 439228, "ts": "1704161511428389.642", "dur": 3.26, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "contiguous_h_memRepoint", "pid": 437675, "tid": 439228, "ts": "1704161511428418.612", "dur": 2.65, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "format_contiguousV2", "pid": 437675, "tid": 439228, "ts": "1704161511428400.652", "dur": 21.52, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 439228, "ts": "1704161511428425.572", "dur": 1.76, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ReduceSum", "pid": 437675, "tid": 439228, "ts": "1704161511428434.892", "dur": 2.06, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceCopy", "pid": 437675, "tid": 439228, "ts": "1704161511428525.263", "dur": 2.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::clone", "pid": 437675, "tid": 439228, "ts": "1704161511428478.562", "dur": 50.851, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::npu_dtype_cast", "pid": 437675, "tid": 439228, "ts": "1704161511428473.402", "dur": 57.331, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::convolution_backward", "pid": 437675, "tid": 439228, "ts": "1704161511428248.750", "dur": 287.123, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": 
"X", "name": "ConvolutionBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428228.320", "dur": 308.573, "cat": "cpu_op", "args": {"Sequence number": 20, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ConvolutionBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428226.910", "dur": 321.963, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511428602.694", "dur": 7.19, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511428567.123", "dur": 44.391, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428563.893", "dur": 50.721, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428561.693", "dur": 53.811, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511428655.254", "dur": 2.35, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511428628.904", "dur": 29.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428626.724", "dur": 34.35, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428624.854", "dur": 36.87, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd 
thread id": 0}}, {"ph": "X", "name": "MaxPoolGradWithArgmaxV1", "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "dur": 2.63, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::max_pool2d_with_indices_backward", "pid": 437675, "tid": 439228, "ts": "1704161511428671.624", "dur": 36.141, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "MaxPool2DWithIndicesBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428668.254", "dur": 40.011, "cat": "cpu_op", "args": {"Sequence number": 19, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428667.204", "dur": 52.221, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnThresholdBackward", "pid": 437675, "tid": 439228, "ts": "1704161511428743.655", "dur": 2.22, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::threshold_backward", "pid": 437675, "tid": 439228, "ts": "1704161511428730.715", "dur": 15.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428726.245", "dur": 21.17, "cat": "cpu_op", "args": {"Sequence number": 18, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ReluBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428724.535", "dur": 27.23, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 439228, "ts": "1704161511428781.445", "dur": 4.761, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty_strided", "pid": 437675, "tid": 439228, "ts": "1704161511428790.906", "dur": 2.74, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 
0}}, {"ph": "X", "name": "aten::copy_", "pid": 437675, "tid": 439228, "ts": "1704161511428794.766", "dur": 9.25, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_to_copy", "pid": 437675, "tid": 439228, "ts": "1704161511428789.006", "dur": 15.36, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 439228, "ts": "1704161511428787.566", "dur": 17.17, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Conv2DBackpropFilter", "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "dur": 2.52, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 439228, "ts": "1704161511428824.346", "dur": 3.54, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "contiguous_h_memRepoint", "pid": 437675, "tid": 439228, "ts": "1704161511428841.666", "dur": 2.15, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "format_contiguousV2", "pid": 437675, "tid": 439228, "ts": "1704161511428831.896", "dur": 12.62, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 439228, "ts": "1704161511428848.066", "dur": 2.22, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ReduceSum", "pid": 437675, "tid": 439228, "ts": "1704161511428855.646", "dur": 2.08, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceCopy", "pid": 437675, "tid": 439228, "ts": "1704161511428891.997", "dur": 2.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::clone", "pid": 437675, "tid": 439228, "ts": "1704161511428862.486", "dur": 33.391, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread 
id": 0}}, {"ph": "X", "name": "aten::npu_dtype_cast", "pid": 437675, "tid": 439228, "ts": "1704161511428860.526", "dur": 35.981, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::convolution_backward", "pid": 437675, "tid": 439228, "ts": "1704161511428759.645", "dur": 141.002, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "ConvolutionBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428757.055", "dur": 144.382, "cat": "cpu_op", "args": {"Sequence number": 17, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: ConvolutionBackward0", "pid": 437675, "tid": 439228, "ts": "1704161511428755.985", "dur": 151.492, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511428953.477", "dur": 2.42, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511428921.497", "dur": 35.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428918.797", "dur": 41.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428916.747", "dur": 44.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 439228, "ts": "1704161511428995.778", "dur": 2.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 439228, "ts": "1704161511428971.757", "dur": 28.161, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": 
"torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428969.937", "dur": 32.301, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 1}}, {"ph": "X", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 437675, "tid": 439228, "ts": "1704161511428968.057", "dur": 34.961, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511429058.878", "dur": 4.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429178.589", "dur": 1.6, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429176.179", "dur": 4.59, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511429190.490", "dur": 8.74, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511429165.879", "dur": 34.861, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511429220.330", "dur": 2.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511429211.280", "dur": 12.76, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429233.240", "dur": 0.42, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429232.390", "dur": 1.75, "cat": 
"cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511429237.520", "dur": 2.28, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511429230.170", "dur": 10.31, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511429259.120", "dur": 1.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511429249.450", "dur": 13.41, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511429284.691", "dur": 1.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511429267.090", "dur": 20.361, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429314.501", "dur": 0.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429313.271", "dur": 2.19, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511429396.712", "dur": 3.39, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511429301.561", "dur": 100.651, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429420.402", "dur": 
0.49, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429419.312", "dur": 2.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511429427.092", "dur": 2.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511429415.522", "dur": 14.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511429503.263", "dur": 3.24, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511429437.712", "dur": 70.711, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429524.353", "dur": 0.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429523.523", "dur": 2.24, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511429530.753", "dur": 5.81, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511429520.073", "dur": 17.31, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511429551.083", "dur": 2.36, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511429544.703", 
"dur": 9.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429562.543", "dur": 0.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429561.463", "dur": 1.93, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511429566.633", "dur": 2.03, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511429559.373", "dur": 9.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511429580.403", "dur": 2.28, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511429574.843", "dur": 8.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511429597.614", "dur": 2.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511429585.954", "dur": 14.48, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429617.274", "dur": 0.69, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429616.034", "dur": 2.41, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": 
"1704161511429657.214", "dur": 2.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511429608.354", "dur": 52.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429677.804", "dur": 0.74, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429676.884", "dur": 2.12, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511429683.444", "dur": 2.331, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511429673.574", "dur": 12.991, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511429739.045", "dur": 2.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511429695.985", "dur": 46.77, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429759.365", "dur": 0.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429758.425", "dur": 1.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511429764.755", "dur": 6.94, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, 
"tid": 437675, "ts": "1704161511429755.035", "dur": 17.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511429783.905", "dur": 1.681, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511429778.045", "dur": 8.381, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429801.536", "dur": 1.91, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429800.766", "dur": 3.33, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511429807.856", "dur": 6.63, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511429798.686", "dur": 16.37, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511429826.966", "dur": 2.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511429820.676", "dur": 9.24, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511429844.736", "dur": 1.48, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511429832.366", "dur": 14.58, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429863.306", "dur": 1.09, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429862.376", "dur": 2.38, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511429904.687", "dur": 3.13, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511429854.746", "dur": 54.321, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511429925.047", "dur": 0.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511429924.137", "dur": 2.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511429930.557", "dur": 1.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511429921.117", "dur": 12.11, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511429986.048", "dur": 2.36, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511429941.267", "dur": 48.181, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430003.358", "dur": 0.39, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 
0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430002.518", "dur": 1.59, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430008.578", "dur": 2.17, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511429998.998", "dur": 12.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511430021.828", "dur": 9.61, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430016.448", "dur": 15.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430039.698", "dur": 0.66, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430038.968", "dur": 1.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430043.898", "dur": 2.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430037.088", "dur": 9.53, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511430057.468", "dur": 1.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511430052.218", "dur": 7.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread 
id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430073.098", "dur": 1.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430062.238", "dur": 13.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430091.089", "dur": 0.59, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430090.099", "dur": 2.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511430126.759", "dur": 2.58, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511430083.038", "dur": 47.361, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430143.819", "dur": 0.72, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430142.829", "dur": 2.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511430149.279", "dur": 1.77, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430139.819", "dur": 12.0, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511430198.880", "dur": 2.28, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd 
thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511430159.969", "dur": 42.051, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430225.760", "dur": 0.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430224.810", "dur": 2.08, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430231.680", "dur": 1.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430220.910", "dur": 13.36, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511430245.440", "dur": 5.75, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430239.840", "dur": 12.13, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430260.040", "dur": 0.52, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430259.120", "dur": 1.8, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430263.740", "dur": 2.32, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430256.850", "dur": 9.79, "cat": "cpu_op", "args": {"Sequence 
number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511430277.350", "dur": 1.81, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511430271.710", "dur": 8.1, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430293.391", "dur": 1.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430281.940", "dur": 13.881, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430311.011", "dur": 0.53, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430309.891", "dur": 2.02, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511430348.241", "dur": 3.13, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511430303.031", "dur": 49.29, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430366.901", "dur": 0.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430365.751", "dur": 2.48, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511430372.141", "dur": 2.07, "cat": "cpu_op", "args": 
{"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430362.511", "dur": 12.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511430424.952", "dur": 2.92, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511430382.441", "dur": 46.401, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430442.232", "dur": 0.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430441.222", "dur": 2.31, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430447.902", "dur": 1.96, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430437.972", "dur": 12.49, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511430461.112", "dur": 5.48, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430455.542", "dur": 11.79, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430475.102", "dur": 0.57, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430474.332", "dur": 1.64, "cat": 
"cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430478.862", "dur": 1.87, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430471.942", "dur": 9.24, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511430491.463", "dur": 2.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511430486.163", "dur": 7.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430507.133", "dur": 2.04, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430496.143", "dur": 13.79, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430524.673", "dur": 0.53, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430523.523", "dur": 2.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511430557.473", "dur": 2.52, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511430516.653", "dur": 44.49, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430574.693", "dur": 0.43, 
"cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430573.913", "dur": 1.66, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511430579.823", "dur": 2.03, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430571.193", "dur": 11.39, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511430628.544", "dur": 2.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511430590.294", "dur": 41.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430644.944", "dur": 0.72, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430644.094", "dur": 1.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430650.654", "dur": 1.65, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430641.194", "dur": 11.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511430663.114", "dur": 6.04, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430657.884", "dur": 
12.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430677.944", "dur": 0.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430676.774", "dur": 2.42, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430682.284", "dur": 2.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430674.484", "dur": 11.18, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511430696.425", "dur": 2.6, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511430690.745", "dur": 9.0, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430712.515", "dur": 1.62, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430701.955", "dur": 12.94, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430729.755", "dur": 0.56, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430728.985", "dur": 1.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511430763.255", 
"dur": 2.59, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511430722.245", "dur": 44.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430779.425", "dur": 0.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430778.525", "dur": 1.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511430784.465", "dur": 1.861, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430775.985", "dur": 11.071, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511430835.566", "dur": 2.6, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511430794.066", "dur": 45.02, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430851.976", "dur": 0.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430851.126", "dur": 1.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430856.916", "dur": 1.72, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": 
"1704161511430848.136", "dur": 11.1, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511430868.846", "dur": 1.63, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430863.906", "dur": 7.18, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430878.056", "dur": 0.46, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430877.316", "dur": 1.53, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511430882.316", "dur": 5.551, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511430875.526", "dur": 12.971, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511430898.757", "dur": 1.65, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511430893.317", "dur": 7.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430912.737", "dur": 1.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511430903.177", "dur": 12.05, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 
437675, "ts": "1704161511430929.797", "dur": 0.56, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430928.917", "dur": 1.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511430962.007", "dur": 2.31, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511430922.497", "dur": 43.02, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511430978.377", "dur": 0.51, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511430977.487", "dur": 1.77, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511430983.637", "dur": 1.73, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511430974.827", "dur": 11.211, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511431030.818", "dur": 2.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511430993.418", "dur": 40.85, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431047.738", "dur": 0.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 
437675, "tid": 437675, "ts": "1704161511431046.838", "dur": 1.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511431052.738", "dur": 1.82, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511431044.128", "dur": 11.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511431065.368", "dur": 2.1, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511431060.238", "dur": 7.87, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431075.138", "dur": 0.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431074.398", "dur": 1.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511431078.608", "dur": 8.451, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511431072.118", "dur": 15.581, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511431099.609", "dur": 1.99, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511431094.069", "dur": 8.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", 
"pid": 437675, "tid": 437675, "ts": "1704161511431115.269", "dur": 1.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511431104.729", "dur": 12.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431132.169", "dur": 0.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431131.339", "dur": 1.67, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511431168.769", "dur": 2.7, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511431124.759", "dur": 47.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431186.730", "dur": 0.42, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431185.929", "dur": 1.601, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511431191.670", "dur": 1.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511431183.009", "dur": 11.321, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511431243.120", "dur": 2.68, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": 
"aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511431201.610", "dur": 45.06, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431261.090", "dur": 0.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431260.080", "dur": 1.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511431266.000", "dur": 1.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511431256.970", "dur": 11.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511431279.690", "dur": 1.94, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511431273.680", "dur": 8.55, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431289.131", "dur": 0.37, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431288.381", "dur": 1.43, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceMuls", "pid": 437675, "tid": 437675, "ts": "1704161511431292.811", "dur": 4.58, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::mul_", "pid": 437675, "tid": 437675, "ts": "1704161511431286.421", "dur": 11.5, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": 
"X", "name": "aclnnInplaceAddcmul", "pid": 437675, "tid": 437675, "ts": "1704161511431308.501", "dur": 1.9, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcmul_", "pid": 437675, "tid": 437675, "ts": "1704161511431303.281", "dur": 7.84, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnSqrt", "pid": 437675, "tid": 437675, "ts": "1704161511431323.381", "dur": 2.04, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sqrt", "pid": 437675, "tid": 437675, "ts": "1704161511431313.341", "dur": 12.78, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431341.171", "dur": 0.51, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431340.241", "dur": 1.83, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511431374.301", "dur": 2.16, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511431333.161", "dur": 44.25, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431390.592", "dur": 0.44, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431389.582", "dur": 1.79, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdds", "pid": 437675, "tid": 437675, "ts": "1704161511431395.582", "dur": 1.78, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, 
{"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511431386.792", "dur": 11.29, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAddcdiv", "pid": 437675, "tid": 437675, "ts": "1704161511431442.142", "dur": 2.21, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::addcdiv_", "pid": 437675, "tid": 437675, "ts": "1704161511431405.392", "dur": 39.71, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Optimizer.step#Adam.step", "pid": 437675, "tid": 437675, "ts": "1704161511429056.868", "dur": 2410.824, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::empty", "pid": 437675, "tid": 437675, "ts": "1704161511431481.472", "dur": 3.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 437675, "tid": 437675, "ts": "1704161511431479.532", "dur": 16.571, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": "1704161511431514.063", "dur": 3.19, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": "1704161511431520.293", "dur": 0.65, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511431534.493", "dur": 2.2, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511431524.593", "dur": 13.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::view", "pid": 437675, "tid": 437675, "ts": "1704161511431552.563", "dur": 3.68, "cat": "cpu_op", "args": 
{"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::reshape", "pid": 437675, "tid": 437675, "ts": "1704161511431550.593", "dur": 5.95, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnArgMax", "pid": 437675, "tid": 437675, "ts": "1704161511431573.203", "dur": 1.98, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::argmax", "pid": 437675, "tid": 437675, "ts": "1704161511431546.143", "dur": 30.74, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnEqTensor", "pid": 437675, "tid": 437675, "ts": "1704161511431594.914", "dur": 5.89, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::eq", "pid": 437675, "tid": 437675, "ts": "1704161511431582.683", "dur": 20.181, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnReduceSum", "pid": 437675, "tid": 437675, "ts": "1704161511431704.105", "dur": 2.47, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::sum", "pid": 437675, "tid": 437675, "ts": "1704161511431609.514", "dur": 99.141, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnCast", "pid": 437675, "tid": 437675, "ts": "1704161511431757.055", "dur": 2.33, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::npu_dtype_cast", "pid": 437675, "tid": 437675, "ts": "1704161511431718.405", "dur": 44.07, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::to", "pid": 437675, "tid": 437675, "ts": "1704161511431715.145", "dur": 48.14, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnInplaceAdd", "pid": 437675, "tid": 437675, "ts": "1704161511431778.495", "dur": 2.02, "cat": "cpu_op", "args": {"Sequence 
number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::add_", "pid": 437675, "tid": 437675, "ts": "1704161511431770.325", "dur": 11.01, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431814.126", "dur": 1.2, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431812.476", "dur": 3.45, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511431820.886", "dur": 1.86, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511431803.586", "dur": 20.05, "cat": "cpu_op", "args": {"Sequence number": 34, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431836.616", "dur": 0.64, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431835.426", "dur": 2.26, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aclnnDivs", "pid": 437675, "tid": 437675, "ts": "1704161511431840.796", "dur": 1.88, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::div", "pid": 437675, "tid": 437675, "ts": "1704161511431829.806", "dur": 13.38, "cat": "cpu_op", "args": {"Sequence number": 34, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511431855.466", "dur": 2399.584, "cat": "cpu_op", "args": {"Sequence number": 34, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511431854.516", "dur": 2401.574, "cat": "cpu_op", "args": 
{"Sequence number": 34, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::_local_scalar_dense", "pid": 437675, "tid": 437675, "ts": "1704161511434270.200", "dur": 24.761, "cat": "cpu_op", "args": {"Sequence number": 34, "Fwd thread id": 0}}, {"ph": "X", "name": "aten::item", "pid": 437675, "tid": 437675, "ts": "1704161511434267.580", "dur": 28.101, "cat": "cpu_op", "args": {"Sequence number": 34, "Fwd thread id": 0}}, {"ph": "X", "name": "ProfilerStep#1", "pid": 437675, "tid": 437675, "ts": "1704161511420289.011", "dur": 14091.97, "cat": "cpu_op", "args": {"Sequence number": -1, "Fwd thread id": 0}}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422856.936", "dur": 13.33, "cat": "enqueue", "args": {"correlation_id": 148}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 148, "pid": 437675, "tid": 437675, "ts": "1704161511422856.936", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422893.127", "dur": 1.61, "cat": "enqueue", "args": {"correlation_id": 149}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 149, "pid": 437675, "tid": 437675, "ts": "1704161511422893.127", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422907.637", "dur": 0.98, "cat": "enqueue", "args": {"correlation_id": 150}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 150, "pid": 437675, "tid": 437675, "ts": "1704161511422907.637", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422921.047", "dur": 1.34, "cat": "enqueue", "args": {"correlation_id": 151}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 151, "pid": 437675, "tid": 437675, "ts": "1704161511422921.047", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422933.527", "dur": 1.58, "cat": "enqueue", "args": {"correlation_id": 152}}, 
{"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 152, "pid": 437675, "tid": 437675, "ts": "1704161511422933.527", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422946.317", "dur": 1.83, "cat": "enqueue", "args": {"correlation_id": 153}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 153, "pid": 437675, "tid": 437675, "ts": "1704161511422946.317", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422958.767", "dur": 1.26, "cat": "enqueue", "args": {"correlation_id": 154}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 154, "pid": 437675, "tid": 437675, "ts": "1704161511422958.767", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422972.797", "dur": 1.51, "cat": "enqueue", "args": {"correlation_id": 155}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 155, "pid": 437675, "tid": 437675, "ts": "1704161511422972.797", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422984.548", "dur": 1.25, "cat": "enqueue", "args": {"correlation_id": 156}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 156, "pid": 437675, "tid": 437675, "ts": "1704161511422984.548", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511422997.278", "dur": 1.43, "cat": "enqueue", "args": {"correlation_id": 157}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 157, "pid": 437675, "tid": 437675, "ts": "1704161511422997.278", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423205.670", "dur": 7.69, "cat": "enqueue", "args": {"correlation_id": 158}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 158, "pid": 437675, "tid": 437675, "ts": "1704161511423205.670", "cat": "async_task_queue"}, 
{"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423257.830", "dur": 1.55, "cat": "enqueue", "args": {"correlation_id": 159}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 159, "pid": 437675, "tid": 437675, "ts": "1704161511423257.830", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423396.942", "dur": 1.71, "cat": "enqueue", "args": {"correlation_id": 160}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 160, "pid": 437675, "tid": 437675, "ts": "1704161511423396.942", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423468.642", "dur": 1.59, "cat": "enqueue", "args": {"correlation_id": 161}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 161, "pid": 437675, "tid": 437675, "ts": "1704161511423468.642", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423495.553", "dur": 1.2, "cat": "enqueue", "args": {"correlation_id": 162}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 162, "pid": 437675, "tid": 437675, "ts": "1704161511423495.553", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423553.983", "dur": 1.89, "cat": "enqueue", "args": {"correlation_id": 163}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 163, "pid": 437675, "tid": 437675, "ts": "1704161511423553.983", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423669.864", "dur": 1.79, "cat": "enqueue", "args": {"correlation_id": 164}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 164, "pid": 437675, "tid": 437675, "ts": "1704161511423669.864", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423691.515", "dur": 1.19, "cat": "enqueue", "args": {"correlation_id": 
165}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 165, "pid": 437675, "tid": 437675, "ts": "1704161511423691.515", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423744.035", "dur": 1.14, "cat": "enqueue", "args": {"correlation_id": 166}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 166, "pid": 437675, "tid": 437675, "ts": "1704161511423744.035", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423761.025", "dur": 1.19, "cat": "enqueue", "args": {"correlation_id": 167}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 167, "pid": 437675, "tid": 437675, "ts": "1704161511423761.025", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511423811.076", "dur": 1.23, "cat": "enqueue", "args": {"correlation_id": 168}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 168, "pid": 437675, "tid": 437675, "ts": "1704161511423811.076", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511426449.692", "dur": 7.98, "cat": "enqueue", "args": {"correlation_id": 169}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 169, "pid": 437675, "tid": 437675, "ts": "1704161511426449.692", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511426532.863", "dur": 5.38, "cat": "enqueue", "args": {"correlation_id": 170}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 170, "pid": 437675, "tid": 437675, "ts": "1704161511426532.863", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511426550.633", "dur": 1.38, "cat": "enqueue", "args": {"correlation_id": 171}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 171, "pid": 437675, "tid": 437675, "ts": "1704161511426550.633", "cat": 
"async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511426617.844", "dur": 6.05, "cat": "enqueue", "args": {"correlation_id": 172}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 172, "pid": 437675, "tid": 437675, "ts": "1704161511426617.844", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511426818.186", "dur": 10.19, "cat": "enqueue", "args": {"correlation_id": 173}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 173, "pid": 437675, "tid": 439228, "ts": "1704161511426818.186", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511426847.736", "dur": 5.02, "cat": "enqueue", "args": {"correlation_id": 174}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 174, "pid": 437675, "tid": 439228, "ts": "1704161511426847.736", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511426925.357", "dur": 6.37, "cat": "enqueue", "args": {"correlation_id": 175}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 175, "pid": 437675, "tid": 439228, "ts": "1704161511426925.357", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511426998.368", "dur": 5.55, "cat": "enqueue", "args": {"correlation_id": 176}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 176, "pid": 437675, "tid": 439228, "ts": "1704161511426998.368", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427031.358", "dur": 1.35, "cat": "enqueue", "args": {"correlation_id": 177}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 177, "pid": 437675, "tid": 439228, "ts": "1704161511427031.358", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427082.879", "dur": 1.76, "cat": "enqueue", "args": 
{"correlation_id": 178}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 178, "pid": 437675, "tid": 439228, "ts": "1704161511427082.879", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427260.020", "dur": 5.98, "cat": "enqueue", "args": {"correlation_id": 179}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 179, "pid": 437675, "tid": 439228, "ts": "1704161511427260.020", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427352.651", "dur": 6.15, "cat": "enqueue", "args": {"correlation_id": 180}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 180, "pid": 437675, "tid": 439228, "ts": "1704161511427352.651", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427416.502", "dur": 6.99, "cat": "enqueue", "args": {"correlation_id": 181}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 181, "pid": 437675, "tid": 439228, "ts": "1704161511427416.502", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427472.242", "dur": 5.74, "cat": "enqueue", "args": {"correlation_id": 182}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 182, "pid": 437675, "tid": 439228, "ts": "1704161511427472.242", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427502.463", "dur": 1.22, "cat": "enqueue", "args": {"correlation_id": 183}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 183, "pid": 437675, "tid": 439228, "ts": "1704161511427502.463", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427522.873", "dur": 1.62, "cat": "enqueue", "args": {"correlation_id": 184}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 184, "pid": 437675, "tid": 439228, "ts": "1704161511427522.873", 
"cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427593.574", "dur": 6.17, "cat": "enqueue", "args": {"correlation_id": 185}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 185, "pid": 437675, "tid": 439228, "ts": "1704161511427593.574", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427669.004", "dur": 6.27, "cat": "enqueue", "args": {"correlation_id": 186}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 186, "pid": 437675, "tid": 439228, "ts": "1704161511427669.004", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427705.165", "dur": 1.73, "cat": "enqueue", "args": {"correlation_id": 187}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 187, "pid": 437675, "tid": 439228, "ts": "1704161511427705.165", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427746.995", "dur": 5.9, "cat": "enqueue", "args": {"correlation_id": 188}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 188, "pid": 437675, "tid": 439228, "ts": "1704161511427746.995", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427862.546", "dur": 5.94, "cat": "enqueue", "args": {"correlation_id": 189}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 189, "pid": 437675, "tid": 439228, "ts": "1704161511427862.546", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427894.867", "dur": 1.89, "cat": "enqueue", "args": {"correlation_id": 190}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 190, "pid": 437675, "tid": 439228, "ts": "1704161511427894.867", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511427972.787", "dur": 8.0, "cat": "enqueue", 
"args": {"correlation_id": 191}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 191, "pid": 437675, "tid": 439228, "ts": "1704161511427972.787", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428051.258", "dur": 6.46, "cat": "enqueue", "args": {"correlation_id": 192}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 192, "pid": 437675, "tid": 439228, "ts": "1704161511428051.258", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428164.819", "dur": 5.93, "cat": "enqueue", "args": {"correlation_id": 193}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 193, "pid": 437675, "tid": 439228, "ts": "1704161511428164.819", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428213.600", "dur": 1.64, "cat": "enqueue", "args": {"correlation_id": 194}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 194, "pid": 437675, "tid": 439228, "ts": "1704161511428213.600", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428347.691", "dur": 1.82, "cat": "enqueue", "args": {"correlation_id": 195}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 195, "pid": 437675, "tid": 439228, "ts": "1704161511428347.691", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428383.221", "dur": 1.761, "cat": "enqueue", "args": {"correlation_id": 196}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 196, "pid": 437675, "tid": 439228, "ts": "1704161511428383.221", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428435.312", "dur": 1.24, "cat": "enqueue", "args": {"correlation_id": 197}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 197, "pid": 437675, "tid": 439228, "ts": 
"1704161511428435.312", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428525.943", "dur": 1.61, "cat": "enqueue", "args": {"correlation_id": 198}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 198, "pid": 437675, "tid": 439228, "ts": "1704161511428525.943", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428603.434", "dur": 5.75, "cat": "enqueue", "args": {"correlation_id": 199}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 199, "pid": 437675, "tid": 439228, "ts": "1704161511428603.434", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428655.854", "dur": 1.32, "cat": "enqueue", "args": {"correlation_id": 200}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 200, "pid": 437675, "tid": 439228, "ts": "1704161511428655.854", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428704.395", "dur": 1.47, "cat": "enqueue", "args": {"correlation_id": 201}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 201, "pid": 437675, "tid": 439228, "ts": "1704161511428704.395", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428744.025", "dur": 1.32, "cat": "enqueue", "args": {"correlation_id": 202}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 202, "pid": 437675, "tid": 439228, "ts": "1704161511428744.025", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428820.236", "dur": 1.41, "cat": "enqueue", "args": {"correlation_id": 203}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 203, "pid": 437675, "tid": 439228, "ts": "1704161511428820.236", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428856.076", "dur": 
1.21, "cat": "enqueue", "args": {"correlation_id": 204}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 204, "pid": 437675, "tid": 439228, "ts": "1704161511428856.076", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428892.847", "dur": 1.48, "cat": "enqueue", "args": {"correlation_id": 205}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 205, "pid": 437675, "tid": 439228, "ts": "1704161511428892.847", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428954.057", "dur": 1.37, "cat": "enqueue", "args": {"correlation_id": 206}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 206, "pid": 437675, "tid": 439228, "ts": "1704161511428954.057", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 439228, "ts": "1704161511428996.318", "dur": 1.77, "cat": "enqueue", "args": {"correlation_id": 207}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 207, "pid": 437675, "tid": 439228, "ts": "1704161511428996.318", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429190.970", "dur": 7.08, "cat": "enqueue", "args": {"correlation_id": 208}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 208, "pid": 437675, "tid": 437675, "ts": "1704161511429190.970", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429221.150", "dur": 1.19, "cat": "enqueue", "args": {"correlation_id": 209}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 209, "pid": 437675, "tid": 437675, "ts": "1704161511429221.150", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429237.710", "dur": 1.3, "cat": "enqueue", "args": {"correlation_id": 210}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 210, "pid": 437675, "tid": 437675, 
"ts": "1704161511429237.710", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429259.320", "dur": 1.03, "cat": "enqueue", "args": {"correlation_id": 211}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 211, "pid": 437675, "tid": 437675, "ts": "1704161511429259.320", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429285.011", "dur": 1.06, "cat": "enqueue", "args": {"correlation_id": 212}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 212, "pid": 437675, "tid": 437675, "ts": "1704161511429285.011", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429397.162", "dur": 1.72, "cat": "enqueue", "args": {"correlation_id": 213}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 213, "pid": 437675, "tid": 437675, "ts": "1704161511429397.162", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429427.412", "dur": 1.21, "cat": "enqueue", "args": {"correlation_id": 214}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 214, "pid": 437675, "tid": 437675, "ts": "1704161511429427.412", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429503.913", "dur": 1.76, "cat": "enqueue", "args": {"correlation_id": 215}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 215, "pid": 437675, "tid": 437675, "ts": "1704161511429503.913", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429531.253", "dur": 4.75, "cat": "enqueue", "args": {"correlation_id": 216}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 216, "pid": 437675, "tid": 437675, "ts": "1704161511429531.253", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429551.483", 
"dur": 1.05, "cat": "enqueue", "args": {"correlation_id": 217}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 217, "pid": 437675, "tid": 437675, "ts": "1704161511429551.483", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429567.203", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 218}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 218, "pid": 437675, "tid": 437675, "ts": "1704161511429567.203", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429580.823", "dur": 1.05, "cat": "enqueue", "args": {"correlation_id": 219}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 219, "pid": 437675, "tid": 437675, "ts": "1704161511429580.823", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429598.114", "dur": 1.04, "cat": "enqueue", "args": {"correlation_id": 220}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 220, "pid": 437675, "tid": 437675, "ts": "1704161511429598.114", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429657.774", "dur": 1.83, "cat": "enqueue", "args": {"correlation_id": 221}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 221, "pid": 437675, "tid": 437675, "ts": "1704161511429657.774", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429684.024", "dur": 1.131, "cat": "enqueue", "args": {"correlation_id": 222}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 222, "pid": 437675, "tid": 437675, "ts": "1704161511429684.024", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429739.385", "dur": 1.36, "cat": "enqueue", "args": {"correlation_id": 223}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 223, "pid": 437675, "tid": 
437675, "ts": "1704161511429739.385", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429764.995", "dur": 6.07, "cat": "enqueue", "args": {"correlation_id": 224}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 224, "pid": 437675, "tid": 437675, "ts": "1704161511429764.995", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429784.135", "dur": 1.041, "cat": "enqueue", "args": {"correlation_id": 225}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 225, "pid": 437675, "tid": 437675, "ts": "1704161511429784.135", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429812.196", "dur": 1.28, "cat": "enqueue", "args": {"correlation_id": 226}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 226, "pid": 437675, "tid": 437675, "ts": "1704161511429812.196", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429827.206", "dur": 1.05, "cat": "enqueue", "args": {"correlation_id": 227}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 227, "pid": 437675, "tid": 437675, "ts": "1704161511429827.206", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429844.996", "dur": 0.9, "cat": "enqueue", "args": {"correlation_id": 228}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 228, "pid": 437675, "tid": 437675, "ts": "1704161511429844.996", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429905.447", "dur": 1.53, "cat": "enqueue", "args": {"correlation_id": 229}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 229, "pid": 437675, "tid": 437675, "ts": "1704161511429905.447", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": 
"1704161511429930.777", "dur": 1.14, "cat": "enqueue", "args": {"correlation_id": 230}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 230, "pid": 437675, "tid": 437675, "ts": "1704161511429930.777", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511429986.378", "dur": 1.51, "cat": "enqueue", "args": {"correlation_id": 231}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 231, "pid": 437675, "tid": 437675, "ts": "1704161511429986.378", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430008.848", "dur": 1.53, "cat": "enqueue", "args": {"correlation_id": 232}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 232, "pid": 437675, "tid": 437675, "ts": "1704161511430008.848", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430022.048", "dur": 8.89, "cat": "enqueue", "args": {"correlation_id": 233}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 233, "pid": 437675, "tid": 437675, "ts": "1704161511430022.048", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430044.098", "dur": 1.54, "cat": "enqueue", "args": {"correlation_id": 234}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 234, "pid": 437675, "tid": 437675, "ts": "1704161511430044.098", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430057.688", "dur": 0.97, "cat": "enqueue", "args": {"correlation_id": 235}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 235, "pid": 437675, "tid": 437675, "ts": "1704161511430057.688", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430073.458", "dur": 0.93, "cat": "enqueue", "args": {"correlation_id": 236}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 
236, "pid": 437675, "tid": 437675, "ts": "1704161511430073.458", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430127.299", "dur": 1.32, "cat": "enqueue", "args": {"correlation_id": 237}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 237, "pid": 437675, "tid": 437675, "ts": "1704161511430127.299", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430149.599", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 238}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 238, "pid": 437675, "tid": 437675, "ts": "1704161511430149.599", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430199.360", "dur": 1.34, "cat": "enqueue", "args": {"correlation_id": 239}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 239, "pid": 437675, "tid": 437675, "ts": "1704161511430199.360", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430232.000", "dur": 1.01, "cat": "enqueue", "args": {"correlation_id": 240}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 240, "pid": 437675, "tid": 437675, "ts": "1704161511430232.000", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430245.820", "dur": 4.85, "cat": "enqueue", "args": {"correlation_id": 241}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 241, "pid": 437675, "tid": 437675, "ts": "1704161511430245.820", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430264.080", "dur": 1.03, "cat": "enqueue", "args": {"correlation_id": 242}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 242, "pid": 437675, "tid": 437675, "ts": "1704161511430264.080", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 
437675, "ts": "1704161511430277.760", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 243}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 243, "pid": 437675, "tid": 437675, "ts": "1704161511430277.760", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430293.741", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 244}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 244, "pid": 437675, "tid": 437675, "ts": "1704161511430293.741", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430349.201", "dur": 1.5, "cat": "enqueue", "args": {"correlation_id": 245}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 245, "pid": 437675, "tid": 437675, "ts": "1704161511430349.201", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430372.591", "dur": 1.11, "cat": "enqueue", "args": {"correlation_id": 246}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 246, "pid": 437675, "tid": 437675, "ts": "1704161511430372.591", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430425.692", "dur": 1.65, "cat": "enqueue", "args": {"correlation_id": 247}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 247, "pid": 437675, "tid": 437675, "ts": "1704161511430425.692", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430448.362", "dur": 1.07, "cat": "enqueue", "args": {"correlation_id": 248}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 248, "pid": 437675, "tid": 437675, "ts": "1704161511430448.362", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430461.752", "dur": 4.38, "cat": "enqueue", "args": {"correlation_id": 249}}, {"ph": "s", "bp": "e", "name": 
"enqueue_to_dequeue", "id": 249, "pid": 437675, "tid": 437675, "ts": "1704161511430461.752", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430479.222", "dur": 1.11, "cat": "enqueue", "args": {"correlation_id": 250}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 250, "pid": 437675, "tid": 437675, "ts": "1704161511430479.222", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430491.803", "dur": 1.18, "cat": "enqueue", "args": {"correlation_id": 251}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 251, "pid": 437675, "tid": 437675, "ts": "1704161511430491.803", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430507.353", "dur": 1.19, "cat": "enqueue", "args": {"correlation_id": 252}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 252, "pid": 437675, "tid": 437675, "ts": "1704161511430507.353", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430557.873", "dur": 1.57, "cat": "enqueue", "args": {"correlation_id": 253}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 253, "pid": 437675, "tid": 437675, "ts": "1704161511430557.873", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430580.193", "dur": 1.16, "cat": "enqueue", "args": {"correlation_id": 254}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 254, "pid": 437675, "tid": 437675, "ts": "1704161511430580.193", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430628.964", "dur": 1.45, "cat": "enqueue", "args": {"correlation_id": 255}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 255, "pid": 437675, "tid": 437675, "ts": "1704161511430628.964", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", 
"pid": 437675, "tid": 437675, "ts": "1704161511430650.884", "dur": 0.98, "cat": "enqueue", "args": {"correlation_id": 256}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 256, "pid": 437675, "tid": 437675, "ts": "1704161511430650.884", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430663.964", "dur": 4.52, "cat": "enqueue", "args": {"correlation_id": 257}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 257, "pid": 437675, "tid": 437675, "ts": "1704161511430663.964", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430683.034", "dur": 1.32, "cat": "enqueue", "args": {"correlation_id": 258}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 258, "pid": 437675, "tid": 437675, "ts": "1704161511430683.034", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430696.885", "dur": 1.12, "cat": "enqueue", "args": {"correlation_id": 259}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 259, "pid": 437675, "tid": 437675, "ts": "1704161511430696.885", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430712.765", "dur": 0.98, "cat": "enqueue", "args": {"correlation_id": 260}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 260, "pid": 437675, "tid": 437675, "ts": "1704161511430712.765", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430763.735", "dur": 1.42, "cat": "enqueue", "args": {"correlation_id": 261}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 261, "pid": 437675, "tid": 437675, "ts": "1704161511430763.735", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430784.735", "dur": 1.171, "cat": "enqueue", "args": {"correlation_id": 262}}, {"ph": "s", "bp": "e", 
"name": "enqueue_to_dequeue", "id": 262, "pid": 437675, "tid": 437675, "ts": "1704161511430784.735", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430836.066", "dur": 1.6, "cat": "enqueue", "args": {"correlation_id": 263}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 263, "pid": 437675, "tid": 437675, "ts": "1704161511430836.066", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430857.146", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 264}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 264, "pid": 437675, "tid": 437675, "ts": "1704161511430857.146", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430869.096", "dur": 1.0, "cat": "enqueue", "args": {"correlation_id": 265}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 265, "pid": 437675, "tid": 437675, "ts": "1704161511430869.096", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430882.656", "dur": 4.731, "cat": "enqueue", "args": {"correlation_id": 266}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 266, "pid": 437675, "tid": 437675, "ts": "1704161511430882.656", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430899.047", "dur": 0.88, "cat": "enqueue", "args": {"correlation_id": 267}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 267, "pid": 437675, "tid": 437675, "ts": "1704161511430899.047", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430912.987", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 268}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 268, "pid": 437675, "tid": 437675, "ts": "1704161511430912.987", "cat": "async_task_queue"}, {"ph": "X", "name": 
"Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430962.387", "dur": 1.47, "cat": "enqueue", "args": {"correlation_id": 269}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 269, "pid": 437675, "tid": 437675, "ts": "1704161511430962.387", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511430983.937", "dur": 0.99, "cat": "enqueue", "args": {"correlation_id": 270}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 270, "pid": 437675, "tid": 437675, "ts": "1704161511430983.937", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431031.308", "dur": 1.4, "cat": "enqueue", "args": {"correlation_id": 271}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 271, "pid": 437675, "tid": 437675, "ts": "1704161511431031.308", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431053.078", "dur": 1.06, "cat": "enqueue", "args": {"correlation_id": 272}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 272, "pid": 437675, "tid": 437675, "ts": "1704161511431053.078", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431065.678", "dur": 1.38, "cat": "enqueue", "args": {"correlation_id": 273}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 273, "pid": 437675, "tid": 437675, "ts": "1704161511431065.678", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431078.968", "dur": 7.471, "cat": "enqueue", "args": {"correlation_id": 274}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 274, "pid": 437675, "tid": 437675, "ts": "1704161511431078.968", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431099.939", "dur": 1.28, "cat": "enqueue", "args": {"correlation_id": 275}}, {"ph": "s", 
"bp": "e", "name": "enqueue_to_dequeue", "id": 275, "pid": 437675, "tid": 437675, "ts": "1704161511431099.939", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431115.559", "dur": 1.03, "cat": "enqueue", "args": {"correlation_id": 276}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 276, "pid": 437675, "tid": 437675, "ts": "1704161511431115.559", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431169.109", "dur": 1.91, "cat": "enqueue", "args": {"correlation_id": 277}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 277, "pid": 437675, "tid": 437675, "ts": "1704161511431169.109", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431192.010", "dur": 1.1, "cat": "enqueue", "args": {"correlation_id": 278}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 278, "pid": 437675, "tid": 437675, "ts": "1704161511431192.010", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431243.530", "dur": 1.79, "cat": "enqueue", "args": {"correlation_id": 279}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 279, "pid": 437675, "tid": 437675, "ts": "1704161511431243.530", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431266.240", "dur": 1.03, "cat": "enqueue", "args": {"correlation_id": 280}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 280, "pid": 437675, "tid": 437675, "ts": "1704161511431266.240", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431280.000", "dur": 1.25, "cat": "enqueue", "args": {"correlation_id": 281}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 281, "pid": 437675, "tid": 437675, "ts": "1704161511431280.000", "cat": "async_task_queue"}, {"ph": "X", 
"name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431293.161", "dur": 3.68, "cat": "enqueue", "args": {"correlation_id": 282}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 282, "pid": 437675, "tid": 437675, "ts": "1704161511431293.161", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431308.821", "dur": 1.1, "cat": "enqueue", "args": {"correlation_id": 283}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 283, "pid": 437675, "tid": 437675, "ts": "1704161511431308.821", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431323.731", "dur": 1.34, "cat": "enqueue", "args": {"correlation_id": 284}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 284, "pid": 437675, "tid": 437675, "ts": "1704161511431323.731", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431374.651", "dur": 1.35, "cat": "enqueue", "args": {"correlation_id": 285}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 285, "pid": 437675, "tid": 437675, "ts": "1704161511431374.651", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431395.862", "dur": 1.1, "cat": "enqueue", "args": {"correlation_id": 286}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 286, "pid": 437675, "tid": 437675, "ts": "1704161511431395.862", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431442.512", "dur": 1.42, "cat": "enqueue", "args": {"correlation_id": 287}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 287, "pid": 437675, "tid": 437675, "ts": "1704161511431442.512", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431534.863", "dur": 1.38, "cat": "enqueue", "args": {"correlation_id": 288}}, {"ph": 
"s", "bp": "e", "name": "enqueue_to_dequeue", "id": 288, "pid": 437675, "tid": 437675, "ts": "1704161511431534.863", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431573.543", "dur": 1.2, "cat": "enqueue", "args": {"correlation_id": 289}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 289, "pid": 437675, "tid": 437675, "ts": "1704161511431573.543", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431595.214", "dur": 5.08, "cat": "enqueue", "args": {"correlation_id": 290}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 290, "pid": 437675, "tid": 437675, "ts": "1704161511431595.214", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431704.525", "dur": 1.47, "cat": "enqueue", "args": {"correlation_id": 291}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 291, "pid": 437675, "tid": 437675, "ts": "1704161511431704.525", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431757.595", "dur": 1.34, "cat": "enqueue", "args": {"correlation_id": 292}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 292, "pid": 437675, "tid": 437675, "ts": "1704161511431757.595", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431778.895", "dur": 1.09, "cat": "enqueue", "args": {"correlation_id": 293}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 293, "pid": 437675, "tid": 437675, "ts": "1704161511431778.895", "cat": "async_task_queue"}, {"ph": "X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431821.186", "dur": 1.12, "cat": "enqueue", "args": {"correlation_id": 294}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 294, "pid": 437675, "tid": 437675, "ts": "1704161511431821.186", "cat": "async_task_queue"}, {"ph": 
"X", "name": "Enqueue", "pid": 437675, "tid": 437675, "ts": "1704161511431841.146", "dur": 1.07, "cat": "enqueue", "args": {"correlation_id": 295}}, {"ph": "s", "bp": "e", "name": "enqueue_to_dequeue", "id": 295, "pid": 437675, "tid": 437675, "ts": "1704161511431841.146", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422882.957", "dur": 36.27, "cat": "dequeue", "args": {"correlation_id": 148}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 148, "pid": 437675, "tid": 439084, "ts": "1704161511422882.957", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422923.227", "dur": 10.78, "cat": "dequeue", "args": {"correlation_id": 149}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 149, "pid": 437675, "tid": 439084, "ts": "1704161511422923.227", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422935.077", "dur": 9.5, "cat": "dequeue", "args": {"correlation_id": 150}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 150, "pid": 437675, "tid": 439084, "ts": "1704161511422935.077", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422946.277", "dur": 8.98, "cat": "dequeue", "args": {"correlation_id": 151}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 151, "pid": 437675, "tid": 439084, "ts": "1704161511422946.277", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422957.017", "dur": 9.59, "cat": "dequeue", "args": {"correlation_id": 152}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 152, "pid": 437675, "tid": 439084, "ts": "1704161511422957.017", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 
439084, "ts": "1704161511422968.347", "dur": 7.82, "cat": "dequeue", "args": {"correlation_id": 153}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 153, "pid": 437675, "tid": 439084, "ts": "1704161511422968.347", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422978.278", "dur": 8.36, "cat": "dequeue", "args": {"correlation_id": 154}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 154, "pid": 437675, "tid": 439084, "ts": "1704161511422978.278", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422988.558", "dur": 7.97, "cat": "dequeue", "args": {"correlation_id": 155}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 155, "pid": 437675, "tid": 439084, "ts": "1704161511422988.558", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511422997.808", "dur": 7.88, "cat": "dequeue", "args": {"correlation_id": 156}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 156, "pid": 437675, "tid": 439084, "ts": "1704161511422997.808", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceZero", "pid": 437675, "tid": 439084, "ts": "1704161511423007.428", "dur": 7.29, "cat": "dequeue", "args": {"correlation_id": 157}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 157, "pid": 437675, "tid": 439084, "ts": "1704161511423007.428", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@Conv2D", "pid": 437675, "tid": 439084, "ts": "1704161511423217.990", "dur": 1107.771, "cat": "dequeue", "args": {"correlation_id": 158}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 158, "pid": 437675, "tid": 439084, "ts": "1704161511423217.990", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnRelu", "pid": 437675, "tid": 439084, "ts": "1704161511424331.571", "dur": 21.21, "cat": 
"dequeue", "args": {"correlation_id": 159}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 159, "pid": 437675, "tid": 439084, "ts": "1704161511424331.571", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@MaxPoolWithArgmaxV1", "pid": 437675, "tid": 439084, "ts": "1704161511424355.421", "dur": 614.326, "cat": "dequeue", "args": {"correlation_id": 160}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 160, "pid": 437675, "tid": 439084, "ts": "1704161511424355.421", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@Conv2D", "pid": 437675, "tid": 439084, "ts": "1704161511424975.557", "dur": 919.1, "cat": "dequeue", "args": {"correlation_id": 161}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 161, "pid": 437675, "tid": 439084, "ts": "1704161511424975.557", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnRelu", "pid": 437675, "tid": 439084, "ts": "1704161511425900.197", "dur": 15.6, "cat": "dequeue", "args": {"correlation_id": 162}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 162, "pid": 437675, "tid": 439084, "ts": "1704161511425900.197", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@MaxPoolWithArgmaxV1", "pid": 437675, "tid": 439084, "ts": "1704161511425917.937", "dur": 136.671, "cat": "dequeue", "args": {"correlation_id": 163}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 163, "pid": 437675, "tid": 439084, "ts": "1704161511425917.937", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnAddmm", "pid": 437675, "tid": 439084, "ts": "1704161511426057.978", "dur": 60.801, "cat": "dequeue", "args": {"correlation_id": 164}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 164, "pid": 437675, "tid": 439084, "ts": "1704161511426057.978", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnRelu", "pid": 437675, "tid": 439084, "ts": "1704161511426120.939", "dur": 8.08, "cat": "dequeue", "args": {"correlation_id": 165}}, {"ph": "f", "bp": 
"e", "name": "enqueue_to_dequeue", "id": 165, "pid": 437675, "tid": 439084, "ts": "1704161511426120.939", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnAddmm", "pid": 437675, "tid": 439084, "ts": "1704161511426130.339", "dur": 57.371, "cat": "dequeue", "args": {"correlation_id": 166}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 166, "pid": 437675, "tid": 439084, "ts": "1704161511426130.339", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnRelu", "pid": 437675, "tid": 439084, "ts": "1704161511426189.740", "dur": 7.7, "cat": "dequeue", "args": {"correlation_id": 167}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 167, "pid": 437675, "tid": 439084, "ts": "1704161511426189.740", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnAddmm", "pid": 437675, "tid": 439084, "ts": "1704161511426199.100", "dur": 49.96, "cat": "dequeue", "args": {"correlation_id": 168}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 168, "pid": 437675, "tid": 439084, "ts": "1704161511426199.100", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnLogSoftmax", "pid": 437675, "tid": 439084, "ts": "1704161511426463.522", "dur": 15.78, "cat": "dequeue", "args": {"correlation_id": 169}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 169, "pid": 437675, "tid": 439084, "ts": "1704161511426463.522", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceOne", "pid": 437675, "tid": 439084, "ts": "1704161511426542.153", "dur": 11.75, "cat": "dequeue", "args": {"correlation_id": 170}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 170, "pid": 437675, "tid": 439084, "ts": "1704161511426542.153", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnNLLLoss", "pid": 437675, "tid": 439084, "ts": "1704161511426556.003", "dur": 24.07, "cat": "dequeue", "args": {"correlation_id": 171}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 171, "pid": 437675, "tid": 
439084, "ts": "1704161511426556.003", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceOne", "pid": 437675, "tid": 439084, "ts": "1704161511426627.744", "dur": 9.55, "cat": "dequeue", "args": {"correlation_id": 172}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 172, "pid": 437675, "tid": 439084, "ts": "1704161511426627.744", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceOne", "pid": 437675, "tid": 439084, "ts": "1704161511426831.926", "dur": 8.96, "cat": "dequeue", "args": {"correlation_id": 173}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 173, "pid": 437675, "tid": 439084, "ts": "1704161511426831.926", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnNLLLossBackward", "pid": 437675, "tid": 439084, "ts": "1704161511426856.316", "dur": 16.66, "cat": "dequeue", "args": {"correlation_id": 174}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 174, "pid": 437675, "tid": 439084, "ts": "1704161511426856.316", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnLogSoftmaxBackward", "pid": 437675, "tid": 439084, "ts": "1704161511426935.117", "dur": 12.12, "cat": "dequeue", "args": {"correlation_id": 175}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 175, "pid": 437675, "tid": 439084, "ts": "1704161511426935.117", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnMm", "pid": 437675, "tid": 439084, "ts": "1704161511427007.198", "dur": 37.96, "cat": "dequeue", "args": {"correlation_id": 176}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 176, "pid": 437675, "tid": 439084, "ts": "1704161511427007.198", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnMm", "pid": 437675, "tid": 439084, "ts": "1704161511427047.458", "dur": 36.251, "cat": "dequeue", "args": {"correlation_id": 177}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 177, "pid": 437675, "tid": 439084, "ts": "1704161511427047.458", "cat": 
"async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnReduceSum", "pid": 437675, "tid": 439084, "ts": "1704161511427085.799", "dur": 14.39, "cat": "dequeue", "args": {"correlation_id": 178}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 178, "pid": 437675, "tid": 439084, "ts": "1704161511427085.799", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511427269.460", "dur": 72.671, "cat": "dequeue", "args": {"correlation_id": 179}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 179, "pid": 437675, "tid": 439084, "ts": "1704161511427269.460", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511427362.431", "dur": 36.601, "cat": "dequeue", "args": {"correlation_id": 180}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 180, "pid": 437675, "tid": 439084, "ts": "1704161511427362.431", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnThresholdBackward", "pid": 437675, "tid": 439084, "ts": "1704161511427427.912", "dur": 14.58, "cat": "dequeue", "args": {"correlation_id": 181}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 181, "pid": 437675, "tid": 439084, "ts": "1704161511427427.912", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnMm", "pid": 437675, "tid": 439084, "ts": "1704161511427481.542", "dur": 35.051, "cat": "dequeue", "args": {"correlation_id": 182}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 182, "pid": 437675, "tid": 439084, "ts": "1704161511427481.542", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnMm", "pid": 437675, "tid": 439084, "ts": "1704161511427518.693", "dur": 32.5, "cat": "dequeue", "args": {"correlation_id": 183}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 183, "pid": 437675, "tid": 439084, "ts": "1704161511427518.693", "cat": "async_task_queue"}, {"ph": "X", "name": 
"Dequeue@aclnnReduceSum", "pid": 437675, "tid": 439084, "ts": "1704161511427553.183", "dur": 10.63, "cat": "dequeue", "args": {"correlation_id": 184}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 184, "pid": 437675, "tid": 439084, "ts": "1704161511427553.183", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511427602.924", "dur": 34.05, "cat": "dequeue", "args": {"correlation_id": 185}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 185, "pid": 437675, "tid": 439084, "ts": "1704161511427602.924", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511427679.604", "dur": 30.371, "cat": "dequeue", "args": {"correlation_id": 186}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 186, "pid": 437675, "tid": 439084, "ts": "1704161511427679.604", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnThresholdBackward", "pid": 437675, "tid": 439084, "ts": "1704161511427712.375", "dur": 8.47, "cat": "dequeue", "args": {"correlation_id": 187}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 187, "pid": 437675, "tid": 439084, "ts": "1704161511427712.375", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnMm", "pid": 437675, "tid": 439084, "ts": "1704161511427756.445", "dur": 34.011, "cat": "dequeue", "args": {"correlation_id": 188}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 188, "pid": 437675, "tid": 439084, "ts": "1704161511427756.445", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnMm", "pid": 437675, "tid": 439084, "ts": "1704161511427871.846", "dur": 32.871, "cat": "dequeue", "args": {"correlation_id": 189}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 189, "pid": 437675, "tid": 439084, "ts": "1704161511427871.846", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnReduceSum", "pid": 437675, "tid": 439084, 
"ts": "1704161511427907.237", "dur": 10.66, "cat": "dequeue", "args": {"correlation_id": 190}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 190, "pid": 437675, "tid": 439084, "ts": "1704161511427907.237", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511427984.308", "dur": 31.5, "cat": "dequeue", "args": {"correlation_id": 191}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 191, "pid": 437675, "tid": 439084, "ts": "1704161511427984.308", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511428060.948", "dur": 29.231, "cat": "dequeue", "args": {"correlation_id": 192}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 192, "pid": 437675, "tid": 439084, "ts": "1704161511428060.948", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@MaxPoolGradWithArgmaxV1", "pid": 437675, "tid": 439084, "ts": "1704161511428174.169", "dur": 816.009, "cat": "dequeue", "args": {"correlation_id": 193}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 193, "pid": 437675, "tid": 439084, "ts": "1704161511428174.169", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnThresholdBackward", "pid": 437675, "tid": 439084, "ts": "1704161511428995.858", "dur": 20.71, "cat": "dequeue", "args": {"correlation_id": 194}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 194, "pid": 437675, "tid": 439084, "ts": "1704161511428995.858", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@Conv2DBackpropInput", "pid": 437675, "tid": 439084, "ts": "1704161511429019.438", "dur": 901.779, "cat": "dequeue", "args": {"correlation_id": 195}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 195, "pid": 437675, "tid": 439084, "ts": "1704161511429019.438", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@Conv2DBackpropFilter", "pid": 437675, "tid": 439084, "ts": 
"1704161511429932.567", "dur": 830.638, "cat": "dequeue", "args": {"correlation_id": 196}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 196, "pid": 437675, "tid": 439084, "ts": "1704161511429932.567", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@ReduceSum", "pid": 437675, "tid": 439084, "ts": "1704161511430770.935", "dur": 321.184, "cat": "dequeue", "args": {"correlation_id": 197}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 197, "pid": 437675, "tid": 439084, "ts": "1704161511430770.935", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceCopy", "pid": 437675, "tid": 439084, "ts": "1704161511431099.719", "dur": 32.82, "cat": "dequeue", "args": {"correlation_id": 198}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 198, "pid": 437675, "tid": 439084, "ts": "1704161511431099.719", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511431135.549", "dur": 62.001, "cat": "dequeue", "args": {"correlation_id": 199}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 199, "pid": 437675, "tid": 439084, "ts": "1704161511431135.549", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511431200.040", "dur": 32.9, "cat": "dequeue", "args": {"correlation_id": 200}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 200, "pid": 437675, "tid": 439084, "ts": "1704161511431200.040", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@MaxPoolGradWithArgmaxV1", "pid": 437675, "tid": 439084, "ts": "1704161511431235.210", "dur": 794.548, "cat": "dequeue", "args": {"correlation_id": 201}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 201, "pid": 437675, "tid": 439084, "ts": "1704161511431235.210", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnThresholdBackward", "pid": 437675, "tid": 439084, "ts": "1704161511432035.618", "dur": 
15.23, "cat": "dequeue", "args": {"correlation_id": 202}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 202, "pid": 437675, "tid": 439084, "ts": "1704161511432035.618", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@Conv2DBackpropFilter", "pid": 437675, "tid": 439084, "ts": "1704161511432052.848", "dur": 174.352, "cat": "dequeue", "args": {"correlation_id": 203}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 203, "pid": 437675, "tid": 439084, "ts": "1704161511432052.848", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@ReduceSum", "pid": 437675, "tid": 439084, "ts": "1704161511432232.750", "dur": 65.311, "cat": "dequeue", "args": {"correlation_id": 204}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 204, "pid": 437675, "tid": 439084, "ts": "1704161511432232.750", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceCopy", "pid": 437675, "tid": 439084, "ts": "1704161511432302.831", "dur": 18.13, "cat": "dequeue", "args": {"correlation_id": 205}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 205, "pid": 437675, "tid": 439084, "ts": "1704161511432302.831", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432323.421", "dur": 53.45, "cat": "dequeue", "args": {"correlation_id": 206}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 206, "pid": 437675, "tid": 439084, "ts": "1704161511432323.421", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432380.571", "dur": 33.811, "cat": "dequeue", "args": {"correlation_id": 207}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 207, "pid": 437675, "tid": 439084, "ts": "1704161511432380.571", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432416.262", "dur": 15.33, "cat": "dequeue", "args": 
{"correlation_id": 208}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 208, "pid": 437675, "tid": 439084, "ts": "1704161511432416.262", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432433.172", "dur": 8.55, "cat": "dequeue", "args": {"correlation_id": 209}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 209, "pid": 437675, "tid": 439084, "ts": "1704161511432433.172", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432443.062", "dur": 8.59, "cat": "dequeue", "args": {"correlation_id": 210}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 210, "pid": 437675, "tid": 439084, "ts": "1704161511432443.062", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511432453.422", "dur": 11.51, "cat": "dequeue", "args": {"correlation_id": 211}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 211, "pid": 437675, "tid": 439084, "ts": "1704161511432453.422", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511432468.282", "dur": 10.31, "cat": "dequeue", "args": {"correlation_id": 212}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 212, "pid": 437675, "tid": 439084, "ts": "1704161511432468.282", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511432480.822", "dur": 42.771, "cat": "dequeue", "args": {"correlation_id": 213}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 213, "pid": 437675, "tid": 439084, "ts": "1704161511432480.822", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511432525.823", "dur": 11.54, "cat": "dequeue", "args": {"correlation_id": 214}}, {"ph": "f", "bp": "e", "name": 
"enqueue_to_dequeue", "id": 214, "pid": 437675, "tid": 439084, "ts": "1704161511432525.823", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511432538.703", "dur": 45.57, "cat": "dequeue", "args": {"correlation_id": 215}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 215, "pid": 437675, "tid": 439084, "ts": "1704161511432538.703", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432586.643", "dur": 10.111, "cat": "dequeue", "args": {"correlation_id": 216}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 216, "pid": 437675, "tid": 439084, "ts": "1704161511432586.643", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432597.924", "dur": 8.58, "cat": "dequeue", "args": {"correlation_id": 217}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 217, "pid": 437675, "tid": 439084, "ts": "1704161511432597.924", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432608.044", "dur": 8.25, "cat": "dequeue", "args": {"correlation_id": 218}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 218, "pid": 437675, "tid": 439084, "ts": "1704161511432608.044", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511432617.454", "dur": 8.76, "cat": "dequeue", "args": {"correlation_id": 219}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 219, "pid": 437675, "tid": 439084, "ts": "1704161511432617.454", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511432627.424", "dur": 8.48, "cat": "dequeue", "args": {"correlation_id": 220}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 220, "pid": 437675, 
"tid": 439084, "ts": "1704161511432627.424", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511432637.564", "dur": 28.93, "cat": "dequeue", "args": {"correlation_id": 221}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 221, "pid": 437675, "tid": 439084, "ts": "1704161511432637.564", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511432668.274", "dur": 8.65, "cat": "dequeue", "args": {"correlation_id": 222}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 222, "pid": 437675, "tid": 439084, "ts": "1704161511432668.274", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511432678.164", "dur": 30.831, "cat": "dequeue", "args": {"correlation_id": 223}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 223, "pid": 437675, "tid": 439084, "ts": "1704161511432678.164", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432710.885", "dur": 9.14, "cat": "dequeue", "args": {"correlation_id": 224}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 224, "pid": 437675, "tid": 439084, "ts": "1704161511432710.885", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432721.335", "dur": 8.6, "cat": "dequeue", "args": {"correlation_id": 225}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 225, "pid": 437675, "tid": 439084, "ts": "1704161511432721.335", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432731.175", "dur": 7.51, "cat": "dequeue", "args": {"correlation_id": 226}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 226, "pid": 437675, "tid": 439084, "ts": "1704161511432731.175", "cat": 
"async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511432739.885", "dur": 8.47, "cat": "dequeue", "args": {"correlation_id": 227}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 227, "pid": 437675, "tid": 439084, "ts": "1704161511432739.885", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511432750.015", "dur": 7.16, "cat": "dequeue", "args": {"correlation_id": 228}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 228, "pid": 437675, "tid": 439084, "ts": "1704161511432750.015", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511432758.715", "dur": 27.39, "cat": "dequeue", "args": {"correlation_id": 229}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 229, "pid": 437675, "tid": 439084, "ts": "1704161511432758.715", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511432787.806", "dur": 9.75, "cat": "dequeue", "args": {"correlation_id": 230}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 230, "pid": 437675, "tid": 439084, "ts": "1704161511432787.806", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511432798.776", "dur": 30.37, "cat": "dequeue", "args": {"correlation_id": 231}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 231, "pid": 437675, "tid": 439084, "ts": "1704161511432798.776", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432831.116", "dur": 9.36, "cat": "dequeue", "args": {"correlation_id": 232}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 232, "pid": 437675, "tid": 439084, "ts": "1704161511432831.116", "cat": "async_task_queue"}, {"ph": "X", "name": 
"Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432841.746", "dur": 8.12, "cat": "dequeue", "args": {"correlation_id": 233}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 233, "pid": 437675, "tid": 439084, "ts": "1704161511432841.746", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432851.576", "dur": 7.67, "cat": "dequeue", "args": {"correlation_id": 234}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 234, "pid": 437675, "tid": 439084, "ts": "1704161511432851.576", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511432860.416", "dur": 8.02, "cat": "dequeue", "args": {"correlation_id": 235}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 235, "pid": 437675, "tid": 439084, "ts": "1704161511432860.416", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511432870.106", "dur": 19.701, "cat": "dequeue", "args": {"correlation_id": 236}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 236, "pid": 437675, "tid": 439084, "ts": "1704161511432870.106", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511432891.347", "dur": 28.97, "cat": "dequeue", "args": {"correlation_id": 237}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 237, "pid": 437675, "tid": 439084, "ts": "1704161511432891.347", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511432922.427", "dur": 8.92, "cat": "dequeue", "args": {"correlation_id": 238}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 238, "pid": 437675, "tid": 439084, "ts": "1704161511432922.427", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 
439084, "ts": "1704161511432933.367", "dur": 29.67, "cat": "dequeue", "args": {"correlation_id": 239}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 239, "pid": 437675, "tid": 439084, "ts": "1704161511432933.367", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432964.797", "dur": 8.73, "cat": "dequeue", "args": {"correlation_id": 240}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 240, "pid": 437675, "tid": 439084, "ts": "1704161511432964.797", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511432974.767", "dur": 7.07, "cat": "dequeue", "args": {"correlation_id": 241}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 241, "pid": 437675, "tid": 439084, "ts": "1704161511432974.767", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511432984.307", "dur": 7.431, "cat": "dequeue", "args": {"correlation_id": 242}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 242, "pid": 437675, "tid": 439084, "ts": "1704161511432984.307", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511432992.968", "dur": 8.17, "cat": "dequeue", "args": {"correlation_id": 243}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 243, "pid": 437675, "tid": 439084, "ts": "1704161511432992.968", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511433002.668", "dur": 6.99, "cat": "dequeue", "args": {"correlation_id": 244}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 244, "pid": 437675, "tid": 439084, "ts": "1704161511433002.668", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433010.778", "dur": 26.12, "cat": 
"dequeue", "args": {"correlation_id": 245}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 245, "pid": 437675, "tid": 439084, "ts": "1704161511433010.778", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511433038.848", "dur": 8.6, "cat": "dequeue", "args": {"correlation_id": 246}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 246, "pid": 437675, "tid": 439084, "ts": "1704161511433038.848", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511433049.118", "dur": 29.69, "cat": "dequeue", "args": {"correlation_id": 247}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 247, "pid": 437675, "tid": 439084, "ts": "1704161511433049.118", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433080.468", "dur": 8.661, "cat": "dequeue", "args": {"correlation_id": 248}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 248, "pid": 437675, "tid": 439084, "ts": "1704161511433080.468", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433090.569", "dur": 7.33, "cat": "dequeue", "args": {"correlation_id": 249}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 249, "pid": 437675, "tid": 439084, "ts": "1704161511433090.569", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433099.589", "dur": 7.78, "cat": "dequeue", "args": {"correlation_id": 250}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 250, "pid": 437675, "tid": 439084, "ts": "1704161511433099.589", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511433109.439", "dur": 8.09, "cat": "dequeue", "args": {"correlation_id": 
251}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 251, "pid": 437675, "tid": 439084, "ts": "1704161511433109.439", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511433118.819", "dur": 6.81, "cat": "dequeue", "args": {"correlation_id": 252}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 252, "pid": 437675, "tid": 439084, "ts": "1704161511433118.819", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433126.789", "dur": 25.53, "cat": "dequeue", "args": {"correlation_id": 253}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 253, "pid": 437675, "tid": 439084, "ts": "1704161511433126.789", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511433154.069", "dur": 8.92, "cat": "dequeue", "args": {"correlation_id": 254}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 254, "pid": 437675, "tid": 439084, "ts": "1704161511433154.069", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511433164.759", "dur": 29.461, "cat": "dequeue", "args": {"correlation_id": 255}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 255, "pid": 437675, "tid": 439084, "ts": "1704161511433164.759", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433195.970", "dur": 14.23, "cat": "dequeue", "args": {"correlation_id": 256}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 256, "pid": 437675, "tid": 439084, "ts": "1704161511433195.970", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433211.800", "dur": 9.11, "cat": "dequeue", "args": {"correlation_id": 257}}, {"ph": "f", "bp": "e", "name": 
"enqueue_to_dequeue", "id": 257, "pid": 437675, "tid": 439084, "ts": "1704161511433211.800", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433222.670", "dur": 7.81, "cat": "dequeue", "args": {"correlation_id": 258}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 258, "pid": 437675, "tid": 439084, "ts": "1704161511433222.670", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511433231.710", "dur": 8.33, "cat": "dequeue", "args": {"correlation_id": 259}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 259, "pid": 437675, "tid": 439084, "ts": "1704161511433231.710", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511433241.360", "dur": 6.86, "cat": "dequeue", "args": {"correlation_id": 260}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 260, "pid": 437675, "tid": 439084, "ts": "1704161511433241.360", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433249.470", "dur": 27.93, "cat": "dequeue", "args": {"correlation_id": 261}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 261, "pid": 437675, "tid": 439084, "ts": "1704161511433249.470", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511433279.480", "dur": 8.61, "cat": "dequeue", "args": {"correlation_id": 262}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 262, "pid": 437675, "tid": 439084, "ts": "1704161511433279.480", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511433289.291", "dur": 29.4, "cat": "dequeue", "args": {"correlation_id": 263}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 263, "pid": 437675, "tid": 
439084, "ts": "1704161511433289.291", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433320.371", "dur": 8.61, "cat": "dequeue", "args": {"correlation_id": 264}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 264, "pid": 437675, "tid": 439084, "ts": "1704161511433320.371", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433330.691", "dur": 7.92, "cat": "dequeue", "args": {"correlation_id": 265}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 265, "pid": 437675, "tid": 439084, "ts": "1704161511433330.691", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433340.121", "dur": 7.42, "cat": "dequeue", "args": {"correlation_id": 266}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 266, "pid": 437675, "tid": 439084, "ts": "1704161511433340.121", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511433348.821", "dur": 8.01, "cat": "dequeue", "args": {"correlation_id": 267}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 267, "pid": 437675, "tid": 439084, "ts": "1704161511433348.821", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511433358.211", "dur": 6.85, "cat": "dequeue", "args": {"correlation_id": 268}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 268, "pid": 437675, "tid": 439084, "ts": "1704161511433358.211", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433366.231", "dur": 26.091, "cat": "dequeue", "args": {"correlation_id": 269}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 269, "pid": 437675, "tid": 439084, "ts": "1704161511433366.231", "cat": 
"async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511433394.652", "dur": 8.34, "cat": "dequeue", "args": {"correlation_id": 270}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 270, "pid": 437675, "tid": 439084, "ts": "1704161511433394.652", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511433404.682", "dur": 29.04, "cat": "dequeue", "args": {"correlation_id": 271}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 271, "pid": 437675, "tid": 439084, "ts": "1704161511433404.682", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433435.592", "dur": 8.88, "cat": "dequeue", "args": {"correlation_id": 272}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 272, "pid": 437675, "tid": 439084, "ts": "1704161511433435.592", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433446.452", "dur": 7.79, "cat": "dequeue", "args": {"correlation_id": 273}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 273, "pid": 437675, "tid": 439084, "ts": "1704161511433446.452", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433455.512", "dur": 7.04, "cat": "dequeue", "args": {"correlation_id": 274}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 274, "pid": 437675, "tid": 439084, "ts": "1704161511433455.512", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511433468.282", "dur": 8.75, "cat": "dequeue", "args": {"correlation_id": 275}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 275, "pid": 437675, "tid": 439084, "ts": "1704161511433468.282", "cat": "async_task_queue"}, {"ph": "X", "name": 
"Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511433478.682", "dur": 7.36, "cat": "dequeue", "args": {"correlation_id": 276}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 276, "pid": 437675, "tid": 439084, "ts": "1704161511433478.682", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433487.272", "dur": 28.061, "cat": "dequeue", "args": {"correlation_id": 277}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 277, "pid": 437675, "tid": 439084, "ts": "1704161511433487.272", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511433517.313", "dur": 8.68, "cat": "dequeue", "args": {"correlation_id": 278}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 278, "pid": 437675, "tid": 439084, "ts": "1704161511433517.313", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511433527.423", "dur": 29.78, "cat": "dequeue", "args": {"correlation_id": 279}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 279, "pid": 437675, "tid": 439084, "ts": "1704161511433527.423", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, "ts": "1704161511433558.703", "dur": 9.01, "cat": "dequeue", "args": {"correlation_id": 280}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 280, "pid": 437675, "tid": 439084, "ts": "1704161511433558.703", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433569.443", "dur": 7.68, "cat": "dequeue", "args": {"correlation_id": 281}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 281, "pid": 437675, "tid": 439084, "ts": "1704161511433569.443", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceMuls", "pid": 437675, "tid": 439084, 
"ts": "1704161511433578.383", "dur": 6.93, "cat": "dequeue", "args": {"correlation_id": 282}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 282, "pid": 437675, "tid": 439084, "ts": "1704161511433578.383", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcmul", "pid": 437675, "tid": 439084, "ts": "1704161511433586.543", "dur": 7.471, "cat": "dequeue", "args": {"correlation_id": 283}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 283, "pid": 437675, "tid": 439084, "ts": "1704161511433586.543", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnSqrt", "pid": 437675, "tid": 439084, "ts": "1704161511433598.074", "dur": 7.12, "cat": "dequeue", "args": {"correlation_id": 284}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 284, "pid": 437675, "tid": 439084, "ts": "1704161511433598.074", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433606.634", "dur": 29.06, "cat": "dequeue", "args": {"correlation_id": 285}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 285, "pid": 437675, "tid": 439084, "ts": "1704161511433606.634", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdds", "pid": 437675, "tid": 439084, "ts": "1704161511433637.924", "dur": 8.22, "cat": "dequeue", "args": {"correlation_id": 286}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 286, "pid": 437675, "tid": 439084, "ts": "1704161511433637.924", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAddcdiv", "pid": 437675, "tid": 439084, "ts": "1704161511433647.444", "dur": 28.28, "cat": "dequeue", "args": {"correlation_id": 287}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 287, "pid": 437675, "tid": 439084, "ts": "1704161511433647.444", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433677.384", "dur": 8.04, "cat": 
"dequeue", "args": {"correlation_id": 288}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 288, "pid": 437675, "tid": 439084, "ts": "1704161511433677.384", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnArgMax", "pid": 437675, "tid": 439084, "ts": "1704161511433687.094", "dur": 15.151, "cat": "dequeue", "args": {"correlation_id": 289}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 289, "pid": 437675, "tid": 439084, "ts": "1704161511433687.094", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnEqTensor", "pid": 437675, "tid": 439084, "ts": "1704161511433703.595", "dur": 10.72, "cat": "dequeue", "args": {"correlation_id": 290}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 290, "pid": 437675, "tid": 439084, "ts": "1704161511433703.595", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnReduceSum", "pid": 437675, "tid": 439084, "ts": "1704161511433715.545", "dur": 121.881, "cat": "dequeue", "args": {"correlation_id": 291}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 291, "pid": 437675, "tid": 439084, "ts": "1704161511433715.545", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnCast", "pid": 437675, "tid": 439084, "ts": "1704161511433847.346", "dur": 17.8, "cat": "dequeue", "args": {"correlation_id": 292}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 292, "pid": 437675, "tid": 439084, "ts": "1704161511433847.346", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnInplaceAdd", "pid": 437675, "tid": 439084, "ts": "1704161511433867.346", "dur": 9.33, "cat": "dequeue", "args": {"correlation_id": 293}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 293, "pid": 437675, "tid": 439084, "ts": "1704161511433867.346", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433878.276", "dur": 8.13, "cat": "dequeue", "args": {"correlation_id": 294}}, {"ph": "f", "bp": "e", 
"name": "enqueue_to_dequeue", "id": 294, "pid": 437675, "tid": 439084, "ts": "1704161511433878.276", "cat": "async_task_queue"}, {"ph": "X", "name": "Dequeue@aclnnDivs", "pid": 437675, "tid": 439084, "ts": "1704161511433887.776", "dur": 6.751, "cat": "dequeue", "args": {"correlation_id": 295}}, {"ph": "f", "bp": "e", "name": "enqueue_to_dequeue", "id": 295, "pid": 437675, "tid": 439084, "ts": "1704161511433887.776", "cat": "async_task_queue"}, {"ph": "M", "name": "process_name", "pid": 437675, "tid": 0, "args": {"name": "Python"}}, {"ph": "M", "name": "process_labels", "pid": 437675, "tid": 0, "args": {"labels": "CPU"}}, {"ph": "M", "name": "process_sort_index", "pid": 437675, "tid": 0, "args": {"sort_index": 0}}, {"ph": "M", "name": "thread_name", "pid": 437675, "tid": 437675, "args": {"name": "Thread 437675"}}, {"ph": "M", "name": "thread_sort_index", "pid": 437675, "tid": 437675, "args": {"sort_index": 437675}}, {"ph": "M", "name": "thread_name", "pid": 437675, "tid": 439228, "args": {"name": "Thread 439228"}}, {"ph": "M", "name": "thread_sort_index", "pid": 437675, "tid": 439228, "args": {"sort_index": 439228}}, {"ph": "M", "name": "thread_name", "pid": 437675, "tid": 439084, "args": {"name": "Thread 439084"}}, {"ph": "M", "name": "thread_sort_index", "pid": 437675, "tid": 439084, "args": {"sort_index": 439229}}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 17, "pid": 437675, "tid": 437675, "ts": "1704161511423098.389", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 17, "pid": 437675, "tid": 439228, "ts": "1704161511428757.055", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 18, "pid": 437675, "tid": 437675, "ts": "1704161511423234.760", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 18, "pid": 437675, "tid": 439228, "ts": "1704161511428726.245", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 19, "pid": 437675, "tid": 437675, "ts": "1704161511423348.621", "cat": "fwdbwd"}, {"ph": "f", 
"bp": "e", "name": "fwdbwd", "id": 19, "pid": 437675, "tid": 439228, "ts": "1704161511428668.254", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 20, "pid": 437675, "tid": 437675, "ts": "1704161511423433.722", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 20, "pid": 437675, "tid": 439228, "ts": "1704161511428228.320", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 21, "pid": 437675, "tid": 437675, "ts": "1704161511423480.703", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 21, "pid": 437675, "tid": 439228, "ts": "1704161511428193.900", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 22, "pid": 437675, "tid": 437675, "ts": "1704161511423526.163", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 22, "pid": 437675, "tid": 439228, "ts": "1704161511428092.079", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 23, "pid": 437675, "tid": 437675, "ts": "1704161511423572.793", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 23, "pid": 437675, "tid": 439228, "ts": "1704161511428070.708", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 24, "pid": 437675, "tid": 437675, "ts": "1704161511423615.164", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 24, "pid": 437675, "tid": 439228, "ts": "1704161511427996.438", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 25, "pid": 437675, "tid": 437675, "ts": "1704161511423634.704", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 25, "pid": 437675, "tid": 439228, "ts": "1704161511427720.065", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 26, "pid": 437675, "tid": 437675, "ts": "1704161511423680.365", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 26, "pid": 437675, "tid": 439228, "ts": "1704161511427687.485", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 27, "pid": 437675, "tid": 
437675, "ts": "1704161511423715.015", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 27, "pid": 437675, "tid": 439228, "ts": "1704161511427613.274", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 28, "pid": 437675, "tid": 437675, "ts": "1704161511423725.355", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 28, "pid": 437675, "tid": 439228, "ts": "1704161511427440.652", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 29, "pid": 437675, "tid": 437675, "ts": "1704161511423751.965", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 29, "pid": 437675, "tid": 439228, "ts": "1704161511427373.241", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 30, "pid": 437675, "tid": 437675, "ts": "1704161511423783.516", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 30, "pid": 437675, "tid": 439228, "ts": "1704161511427285.141", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 31, "pid": 437675, "tid": 437675, "ts": "1704161511423793.646", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 31, "pid": 437675, "tid": 439228, "ts": "1704161511426950.567", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 32, "pid": 437675, "tid": 437675, "ts": "1704161511426415.582", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 32, "pid": 437675, "tid": 439228, "ts": "1704161511426883.167", "cat": "fwdbwd"}, {"ph": "s", "bp": "e", "name": "fwdbwd", "id": 33, "pid": 437675, "tid": 437675, "ts": "1704161511426502.433", "cat": "fwdbwd"}, {"ph": "f", "bp": "e", "name": "fwdbwd", "id": 33, "pid": 437675, "tid": 439228, "ts": "1704161511426719.085", "cat": "fwdbwd"},{"name": "process_name", "pid": 800, "tid": 0, "args": {"name": "Ascend Hardware"}, "ph": "M"}, {"name": "thread_name", "pid": 800, "tid": 3, "args": {"name": "Stream 3"}, "ph": "M"}, {"name": "thread_sort_index", "pid": 800, "tid": 3, "args": {"sort_index": 3}, 
"ph": "M"}, {"name": "process_labels", "pid": 800, "tid": 0, "args": {"labels": "NPU"}, "ph": "M"}, {"name": "process_sort_index", "pid": 800, "tid": 0, "args": {"sort_index": 8}, "ph": "M"}, {"name": "process_name", "pid": 43767502, "tid": 0, "args": {"name": "CANN"}, "ph": "M"}, {"name": "thread_name", "pid": 43767502, "tid": 439228, "args": {"name": "Thread 439228"}, "ph": "M"}, {"name": "thread_name", "pid": 43767502, "tid": 437675, "args": {"name": "Thread 437675"}, "ph": "M"}, {"name": "thread_name", "pid": 43767502, "tid": 439084, "args": {"name": "Thread 439084"}, "ph": "M"}, {"name": "thread_name", "pid": 43767502, "tid": 439085, "args": {"name": "Thread 439085"}, "ph": "M"}, {"name": "thread_sort_index", "pid": 43767502, "tid": 439228, "args": {"sort_index": 439228}, "ph": "M"}, {"name": "thread_sort_index", "pid": 43767502, "tid": 437675, "args": {"sort_index": 437675}, "ph": "M"}, {"name": "thread_sort_index", "pid": 43767502, "tid": 439084, "args": {"sort_index": 439084}, "ph": "M"}, {"name": "thread_sort_index", "pid": 43767502, "tid": 439085, "args": {"sort_index": 439085}, "ph": "M"}, {"name": "process_labels", "pid": 43767502, "tid": 0, "args": {"labels": "CPU"}, "ph": "M"}, {"name": "process_sort_index", "pid": 43767502, "tid": 0, "args": {"sort_index": 2}, "ph": "M"}, {"name": "process_name", "pid": 4376751100, "tid": 0, "args": {"name": "Overlap Analysis"}, "ph": "M"}, {"name": "thread_name", "pid": 4376751100, "tid": 0, "args": {"name": "Computing"}, "ph": "M"}, {"name": "thread_name", "pid": 4376751100, "tid": 1, "args": {"name": "Communication"}, "ph": "M"}, {"name": "thread_name", "pid": 4376751100, "tid": 2, "args": {"name": "Communication(Not Overlapped)"}, "ph": "M"}, {"name": "thread_name", "pid": 4376751100, "tid": 3, "args": {"name": "Free"}, "ph": "M"}, {"name": "process_labels", "pid": 4376751100, "tid": 0, "args": {"labels": "NPU"}, "ph": "M"}, {"name": "process_sort_index", "pid": 4376751100, "tid": 0, "args": {"sort_index": 11}, 
"ph": "M"}, {"name": "AscendCL@aclrtSynchronizeStream", "pid": 43767502, "tid": 437675, "ts": "1704161511420589.043", "dur": 12.77012757357446, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeStream", "item_id": "0", "connection_id": 0}, "ph": "X"}, {"name": "AscendCL@aclrtMemcpy", "pid": 43767502, "tid": 437675, "ts": "1704161511420605.703", "dur": 52.40052348122958, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtMemcpy", "item_id": "0", "connection_id": 1}, "ph": "X"}, {"name": "AscendCL@aclrtSynchronizeStream", "pid": 43767502, "tid": 437675, "ts": "1704161511420768.565", "dur": 3.72003716317126, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeStream", "item_id": "0", "connection_id": 2}, "ph": "X"}, {"name": "AscendCL@aclrtMemcpy", "pid": 43767502, "tid": 437675, "ts": "1704161511420773.545", "dur": 24.910248853386044, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtMemcpy", "item_id": "0", "connection_id": 3}, "ph": "X"}, {"name": "AscendCL@aclrtSynchronizeStream", "pid": 43767502, "tid": 437675, "ts": "1704161511422511.373", "dur": 6.250062438123757, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeStream", "item_id": "0", "connection_id": 4}, "ph": "X"}, {"name": "AscendCL@aclrtMemcpy", "pid": 43767502, "tid": 437675, "ts": "1704161511422519.282", "dur": 98.43098332552343, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtMemcpy", "item_id": "0", "connection_id": 5}, "ph": "X"}, {"name": "AscendCL@aclrtSynchronizeStream", "pid": 43767502, "tid": 437675, "ts": "1704161511422674.694", "dur": 3.680036763567268, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeStream", "item_id": "0", "connection_id": 6}, "ph": "X"}, {"name": "AscendCL@aclrtMemcpy", "pid": 43767502, "tid": 437675, "ts": "1704161511422679.264", "dur": 
24.1302410611082, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtMemcpy", "item_id": "0", "connection_id": 7}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422896.956", "dur": 21.100210791105802, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 9}, "ph": "X"}, {"name": "HostToDevice38654705663", "ph": "s", "cat": "HostToDevice", "id": "38654705663", "pid": 43767502, "tid": 439084, "ts": "1704161511422898.906"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422904.204", "dur": 1.35, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 279, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 8}, "ph": "X"}, {"name": "HostToDevice38654705663", "ph": "f", "id": "38654705663", "ts": "1704161511422904.204", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422904.204", "dur": 1.35, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422905.554", "dur": 11.71, "ph": "X"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422917.264", "dur": 1.2695, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 280, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 10}, "ph": "X"}, {"name": "HostToDevice47244640255", "ph": "f", "id": "47244640255", "ts": "1704161511422917.264", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422917.264", "dur": 1.2695, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422918.533", "dur": 31.51, "ph": "X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422925.227", "dur": 8.020080120600404, "args": 
{"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 11}, "ph": "X"}, {"name": "HostToDevice47244640255", "ph": "s", "cat": "HostToDevice", "id": "47244640255", "pid": 43767502, "tid": 439084, "ts": "1704161511422926.436"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422936.867", "dur": 6.910069031589626, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 13}, "ph": "X"}, {"name": "HostToDevice55834574847", "ph": "s", "cat": "HostToDevice", "id": "55834574847", "pid": 43767502, "tid": 439084, "ts": "1704161511422937.387"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422947.936", "dur": 6.510065035549705, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 15}, "ph": "X"}, {"name": "HostToDevice64424509439", "ph": "s", "cat": "HostToDevice", "id": "64424509439", "pid": 43767502, "tid": 439084, "ts": "1704161511422948.477"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422950.043", "dur": 2.52, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 281, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 12}, "ph": "X"}, {"name": "HostToDevice55834574847", "ph": "f", "id": "55834574847", "ts": "1704161511422950.043", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422950.043", "dur": 2.52, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422952.563", "dur": 0.11, "ph": "X"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422952.673", "dur": 1.26, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 282, "Batch Id": 0, 
"Subtask Id": 4294967295, "connection_id": 14}, "ph": "X"}, {"name": "HostToDevice64424509439", "ph": "f", "id": "64424509439", "ts": "1704161511422952.673", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422952.673", "dur": 1.26, "ph": "X"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422954.043", "dur": 4.21, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 283, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 16}, "ph": "X"}, {"name": "HostToDevice73014444031", "ph": "f", "id": "73014444031", "ts": "1704161511422954.043", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422954.043", "dur": 4.21, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422953.933", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422957.787", "dur": 7.850078422283438, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 17}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422958.253", "dur": 4.0, "ph": "X"}, {"name": "HostToDevice73014444031", "ph": "s", "cat": "HostToDevice", "id": "73014444031", "pid": 43767502, "tid": 439084, "ts": "1704161511422958.727"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422962.253", "dur": 1.34, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 284, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 18}, "ph": "X"}, {"name": "HostToDevice81604378623", "ph": "f", "id": "81604378623", "ts": "1704161511422962.253", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": 
"1704161511422962.253", "dur": 1.34, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422963.593", "dur": 4.5395, "ph": "X"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422968.132", "dur": 2.15, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 285, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 20}, "ph": "X"}, {"name": "HostToDevice90194313215", "ph": "f", "id": "90194313215", "ts": "1704161511422968.132", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422968.132", "dur": 2.15, "ph": "X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422969.347", "dur": 6.140061339212778, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 19}, "ph": "X"}, {"name": "HostToDevice81604378623", "ph": "s", "cat": "HostToDevice", "id": "81604378623", "pid": 43767502, "tid": 439084, "ts": "1704161511422969.747"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422970.282", "dur": 12.75, "ph": "X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422979.117", "dur": 6.730067233371662, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 21}, "ph": "X"}, {"name": "HostToDevice90194313215", "ph": "s", "cat": "HostToDevice", "id": "90194313215", "pid": 43767502, "tid": 439084, "ts": "1704161511422979.827"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422983.032", "dur": 1.25, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 286, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 22}, "ph": "X"}, {"name": "HostToDevice98784247807", "ph": "f", "id": "98784247807", "ts": 
"1704161511422983.032", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422983.032", "dur": 1.25, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422984.282", "dur": 0.11, "ph": "X"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422984.392", "dur": 1.32, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 287, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 24}, "ph": "X"}, {"name": "HostToDevice107374182399", "ph": "f", "id": "107374182399", "ts": "1704161511422984.392", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422984.392", "dur": 1.32, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422985.712", "dur": 5.32, "ph": "X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422989.447", "dur": 6.380063736836731, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 23}, "ph": "X"}, {"name": "HostToDevice98784247807", "ph": "s", "cat": "HostToDevice", "id": "98784247807", "pid": 43767502, "tid": 439084, "ts": "1704161511422989.787"}, {"name": "aclnnInplaceZero_ZerosLikeAiCore_ZerosLike", "pid": 800, "tid": 3, "ts": "1704161511422991.032", "dur": 1.25, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 288, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 26}, "ph": "X"}, {"name": "HostToDevice115964116991", "ph": "f", "id": "115964116991", "ts": "1704161511422991.032", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511422991.032", "dur": 1.25, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511422992.282", "dur": 1093.429, "ph": 
"X"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511422998.767", "dur": 6.2000619386187665, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 25}, "ph": "X"}, {"name": "HostToDevice107374182399", "ph": "s", "cat": "HostToDevice", "id": "107374182399", "pid": 43767502, "tid": 439084, "ts": "1704161511422999.217"}, {"name": "AscendCL@aclnnInplaceZero", "pid": 43767502, "tid": 439084, "ts": "1704161511423008.148", "dur": 5.890058841687829, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceZero", "item_id": "0", "connection_id": 27}, "ph": "X"}, {"name": "HostToDevice115964116991", "ph": "s", "cat": "HostToDevice", "id": "115964116991", "pid": 43767502, "tid": 439084, "ts": "1704161511423008.527"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423176.929", "dur": 1.890018881288624, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 28}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423184.389", "dur": 0.6700066933668668, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 29}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423186.669", "dur": 0.2800027972279443, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 30}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423187.659", "dur": 0.1900018981189622, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 31}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, 
"tid": 437675, "ts": "1704161511423188.659", "dur": 0.24000239762395226, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 32}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423189.249", "dur": 0.15000149851497016, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 33}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423191.109", "dur": 0.25000249752495024, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 34}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423192.149", "dur": 0.21000209792095823, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 35}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 437675, "ts": "1704161511423193.189", "dur": 0.3600035964359284, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 36}, "ph": "X"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511423223.879", "dur": 1099.9709887101772, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 70}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511423227.410", "dur": 27.990279622893432, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 37}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423378.411", "dur": 0.5900058941588827, "args": {"Thread Id": 437675, 
"Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 38}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423382.711", "dur": 0.24000239762395226, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 39}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423383.801", "dur": 0.2800027972279443, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 40}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423384.691", "dur": 0.1900018981189622, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 41}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423386.501", "dur": 0.2600025974259483, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 42}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423387.301", "dur": 0.14000139861397215, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 43}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 437675, "ts": "1704161511423388.111", "dur": 0.34000339663393236, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 44}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423453.302", "dur": 0.5700056943568866, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", 
"connection_id": 45}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423455.412", "dur": 0.22000219782195624, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 46}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423456.602", "dur": 0.44000439564391247, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 47}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423457.652", "dur": 0.16000159841596817, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 48}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423458.592", "dur": 0.3600035964359284, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 49}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423459.312", "dur": 0.23000229772295425, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 50}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423460.212", "dur": 0.40000399603992043, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 51}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423460.882", "dur": 0.14000139861397215, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 52}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 
43767502, "tid": 437675, "ts": "1704161511423461.552", "dur": 0.15000149851497016, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 53}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423541.533", "dur": 0.5400053946538926, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 54}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423543.733", "dur": 0.25000249752495024, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 55}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423544.433", "dur": 0.3800037962379244, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 56}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423545.133", "dur": 0.23000229772295425, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 57}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 437675, "ts": "1704161511423546.133", "dur": 0.41000409594091847, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 58}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 437675, "ts": "1704161511423546.923", "dur": 0.15000149851497016, "args": {"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 59}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 437675, "ts": "1704161511423547.473", "dur": 0.2700026973269463, "args": 
{"Thread Id": 437675, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 60}, "ph": "X"}, {"name": "HostToDevice266287972351", "ph": "s", "cat": "HostToDevice", "id": "266287972351", "pid": 43767502, "tid": 439084, "ts": "1704161511424082.978"}, {"name": "trans_Cast_0", "pid": 800, "tid": 3, "ts": "1704161511424085.712", "dur": 4.28, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 289, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 61}, "ph": "X"}, {"name": "HostToDevice266287972351", "ph": "f", "id": "266287972351", "ts": "1704161511424085.712", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424085.712", "dur": 4.28, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424089.992", "dur": 47.4395, "ph": "X"}, {"name": "trans_TransData_1", "pid": 800, "tid": 3, "ts": "1704161511424137.431", "dur": 9.96, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 290, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 62}, "ph": "X"}, {"name": "HostToDevice270582939647", "ph": "f", "id": "270582939647", "ts": "1704161511424137.431", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424137.431", "dur": 9.96, "ph": "X"}, {"name": "HostToDevice270582939647", "ph": "s", "cat": "HostToDevice", "id": "270582939647", "pid": 43767502, "tid": 439084, "ts": "1704161511424145.799"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424147.391", "dur": 11.61, "ph": "X"}, {"name": "trans_Cast_2", "pid": 800, "tid": 3, "ts": "1704161511424159.001", "dur": 1.44, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 291, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 63}, "ph": "X"}, {"name": "HostToDevice274877906943", "ph": "f", "id": 
"274877906943", "ts": "1704161511424159.001", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424159.001", "dur": 1.44, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424160.441", "dur": 36.7895, "ph": "X"}, {"name": "HostToDevice274877906943", "ph": "s", "cat": "HostToDevice", "id": "274877906943", "pid": 43767502, "tid": 439084, "ts": "1704161511424176.029"}, {"name": "trans_TransData_3_MemSet", "pid": 800, "tid": 3, "ts": "1704161511424197.230", "dur": 8.25, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 292, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 64}, "ph": "X"}, {"name": "HostToDevice279172874239", "ph": "f", "id": "279172874239", "ts": "1704161511424197.230", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424197.230", "dur": 8.25, "ph": "X"}, {"name": "HostToDevice279172874239", "ph": "s", "cat": "HostToDevice", "id": "279172874239", "pid": 43767502, "tid": 439084, "ts": "1704161511424204.480"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424205.480", "dur": 10.29, "ph": "X"}, {"name": "HostToDevice283467841535", "ph": "s", "cat": "HostToDevice", "id": "283467841535", "pid": 43767502, "tid": 439084, "ts": "1704161511424214.730"}, {"name": "trans_TransData_3", "pid": 800, "tid": 3, "ts": "1704161511424215.770", "dur": 5.3495, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 293, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 65}, "ph": "X"}, {"name": "HostToDevice283467841535", "ph": "f", "id": "283467841535", "ts": "1704161511424215.770", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424215.770", "dur": 5.3495, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511424221.120", "dur": 2.81, "ph": "X"}, {"name": "trans_Cast_4", "pid": 800, "tid": 3, "ts": "1704161511424223.930", "dur": 1.21, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 294, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 66}, "ph": "X"}, {"name": "HostToDevice287762808831", "ph": "f", "id": "287762808831", "ts": "1704161511424223.930", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424223.930", "dur": 1.21, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424225.140", "dur": 32.73, "ph": "X"}, {"name": "HostToDevice287762808831", "ph": "s", "cat": "HostToDevice", "id": "287762808831", "pid": 43767502, "tid": 439084, "ts": "1704161511424231.850"}, {"name": "Conv2D1", "pid": 800, "tid": 3, "ts": "1704161511424257.870", "dur": 22.5995, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 295, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 67}, "ph": "X"}, {"name": "HostToDevice292057776127", "ph": "f", "id": "292057776127", "ts": "1704161511424257.870", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424257.870", "dur": 22.5995, "ph": "X"}, {"name": "HostToDevice292057776127", "ph": "s", "cat": "HostToDevice", "id": "292057776127", "pid": 43767502, "tid": 439084, "ts": "1704161511424266.080"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424280.470", "dur": 5.79, "ph": "X"}, {"name": "trans_TransData_5", "pid": 800, "tid": 3, "ts": "1704161511424286.260", "dur": 7.84, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 296, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 68}, "ph": "X"}, {"name": "HostToDevice296352743423", "ph": "f", "id": "296352743423", "ts": "1704161511424286.260", "cat": "HostToDevice", "pid": 800, "tid": 3, 
"bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424286.260", "dur": 7.84, "ph": "X"}, {"name": "HostToDevice296352743423", "ph": "s", "cat": "HostToDevice", "id": "296352743423", "pid": 43767502, "tid": 439084, "ts": "1704161511424292.700"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424294.100", "dur": 0.11, "ph": "X"}, {"name": "trans_Cast_6", "pid": 800, "tid": 3, "ts": "1704161511424294.210", "dur": 5.92, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 297, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 69}, "ph": "X"}, {"name": "HostToDevice300647710719", "ph": "f", "id": "300647710719", "ts": "1704161511424294.210", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424294.210", "dur": 5.92, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424300.130", "dur": 33.2995, "ph": "X"}, {"name": "HostToDevice300647710719", "ph": "s", "cat": "HostToDevice", "id": "300647710719", "pid": 43767502, "tid": 439084, "ts": "1704161511424310.840"}, {"name": "aclnnRelu_Relu_Relu", "pid": 800, "tid": 3, "ts": "1704161511424333.429", "dur": 7.11, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 298, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 71}, "ph": "X"}, {"name": "HostToDevice309237645311", "ph": "f", "id": "309237645311", "ts": "1704161511424333.429", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424333.429", "dur": 7.11, "ph": "X"}, {"name": "AscendCL@aclnnRelu", "pid": 43767502, "tid": 439084, "ts": "1704161511424340.500", "dur": 11.270112588424757, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnRelu", "item_id": "0", "connection_id": 72}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511424340.539", "dur": 497.405, "ph": "X"}, {"name": "HostToDevice309237645311", "ph": "s", "cat": "HostToDevice", "id": "309237645311", "pid": 43767502, "tid": 439084, "ts": "1704161511424341.750"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511424357.161", "dur": 610.6161000548395, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 84}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511424358.671", "dur": 16.85016833318165, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 73}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511424376.381", "dur": 13.220132069119371, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 74}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511424406.401", "dur": 4.660046553865073, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 75}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511424412.062", "dur": 2.7500274727744527, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 76}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511424415.691", "dur": 2.900028971289423, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 77}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511424420.361", "dur": 2.3500234767345325, "args": {"Thread Id": 439085, "Mode": 
"ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 78}, "ph": "X"}, {"name": "trans_Cast_7", "pid": 800, "tid": 3, "ts": "1704161511424837.944", "dur": 6.02, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 299, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 79}, "ph": "X"}, {"name": "HostToDevice343597383679", "ph": "f", "id": "343597383679", "ts": "1704161511424837.944", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424837.944", "dur": 6.02, "ph": "X"}, {"name": "HostToDevice343597383679", "ph": "s", "cat": "HostToDevice", "id": "343597383679", "pid": 43767502, "tid": 439084, "ts": "1704161511424841.526"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424843.964", "dur": 27.4495, "ph": "X"}, {"name": "trans_TransData_8", "pid": 800, "tid": 3, "ts": "1704161511424871.414", "dur": 9.32, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 300, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 80}, "ph": "X"}, {"name": "HostToDevice347892350975", "ph": "f", "id": "347892350975", "ts": "1704161511424871.414", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424871.414", "dur": 9.32, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424880.734", "dur": 27.2, "ph": "X"}, {"name": "HostToDevice347892350975", "ph": "s", "cat": "HostToDevice", "id": "347892350975", "pid": 43767502, "tid": 439084, "ts": "1704161511424881.266"}, {"name": "MaxPoolWithArgmaxV12", "pid": 800, "tid": 3, "ts": "1704161511424907.934", "dur": 14.4395, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 301, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 81}, "ph": "X"}, {"name": "HostToDevice352187318271", "ph": "f", "id": "352187318271", "ts": 
"1704161511424907.934", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424907.934", "dur": 14.4395, "ph": "X"}, {"name": "HostToDevice352187318271", "ph": "s", "cat": "HostToDevice", "id": "352187318271", "pid": 43767502, "tid": 439084, "ts": "1704161511424917.196"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424922.373", "dur": 7.05, "ph": "X"}, {"name": "trans_TransData_9", "pid": 800, "tid": 3, "ts": "1704161511424929.423", "dur": 10.43, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 302, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 82}, "ph": "X"}, {"name": "HostToDevice356482285567", "ph": "f", "id": "356482285567", "ts": "1704161511424929.423", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424929.423", "dur": 10.43, "ph": "X"}, {"name": "HostToDevice356482285567", "ph": "s", "cat": "HostToDevice", "id": "356482285567", "pid": 43767502, "tid": 439084, "ts": "1704161511424938.727"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424939.853", "dur": 6.15, "ph": "X"}, {"name": "trans_Cast_10", "pid": 800, "tid": 3, "ts": "1704161511424946.003", "dur": 4.68, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 303, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 83}, "ph": "X"}, {"name": "HostToDevice360777252863", "ph": "f", "id": "360777252863", "ts": "1704161511424946.003", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511424946.003", "dur": 4.68, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511424950.683", "dur": 746.5625, "ph": "X"}, {"name": "HostToDevice360777252863", "ph": "s", "cat": "HostToDevice", "id": "360777252863", "pid": 43767502, "tid": 439084, "ts": 
"1704161511424955.157"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511424977.097", "dur": 915.5591464358729, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 99}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511424978.807", "dur": 14.310142958328154, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 85}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511425015.387", "dur": 6.270062637925753, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 86}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425023.618", "dur": 1.830018281882636, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 87}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425026.627", "dur": 0.7800077922778449, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 88}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425028.507", "dur": 0.5700056943568866, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 89}, "ph": "X"}, {"name": "trans_Cast_13", "pid": 800, "tid": 3, "ts": "1704161511425697.246", "dur": 4.03, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 304, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 90}, "ph": "X"}, {"name": "HostToDevice390842023935", "ph": "f", "id": "390842023935", "ts": "1704161511425697.246", 
"cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425697.246", "dur": 4.03, "ph": "X"}, {"name": "HostToDevice390842023935", "ph": "s", "cat": "HostToDevice", "id": "390842023935", "pid": 43767502, "tid": 439084, "ts": "1704161511425699.794"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425701.276", "dur": 32.4595, "ph": "X"}, {"name": "trans_TransData_14", "pid": 800, "tid": 3, "ts": "1704161511425733.735", "dur": 8.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 305, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 91}, "ph": "X"}, {"name": "HostToDevice395136991231", "ph": "f", "id": "395136991231", "ts": "1704161511425733.735", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425733.735", "dur": 8.53, "ph": "X"}, {"name": "HostToDevice395136991231", "ph": "s", "cat": "HostToDevice", "id": "395136991231", "pid": 43767502, "tid": 439084, "ts": "1704161511425742.195"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425742.265", "dur": 17.81, "ph": "X"}, {"name": "trans_Cast_15", "pid": 800, "tid": 3, "ts": "1704161511425760.075", "dur": 2.32, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 306, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 92}, "ph": "X"}, {"name": "HostToDevice399431958527", "ph": "f", "id": "399431958527", "ts": "1704161511425760.075", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425760.075", "dur": 2.32, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425762.395", "dur": 15.1695, "ph": "X"}, {"name": "HostToDevice399431958527", "ph": "s", "cat": "HostToDevice", "id": "399431958527", "pid": 43767502, "tid": 439084, "ts": "1704161511425768.945"}, {"name": 
"trans_TransData_16_MemSet", "pid": 800, "tid": 3, "ts": "1704161511425777.564", "dur": 8.97, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 307, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 93}, "ph": "X"}, {"name": "HostToDevice403726925823", "ph": "f", "id": "403726925823", "ts": "1704161511425777.564", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425777.564", "dur": 8.97, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425786.534", "dur": 0.11, "ph": "X"}, {"name": "trans_TransData_16", "pid": 800, "tid": 3, "ts": "1704161511425786.644", "dur": 6.82, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 308, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 94}, "ph": "X"}, {"name": "HostToDevice408021893119", "ph": "f", "id": "408021893119", "ts": "1704161511425786.644", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425786.644", "dur": 6.82, "ph": "X"}, {"name": "HostToDevice403726925823", "ph": "s", "cat": "HostToDevice", "id": "403726925823", "pid": 43767502, "tid": 439084, "ts": "1704161511425791.615"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425793.464", "dur": 9.58, "ph": "X"}, {"name": "HostToDevice408021893119", "ph": "s", "cat": "HostToDevice", "id": "408021893119", "pid": 43767502, "tid": 439084, "ts": "1704161511425800.125"}, {"name": "trans_Cast_17", "pid": 800, "tid": 3, "ts": "1704161511425803.044", "dur": 1.29, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 309, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 95}, "ph": "X"}, {"name": "HostToDevice412316860415", "ph": "f", "id": "412316860415", "ts": "1704161511425803.044", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 
4376751100, "tid": 0, "ts": "1704161511425803.044", "dur": 1.29, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425804.334", "dur": 26.4595, "ph": "X"}, {"name": "HostToDevice412316860415", "ph": "s", "cat": "HostToDevice", "id": "412316860415", "pid": 43767502, "tid": 439084, "ts": "1704161511425816.335"}, {"name": "Conv2D3", "pid": 800, "tid": 3, "ts": "1704161511425830.794", "dur": 17.94, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 310, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 96}, "ph": "X"}, {"name": "HostToDevice416611827711", "ph": "f", "id": "416611827711", "ts": "1704161511425830.794", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425830.794", "dur": 17.94, "ph": "X"}, {"name": "HostToDevice416611827711", "ph": "s", "cat": "HostToDevice", "id": "416611827711", "pid": 43767502, "tid": 439084, "ts": "1704161511425841.256"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425848.734", "dur": 4.69, "ph": "X"}, {"name": "trans_TransData_18", "pid": 800, "tid": 3, "ts": "1704161511425853.424", "dur": 11.99, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 311, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 97}, "ph": "X"}, {"name": "HostToDevice420906795007", "ph": "f", "id": "420906795007", "ts": "1704161511425853.424", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425853.424", "dur": 11.99, "ph": "X"}, {"name": "HostToDevice420906795007", "ph": "s", "cat": "HostToDevice", "id": "420906795007", "pid": 43767502, "tid": 439084, "ts": "1704161511425865.476"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425865.414", "dur": 4.11, "ph": "X"}, {"name": "trans_Cast_19", "pid": 800, "tid": 3, "ts": "1704161511425869.524", "dur": 4.4295, "args": {"Model Id": 
4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 312, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 98}, "ph": "X"}, {"name": "HostToDevice425201762303", "ph": "f", "id": "425201762303", "ts": "1704161511425869.524", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425869.524", "dur": 4.4295, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425873.954", "dur": 21.76, "ph": "X"}, {"name": "HostToDevice425201762303", "ph": "s", "cat": "HostToDevice", "id": "425201762303", "pid": 43767502, "tid": 439084, "ts": "1704161511425881.906"}, {"name": "aclnnRelu_Relu_Relu", "pid": 800, "tid": 3, "ts": "1704161511425895.714", "dur": 5.16, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 313, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 100}, "ph": "X"}, {"name": "HostToDevice433791696895", "ph": "f", "id": "433791696895", "ts": "1704161511425895.714", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425895.714", "dur": 5.16, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425900.874", "dur": 55.0895, "ph": "X"}, {"name": "AscendCL@aclnnRelu", "pid": 43767502, "tid": 439084, "ts": "1704161511425904.466", "dur": 10.340103297631945, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnRelu", "item_id": "0", "connection_id": 101}, "ph": "X"}, {"name": "HostToDevice433791696895", "ph": "s", "cat": "HostToDevice", "id": "433791696895", "pid": 43767502, "tid": 439084, "ts": "1704161511425905.746"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511425919.136", "dur": 134.2713413707003, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 113}, "ph": "X"}, {"name": 
"AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511425920.727", "dur": 11.390113787236734, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 102}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511425941.097", "dur": 5.780057742776851, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 103}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425949.037", "dur": 1.6500164836646718, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 104}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425951.186", "dur": 1.0700106894067871, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 105}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425952.497", "dur": 0.9700096903968071, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 106}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511425954.087", "dur": 0.7300072927728548, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 107}, "ph": "X"}, {"name": "HostToDevice468151435263", "ph": "s", "cat": "HostToDevice", "id": "468151435263", "pid": 43767502, "tid": 439084, "ts": "1704161511425955.377"}, {"name": "trans_Cast_7", "pid": 800, "tid": 3, "ts": "1704161511425955.963", "dur": 4.32, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 314, "Batch Id": 0, "Subtask Id": 4294967295, 
"connection_id": 108}, "ph": "X"}, {"name": "HostToDevice468151435263", "ph": "f", "id": "468151435263", "ts": "1704161511425955.963", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425955.963", "dur": 4.32, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425960.283", "dur": 15.0295, "ph": "X"}, {"name": "trans_TransData_8", "pid": 800, "tid": 3, "ts": "1704161511425975.312", "dur": 14.0, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 315, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 109}, "ph": "X"}, {"name": "HostToDevice472446402559", "ph": "f", "id": "472446402559", "ts": "1704161511425975.312", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425975.312", "dur": 14.0, "ph": "X"}, {"name": "HostToDevice472446402559", "ph": "s", "cat": "HostToDevice", "id": "472446402559", "pid": 43767502, "tid": 439084, "ts": "1704161511425981.047"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511425989.312", "dur": 7.74, "ph": "X"}, {"name": "MaxPoolWithArgmaxV12", "pid": 800, "tid": 3, "ts": "1704161511425997.052", "dur": 14.11, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 316, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 110}, "ph": "X"}, {"name": "HostToDevice476741369855", "ph": "f", "id": "476741369855", "ts": "1704161511425997.052", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511425997.052", "dur": 14.11, "ph": "X"}, {"name": "HostToDevice476741369855", "ph": "s", "cat": "HostToDevice", "id": "476741369855", "pid": 43767502, "tid": 439084, "ts": "1704161511426007.747"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426011.162", "dur": 8.52, "ph": "X"}, {"name": "trans_TransData_9", "pid": 
800, "tid": 3, "ts": "1704161511426019.682", "dur": 13.7495, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 317, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 111}, "ph": "X"}, {"name": "HostToDevice481036337151", "ph": "f", "id": "481036337151", "ts": "1704161511426019.682", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426019.682", "dur": 13.7495, "ph": "X"}, {"name": "HostToDevice481036337151", "ph": "s", "cat": "HostToDevice", "id": "481036337151", "pid": 43767502, "tid": 439084, "ts": "1704161511426028.018"}, {"name": "trans_Cast_10", "pid": 800, "tid": 3, "ts": "1704161511426033.542", "dur": 3.97, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 318, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 112}, "ph": "X"}, {"name": "HostToDevice485331304447", "ph": "f", "id": "485331304447", "ts": "1704161511426033.542", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426033.542", "dur": 3.97, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426033.432", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426037.512", "dur": 15.19, "ph": "X"}, {"name": "HostToDevice485331304447", "ph": "s", "cat": "HostToDevice", "id": "485331304447", "pid": 43767502, "tid": 439084, "ts": "1704161511426042.627"}, {"name": "aclnnAddmm_Muls_Muls", "pid": 800, "tid": 3, "ts": "1704161511426052.702", "dur": 1.58, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 319, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 115}, "ph": "X"}, {"name": "HostToDevice498216206335", "ph": "f", "id": "498216206335", "ts": "1704161511426052.702", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": 
"1704161511426052.702", "dur": 1.58, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426054.282", "dur": 8.36, "ph": "X"}, {"name": "aclnnAddmm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426062.642", "dur": 4.5, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 320, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 119}, "ph": "X"}, {"name": "HostToDevice515396075519", "ph": "f", "id": "515396075519", "ts": "1704161511426062.642", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426062.642", "dur": 4.5, "ph": "X"}, {"name": "AscendCL@aclnnAddmm", "pid": 43767502, "tid": 439084, "ts": "1704161511426063.628", "dur": 54.260542062815205, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddmm", "item_id": "0", "connection_id": 127}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511426065.408", "dur": 5.490054845647908, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 114}, "ph": "X"}, {"name": "HostToDevice498216206335", "ph": "s", "cat": "HostToDevice", "id": "498216206335", "pid": 43767502, "tid": 439084, "ts": "1704161511426066.738"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426067.142", "dur": 6.4695, "ph": "X"}, {"name": "aclnnAddmm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426073.612", "dur": 4.62, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 321, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 120}, "ph": "X"}, {"name": "HostToDevice519691042815", "ph": "f", "id": "519691042815", "ts": "1704161511426073.612", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426073.612", "dur": 4.62, "ph": "X"}, {"name": 
"AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511426074.388", "dur": 1.3300132868327355, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 116}, "ph": "X"}, {"name": "HostToDevice515396075519", "ph": "s", "cat": "HostToDevice", "id": "515396075519", "pid": 43767502, "tid": 439084, "ts": "1704161511426076.248"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511426077.468", "dur": 0.8100080919808389, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 117}, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426078.342", "dur": 5.24, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 322, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 121}, "ph": "X"}, {"name": "HostToDevice523986010111", "ph": "f", "id": "523986010111", "ts": "1704161511426078.342", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426078.342", "dur": 5.24, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426078.232", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511426078.698", "dur": 0.6700066933668668, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 118}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426083.582", "dur": 0.11, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426083.692", "dur": 6.36, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 323, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 122}, "ph": "X"}, {"name": 
"HostToDevice528280977407", "ph": "f", "id": "528280977407", "ts": "1704161511426083.692", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426083.692", "dur": 6.36, "ph": "X"}, {"name": "HostToDevice519691042815", "ph": "s", "cat": "HostToDevice", "id": "519691042815", "pid": 43767502, "tid": 439084, "ts": "1704161511426083.888"}, {"name": "HostToDevice523986010111", "ph": "s", "cat": "HostToDevice", "id": "523986010111", "pid": 43767502, "tid": 439084, "ts": "1704161511426089.768"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426090.052", "dur": 0.12, "ph": "X"}, {"name": "aclnnAddmm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511426090.172", "dur": 7.02, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 324, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 123}, "ph": "X"}, {"name": "HostToDevice532575944703", "ph": "f", "id": "532575944703", "ts": "1704161511426090.172", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426090.172", "dur": 7.02, "ph": "X"}, {"name": "HostToDevice528280977407", "ph": "s", "cat": "HostToDevice", "id": "528280977407", "pid": 43767502, "tid": 439084, "ts": "1704161511426094.468"}, {"name": "aclnnAddmm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511426097.302", "dur": 8.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 325, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 124}, "ph": "X"}, {"name": "HostToDevice536870911999", "ph": "f", "id": "536870911999", "ts": "1704161511426097.302", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426097.302", "dur": 8.41, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426097.192", "dur": 0.11, "ph": "X"}, 
{"name": "HostToDevice532575944703", "ph": "s", "cat": "HostToDevice", "id": "532575944703", "pid": 43767502, "tid": 439084, "ts": "1704161511426098.808"}, {"name": "HostToDevice536870911999", "ph": "s", "cat": "HostToDevice", "id": "536870911999", "pid": 43767502, "tid": 439084, "ts": "1704161511426102.858"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426105.822", "dur": 4.13, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 326, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 125}, "ph": "X"}, {"name": "HostToDevice541165879295", "ph": "f", "id": "541165879295", "ts": "1704161511426105.822", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426105.822", "dur": 4.13, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426105.712", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice541165879295", "ph": "s", "cat": "HostToDevice", "id": "541165879295", "pid": 43767502, "tid": 439084, "ts": "1704161511426108.118"}, {"name": "aclnnAddmm_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511426110.062", "dur": 7.78, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 327, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 126}, "ph": "X"}, {"name": "HostToDevice545460846591", "ph": "f", "id": "545460846591", "ts": "1704161511426110.062", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426110.062", "dur": 7.78, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426109.952", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice545460846591", "ph": "s", "cat": "HostToDevice", "id": "545460846591", "pid": 43767502, "tid": 439084, "ts": "1704161511426112.828"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426117.842", "dur": 0.12, "ph": "X"}, {"name": 
"aclnnRelu_Relu_Relu", "pid": 800, "tid": 3, "ts": "1704161511426117.962", "dur": 2.06, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 328, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 128}, "ph": "X"}, {"name": "HostToDevice554050781183", "ph": "f", "id": "554050781183", "ts": "1704161511426117.962", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426117.962", "dur": 2.06, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426120.022", "dur": 0.11, "ph": "X"}, {"name": "aclnnAddmm_Muls_Muls", "pid": 800, "tid": 3, "ts": "1704161511426120.132", "dur": 1.3795, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 329, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 130}, "ph": "X"}, {"name": "HostToDevice562640715775", "ph": "f", "id": "562640715775", "ts": "1704161511426120.132", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426120.132", "dur": 1.3795, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426121.511", "dur": 10.73, "ph": "X"}, {"name": "AscendCL@aclnnRelu", "pid": 43767502, "tid": 439084, "ts": "1704161511426122.849", "dur": 5.490054845647908, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnRelu", "item_id": "0", "connection_id": 129}, "ph": "X"}, {"name": "HostToDevice554050781183", "ph": "s", "cat": "HostToDevice", "id": "554050781183", "pid": 43767502, "tid": 439084, "ts": "1704161511426123.438"}, {"name": "AscendCL@aclnnAddmm", "pid": 43767502, "tid": 439084, "ts": "1704161511426131.478", "dur": 55.46055405093497, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddmm", "item_id": "0", "connection_id": 139}, "ph": "X"}, {"name": "aclnnAddmm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426132.241", 
"dur": 2.04, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 330, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 131}, "ph": "X"}, {"name": "HostToDevice566935683071", "ph": "f", "id": "566935683071", "ts": "1704161511426132.241", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426132.241", "dur": 2.04, "ph": "X"}, {"name": "HostToDevice562640715775", "ph": "s", "cat": "HostToDevice", "id": "562640715775", "pid": 43767502, "tid": 439084, "ts": "1704161511426133.278"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426134.281", "dur": 4.39, "ph": "X"}, {"name": "HostToDevice566935683071", "ph": "s", "cat": "HostToDevice", "id": "566935683071", "pid": 43767502, "tid": 439084, "ts": "1704161511426138.549"}, {"name": "aclnnAddmm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426138.671", "dur": 2.21, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 331, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 132}, "ph": "X"}, {"name": "HostToDevice571230650367", "ph": "f", "id": "571230650367", "ts": "1704161511426138.671", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426138.671", "dur": 2.21, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426140.881", "dur": 5.46, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426146.341", "dur": 8.39, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 332, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 133}, "ph": "X"}, {"name": "HostToDevice575525617663", "ph": "f", "id": "575525617663", "ts": "1704161511426146.341", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426146.341", "dur": 
8.39, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426154.841", "dur": 7.62, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 333, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 134}, "ph": "X"}, {"name": "HostToDevice579820584959", "ph": "f", "id": "579820584959", "ts": "1704161511426154.841", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426154.841", "dur": 7.62, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426154.731", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice571230650367", "ph": "s", "cat": "HostToDevice", "id": "571230650367", "pid": 43767502, "tid": 439084, "ts": "1704161511426155.799"}, {"name": "HostToDevice575525617663", "ph": "s", "cat": "HostToDevice", "id": "575525617663", "pid": 43767502, "tid": 439084, "ts": "1704161511426159.929"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426162.461", "dur": 3.12, "ph": "X"}, {"name": "HostToDevice579820584959", "ph": "s", "cat": "HostToDevice", "id": "579820584959", "pid": 43767502, "tid": 439084, "ts": "1704161511426164.829"}, {"name": "aclnnAddmm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511426165.581", "dur": 6.3295, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 334, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 135}, "ph": "X"}, {"name": "HostToDevice584115552255", "ph": "f", "id": "584115552255", "ts": "1704161511426165.581", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426165.581", "dur": 6.3295, "ph": "X"}, {"name": "HostToDevice584115552255", "ph": "s", "cat": "HostToDevice", "id": "584115552255", "pid": 43767502, "tid": 439084, "ts": "1704161511426169.089"}, {"name": "aclnnAddmm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": 
"1704161511426172.020", "dur": 2.74, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 335, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 136}, "ph": "X"}, {"name": "HostToDevice588410519551", "ph": "f", "id": "588410519551", "ts": "1704161511426172.020", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426172.020", "dur": 2.74, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426171.910", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice588410519551", "ph": "s", "cat": "HostToDevice", "id": "588410519551", "pid": 43767502, "tid": 439084, "ts": "1704161511426172.929"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426174.760", "dur": 6.76, "ph": "X"}, {"name": "HostToDevice592705486847", "ph": "s", "cat": "HostToDevice", "id": "592705486847", "pid": 43767502, "tid": 439084, "ts": "1704161511426176.969"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426181.520", "dur": 8.12, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 336, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 137}, "ph": "X"}, {"name": "HostToDevice592705486847", "ph": "f", "id": "592705486847", "ts": "1704161511426181.520", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426181.520", "dur": 8.12, "ph": "X"}, {"name": "HostToDevice597000454143", "ph": "s", "cat": "HostToDevice", "id": "597000454143", "pid": 43767502, "tid": 439084, "ts": "1704161511426181.789"}, {"name": "aclnnAddmm_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511426189.750", "dur": 8.96, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 337, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 138}, "ph": "X"}, {"name": "HostToDevice597000454143", "ph": "f", "id": 
"597000454143", "ts": "1704161511426189.750", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426189.750", "dur": 8.96, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426189.640", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnRelu", "pid": 43767502, "tid": 439084, "ts": "1704161511426191.109", "dur": 5.650056444063876, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnRelu", "item_id": "0", "connection_id": 141}, "ph": "X"}, {"name": "HostToDevice605590388735", "ph": "s", "cat": "HostToDevice", "id": "605590388735", "pid": 43767502, "tid": 439084, "ts": "1704161511426191.629"}, {"name": "aclnnRelu_Relu_Relu", "pid": 800, "tid": 3, "ts": "1704161511426198.830", "dur": 1.91, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 338, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 140}, "ph": "X"}, {"name": "HostToDevice605590388735", "ph": "f", "id": "605590388735", "ts": "1704161511426198.830", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426198.830", "dur": 1.91, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426198.710", "dur": 0.12, "ph": "X"}, {"name": "AscendCL@aclnnAddmm", "pid": 43767502, "tid": 439084, "ts": "1704161511426199.989", "dur": 48.33048282152339, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddmm", "item_id": "0", "connection_id": 151}, "ph": "X"}, {"name": "aclnnAddmm_Muls_Muls", "pid": 800, "tid": 3, "ts": "1704161511426200.860", "dur": 1.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 339, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 142}, "ph": "X"}, {"name": "HostToDevice614180323327", "ph": "f", "id": "614180323327", "ts": "1704161511426200.860", "cat": "HostToDevice", "pid": 800, 
"tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426200.860", "dur": 1.41, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426200.740", "dur": 0.12, "ph": "X"}, {"name": "HostToDevice614180323327", "ph": "s", "cat": "HostToDevice", "id": "614180323327", "pid": 43767502, "tid": 439084, "ts": "1704161511426201.849"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426202.270", "dur": 0.11, "ph": "X"}, {"name": "aclnnAddmm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426202.380", "dur": 1.9, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 340, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 143}, "ph": "X"}, {"name": "HostToDevice618475290623", "ph": "f", "id": "618475290623", "ts": "1704161511426202.380", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426202.380", "dur": 1.9, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426204.280", "dur": 19.2595, "ph": "X"}, {"name": "HostToDevice618475290623", "ph": "s", "cat": "HostToDevice", "id": "618475290623", "pid": 43767502, "tid": 439084, "ts": "1704161511426215.529"}, {"name": "HostToDevice622770257919", "ph": "s", "cat": "HostToDevice", "id": "622770257919", "pid": 43767502, "tid": 439084, "ts": "1704161511426220.350"}, {"name": "aclnnAddmm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426223.540", "dur": 1.49, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 341, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 144}, "ph": "X"}, {"name": "HostToDevice622770257919", "ph": "f", "id": "622770257919", "ts": "1704161511426223.540", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426223.540", "dur": 1.49, "ph": "X"}, {"name": "HostToDevice627065225215", 
"ph": "s", "cat": "HostToDevice", "id": "627065225215", "pid": 43767502, "tid": 439084, "ts": "1704161511426225.069"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426225.030", "dur": 0.11, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426225.140", "dur": 7.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 342, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 145}, "ph": "X"}, {"name": "HostToDevice627065225215", "ph": "f", "id": "627065225215", "ts": "1704161511426225.140", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426225.140", "dur": 7.41, "ph": "X"}, {"name": "HostToDevice631360192511", "ph": "s", "cat": "HostToDevice", "id": "631360192511", "pid": 43767502, "tid": 439084, "ts": "1704161511426229.029"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426232.550", "dur": 0.11, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426232.660", "dur": 6.87, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 343, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 146}, "ph": "X"}, {"name": "HostToDevice631360192511", "ph": "f", "id": "631360192511", "ts": "1704161511426232.660", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426232.660", "dur": 6.87, "ph": "X"}, {"name": "HostToDevice635655159807", "ph": "s", "cat": "HostToDevice", "id": "635655159807", "pid": 43767502, "tid": 439084, "ts": "1704161511426233.030"}, {"name": "HostToDevice639950127103", "ph": "s", "cat": "HostToDevice", "id": "639950127103", "pid": 43767502, "tid": 439084, "ts": "1704161511426236.550"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426239.530", "dur": 0.12, "ph": "X"}, {"name": 
"aclnnAddmm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511426239.650", "dur": 6.23, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 344, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 147}, "ph": "X"}, {"name": "HostToDevice635655159807", "ph": "f", "id": "635655159807", "ts": "1704161511426239.650", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426239.650", "dur": 6.23, "ph": "X"}, {"name": "HostToDevice644245094399", "ph": "s", "cat": "HostToDevice", "id": "644245094399", "pid": 43767502, "tid": 439084, "ts": "1704161511426240.280"}, {"name": "HostToDevice648540061695", "ph": "s", "cat": "HostToDevice", "id": "648540061695", "pid": 43767502, "tid": 439084, "ts": "1704161511426244.110"}, {"name": "aclnnAddmm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511426245.990", "dur": 2.35, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 345, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 148}, "ph": "X"}, {"name": "HostToDevice639950127103", "ph": "f", "id": "639950127103", "ts": "1704161511426245.990", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426245.990", "dur": 2.35, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426245.880", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426248.340", "dur": 0.11, "ph": "X"}, {"name": "aclnnAddmm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511426248.450", "dur": 6.04, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 346, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 149}, "ph": "X"}, {"name": "HostToDevice644245094399", "ph": "f", "id": "644245094399", "ts": "1704161511426248.450", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": 
"e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426248.450", "dur": 6.04, "ph": "X"}, {"name": "aclnnAddmm_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511426254.600", "dur": 9.14, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 347, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 150}, "ph": "X"}, {"name": "HostToDevice648540061695", "ph": "f", "id": "648540061695", "ts": "1704161511426254.600", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426254.600", "dur": 9.14, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426254.490", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclrtSynchronizeDevice", "pid": 43767502, "tid": 437675, "ts": "1704161511426263.130", "dur": 51.7505169876647, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeDevice", "item_id": "0", "connection_id": 152}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426263.740", "dur": 199.598, "ph": "X"}, {"name": "aclnnLogSoftmax_LogSoftmaxAiCore_LogSoftmaxV2", "pid": 800, "tid": 3, "ts": "1704161511426463.338", "dur": 18.6395, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 348, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 153}, "ph": "X"}, {"name": "HostToDevice661424963583", "ph": "f", "id": "661424963583", "ts": "1704161511426463.338", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426463.338", "dur": 18.6395, "ph": "X"}, {"name": "AscendCL@aclnnLogSoftmax", "pid": 43767502, "tid": 439084, "ts": "1704161511426468.822", "dur": 9.71009700386907, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnLogSoftmax", "item_id": "0", "connection_id": 154}, "ph": "X"}, {"name": "HostToDevice661424963583", "ph": "s", "cat": 
"HostToDevice", "id": "661424963583", "pid": 43767502, "tid": 439084, "ts": "1704161511426469.572"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426481.978", "dur": 55.7195, "ph": "X"}, {"name": "aclnnInplaceOne_OnesLikeAiCore_OnesLike", "pid": 800, "tid": 3, "ts": "1704161511426537.697", "dur": 1.49, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 349, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 155}, "ph": "X"}, {"name": "HostToDevice670014898175", "ph": "f", "id": "670014898175", "ts": "1704161511426537.697", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426537.697", "dur": 1.49, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426539.187", "dur": 8.48, "ph": "X"}, {"name": "AscendCL@aclnnInplaceOne", "pid": 43767502, "tid": 439084, "ts": "1704161511426545.723", "dur": 7.4000739267385285, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceOne", "item_id": "0", "connection_id": 156}, "ph": "X"}, {"name": "HostToDevice670014898175", "ph": "s", "cat": "HostToDevice", "id": "670014898175", "pid": 43767502, "tid": 439084, "ts": "1704161511426546.733"}, {"name": "aclnnNLLLoss_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426547.667", "dur": 2.89, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 350, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 157}, "ph": "X"}, {"name": "HostToDevice678604832767", "ph": "f", "id": "678604832767", "ts": "1704161511426547.667", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426547.667", "dur": 2.89, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426550.557", "dur": 0.19, "ph": "X"}, {"name": "aclnnNLLLoss_NLLLossAiCore_MemSet", "pid": 800, "tid": 3, "ts": 
"1704161511426550.747", "dur": 8.82, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 351, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 158}, "ph": "X"}, {"name": "HostToDevice682899800063", "ph": "f", "id": "682899800063", "ts": "1704161511426550.747", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426550.747", "dur": 8.82, "ph": "X"}, {"name": "AscendCL@aclnnNLLLoss", "pid": 43767502, "tid": 439084, "ts": "1704161511426558.643", "dur": 20.77020749437287, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnNLLLoss", "item_id": "0", "connection_id": 161}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426559.567", "dur": 4.92, "ph": "X"}, {"name": "HostToDevice678604832767", "ph": "s", "cat": "HostToDevice", "id": "678604832767", "pid": 43767502, "tid": 439084, "ts": "1704161511426560.393"}, {"name": "aclnnNLLLoss_NLLLossAiCore_NLLLoss", "pid": 800, "tid": 3, "ts": "1704161511426564.487", "dur": 8.5595, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 352, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 159}, "ph": "X"}, {"name": "HostToDevice687194767359", "ph": "f", "id": "687194767359", "ts": "1704161511426564.487", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426564.487", "dur": 8.5595, "ph": "X"}, {"name": "HostToDevice682899800063", "ph": "s", "cat": "HostToDevice", "id": "682899800063", "pid": 43767502, "tid": 439084, "ts": "1704161511426565.793"}, {"name": "HostToDevice687194767359", "ph": "s", "cat": "HostToDevice", "id": "687194767359", "pid": 43767502, "tid": 439084, "ts": "1704161511426569.433"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426573.046", "dur": 0.19, "ph": "X"}, {"name": "aclnnNLLLoss_DivAiCore_Div", "pid": 800, "tid": 3, 
"ts": "1704161511426573.236", "dur": 1.37, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 353, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 160}, "ph": "X"}, {"name": "HostToDevice691489734655", "ph": "f", "id": "691489734655", "ts": "1704161511426573.236", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426573.236", "dur": 1.37, "ph": "X"}, {"name": "HostToDevice691489734655", "ph": "s", "cat": "HostToDevice", "id": "691489734655", "pid": 43767502, "tid": 439084, "ts": "1704161511426574.193"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426574.606", "dur": 44.41, "ph": "X"}, {"name": "aclnnInplaceOne_OnesLikeAiCore_OnesLike", "pid": 800, "tid": 3, "ts": "1704161511426619.016", "dur": 1.21, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 354, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 162}, "ph": "X"}, {"name": "HostToDevice700079669247", "ph": "f", "id": "700079669247", "ts": "1704161511426619.016", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426619.016", "dur": 1.21, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426620.226", "dur": 204.3275, "ph": "X"}, {"name": "AscendCL@aclnnInplaceOne", "pid": 43767502, "tid": 439084, "ts": "1704161511426629.164", "dur": 7.470074626045514, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceOne", "item_id": "0", "connection_id": 163}, "ph": "X"}, {"name": "HostToDevice700079669247", "ph": "s", "cat": "HostToDevice", "id": "700079669247", "pid": 43767502, "tid": 439084, "ts": "1704161511426629.863"}, {"name": "aclnnInplaceOne_OnesLikeAiCore_OnesLike", "pid": 800, "tid": 3, "ts": "1704161511426824.554", "dur": 1.21, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 355, 
"Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 164}, "ph": "X"}, {"name": "HostToDevice708669603839", "ph": "f", "id": "708669603839", "ts": "1704161511426824.554", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426824.554", "dur": 1.21, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426825.764", "dur": 35.64, "ph": "X"}, {"name": "AscendCL@aclnnInplaceOne", "pid": 43767502, "tid": 439084, "ts": "1704161511426833.366", "dur": 6.780067732876651, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceOne", "item_id": "0", "connection_id": 165}, "ph": "X"}, {"name": "HostToDevice708669603839", "ph": "s", "cat": "HostToDevice", "id": "708669603839", "pid": 43767502, "tid": 439084, "ts": "1704161511426833.936"}, {"name": "AscendCL@aclnnNLLLossBackward", "pid": 43767502, "tid": 439084, "ts": "1704161511426859.756", "dur": 12.0601204806036, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnNLLLossBackward", "item_id": "0", "connection_id": 168}, "ph": "X"}, {"name": "HostToDevice717259538431", "ph": "s", "cat": "HostToDevice", "id": "717259538431", "pid": 43767502, "tid": 439084, "ts": "1704161511426861.036"}, {"name": "aclnnNLLLossBackward_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511426861.404", "dur": 2.69, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 356, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 166}, "ph": "X"}, {"name": "HostToDevice717259538431", "ph": "f", "id": "717259538431", "ts": "1704161511426861.404", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426861.404", "dur": 2.69, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426864.094", "dur": 5.63, "ph": "X"}, {"name": "HostToDevice721554505727", "ph": "s", "cat": 
"HostToDevice", "id": "721554505727", "pid": 43767502, "tid": 439084, "ts": "1704161511426866.186"}, {"name": "aclnnNLLLossBackward_NLLLossGradAiCore_NLLLossGrad", "pid": 800, "tid": 3, "ts": "1704161511426869.724", "dur": 8.1095, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 357, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 167}, "ph": "X"}, {"name": "HostToDevice721554505727", "ph": "f", "id": "721554505727", "ts": "1704161511426869.724", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426869.724", "dur": 8.1095, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426877.834", "dur": 58.4095, "ph": "X"}, {"name": "aclnnLogSoftmaxBackward_LogSoftmaxGrad_LogSoftmaxGrad", "pid": 800, "tid": 3, "ts": "1704161511426936.243", "dur": 12.21, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 358, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 169}, "ph": "X"}, {"name": "HostToDevice730144440319", "ph": "f", "id": "730144440319", "ts": "1704161511426936.243", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511426936.243", "dur": 12.21, "ph": "X"}, {"name": "AscendCL@aclnnLogSoftmaxBackward", "pid": 43767502, "tid": 439084, "ts": "1704161511426938.956", "dur": 7.39007382683753, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnLogSoftmaxBackward", "item_id": "0", "connection_id": 170}, "ph": "X"}, {"name": "HostToDevice730144440319", "ph": "s", "cat": "HostToDevice", "id": "730144440319", "pid": 43767502, "tid": 439084, "ts": "1704161511426939.677"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511426948.453", "dur": 56.3695, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427004.822", "dur": 1.57, "args": {"Model Id": 4294967295, 
"Task Type": "Other", "Stream Id": 3, "Task Id": 359, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 171}, "ph": "X"}, {"name": "HostToDevice738734374911", "ph": "f", "id": "738734374911", "ts": "1704161511427004.822", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427004.822", "dur": 1.57, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427006.582", "dur": 1.49, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 360, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 172}, "ph": "X"}, {"name": "HostToDevice743029342207", "ph": "f", "id": "743029342207", "ts": "1704161511427006.582", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427006.582", "dur": 1.49, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427006.392", "dur": 0.19, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427008.072", "dur": 9.49, "ph": "X"}, {"name": "AscendCL@aclnnMm", "pid": 43767502, "tid": 439084, "ts": "1704161511427010.708", "dur": 33.6803364665613, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnMm", "item_id": "0", "connection_id": 178}, "ph": "X"}, {"name": "HostToDevice738734374911", "ph": "s", "cat": "HostToDevice", "id": "738734374911", "pid": 43767502, "tid": 439084, "ts": "1704161511427012.237"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427017.562", "dur": 5.6195, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 361, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 173}, "ph": "X"}, {"name": "HostToDevice747324309503", "ph": "f", "id": "747324309503", "ts": "1704161511427017.562", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, 
"tid": 0, "ts": "1704161511427017.562", "dur": 5.6195, "ph": "X"}, {"name": "HostToDevice743029342207", "ph": "s", "cat": "HostToDevice", "id": "743029342207", "pid": 43767502, "tid": 439084, "ts": "1704161511427017.938"}, {"name": "HostToDevice747324309503", "ph": "s", "cat": "HostToDevice", "id": "747324309503", "pid": 43767502, "tid": 439084, "ts": "1704161511427022.737"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427023.362", "dur": 7.66, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 362, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 174}, "ph": "X"}, {"name": "HostToDevice751619276799", "ph": "f", "id": "751619276799", "ts": "1704161511427023.362", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427023.362", "dur": 7.66, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427023.182", "dur": 0.18, "ph": "X"}, {"name": "HostToDevice751619276799", "ph": "s", "cat": "HostToDevice", "id": "751619276799", "pid": 43767502, "tid": 439084, "ts": "1704161511427027.658"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427031.022", "dur": 0.18, "ph": "X"}, {"name": "aclnnMm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427031.202", "dur": 6.19, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 363, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 175}, "ph": "X"}, {"name": "HostToDevice755914244095", "ph": "f", "id": "755914244095", "ts": "1704161511427031.202", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427031.202", "dur": 6.19, "ph": "X"}, {"name": "HostToDevice755914244095", "ph": "s", "cat": "HostToDevice", "id": "755914244095", "pid": 43767502, "tid": 439084, "ts": "1704161511427031.737"}, {"name": "HostToDevice760209211391", "ph": 
"s", "cat": "HostToDevice", "id": "760209211391", "pid": 43767502, "tid": 439084, "ts": "1704161511427035.107"}, {"name": "aclnnMm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511427037.582", "dur": 2.7, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 364, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 176}, "ph": "X"}, {"name": "HostToDevice760209211391", "ph": "f", "id": "760209211391", "ts": "1704161511427037.582", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427037.582", "dur": 2.7, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427037.392", "dur": 0.19, "ph": "X"}, {"name": "HostToDevice764504178687", "ph": "s", "cat": "HostToDevice", "id": "764504178687", "pid": 43767502, "tid": 439084, "ts": "1704161511427040.078"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427040.282", "dur": 0.18, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427040.462", "dur": 7.94, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 365, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 177}, "ph": "X"}, {"name": "HostToDevice764504178687", "ph": "f", "id": "764504178687", "ts": "1704161511427040.462", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427040.462", "dur": 7.94, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427048.582", "dur": 1.42, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 366, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 179}, "ph": "X"}, {"name": "HostToDevice773094113279", "ph": "f", "id": "773094113279", "ts": "1704161511427048.582", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, 
"tid": 0, "ts": "1704161511427048.582", "dur": 1.42, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427048.402", "dur": 0.18, "ph": "X"}, {"name": "AscendCL@aclnnMm", "pid": 43767502, "tid": 439084, "ts": "1704161511427049.118", "dur": 33.79033756547228, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnMm", "item_id": "0", "connection_id": 186}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427050.002", "dur": 0.19, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427050.192", "dur": 2.0, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 367, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 180}, "ph": "X"}, {"name": "HostToDevice777389080575", "ph": "f", "id": "777389080575", "ts": "1704161511427050.192", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "HostToDevice773094113279", "ph": "s", "cat": "HostToDevice", "id": "773094113279", "pid": 43767502, "tid": 439084, "ts": "1704161511427050.148"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427050.192", "dur": 2.0, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427052.192", "dur": 3.27, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427055.462", "dur": 5.25, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 368, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 181}, "ph": "X"}, {"name": "HostToDevice781684047871", "ph": "f", "id": "781684047871", "ts": "1704161511427055.462", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427055.462", "dur": 5.25, "ph": "X"}, {"name": "HostToDevice777389080575", "ph": "s", "cat": "HostToDevice", "id": "777389080575", "pid": 43767502, "tid": 439084, "ts": "1704161511427055.828"}, {"name": 
"aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427060.822", "dur": 7.84, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 369, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 182}, "ph": "X"}, {"name": "HostToDevice785979015167", "ph": "f", "id": "785979015167", "ts": "1704161511427060.822", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427060.822", "dur": 7.84, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427060.712", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice781684047871", "ph": "s", "cat": "HostToDevice", "id": "781684047871", "pid": 43767502, "tid": 439084, "ts": "1704161511427061.098"}, {"name": "HostToDevice785979015167", "ph": "s", "cat": "HostToDevice", "id": "785979015167", "pid": 43767502, "tid": 439084, "ts": "1704161511427066.298"}, {"name": "aclnnMm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427068.772", "dur": 6.6195, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 370, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 183}, "ph": "X"}, {"name": "HostToDevice790273982463", "ph": "f", "id": "790273982463", "ts": "1704161511427068.772", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427068.772", "dur": 6.6195, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427068.662", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice790273982463", "ph": "s", "cat": "HostToDevice", "id": "790273982463", "pid": 43767502, "tid": 439084, "ts": "1704161511427070.558"}, {"name": "HostToDevice794568949759", "ph": "s", "cat": "HostToDevice", "id": "794568949759", "pid": 43767502, "tid": 439084, "ts": "1704161511427073.708"}, {"name": "aclnnMm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511427075.512", "dur": 2.08, "args": 
{"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 371, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 184}, "ph": "X"}, {"name": "HostToDevice794568949759", "ph": "f", "id": "794568949759", "ts": "1704161511427075.512", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427075.512", "dur": 2.08, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427075.392", "dur": 0.12, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427077.592", "dur": 0.12, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427077.712", "dur": 7.61, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 372, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 185}, "ph": "X"}, {"name": "HostToDevice798863917055", "ph": "f", "id": "798863917055", "ts": "1704161511427077.712", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427077.712", "dur": 7.61, "ph": "X"}, {"name": "HostToDevice798863917055", "ph": "s", "cat": "HostToDevice", "id": "798863917055", "pid": 43767502, "tid": 439084, "ts": "1704161511427077.988"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427085.322", "dur": 0.11, "ph": "X"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427085.432", "dur": 6.81, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 373, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 187}, "ph": "X"}, {"name": "HostToDevice807453851647", "ph": "f", "id": "807453851647", "ts": "1704161511427085.432", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427085.432", "dur": 6.81, "ph": "X"}, {"name": "AscendCL@aclnnReduceSum", "pid": 
43767502, "tid": 439084, "ts": "1704161511427088.988", "dur": 10.540105295651903, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnReduceSum", "item_id": "0", "connection_id": 189}, "ph": "X"}, {"name": "HostToDevice807453851647", "ph": "s", "cat": "HostToDevice", "id": "807453851647", "pid": 43767502, "tid": 439084, "ts": "1704161511427089.908"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_ReduceSum", "pid": 800, "tid": 3, "ts": "1704161511427092.352", "dur": 11.05, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 374, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 188}, "ph": "X"}, {"name": "HostToDevice811748818943", "ph": "f", "id": "811748818943", "ts": "1704161511427092.352", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427092.352", "dur": 11.05, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427092.242", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice811748818943", "ph": "s", "cat": "HostToDevice", "id": "811748818943", "pid": 43767502, "tid": 439084, "ts": "1704161511427093.908"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427103.402", "dur": 200.038, "ph": "X"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511427192.119", "dur": 61.02060959588986, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 190}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511427273.780", "dur": 66.04065974619087, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 193}, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511427288.240", "dur": 16.470164536943724, "args": {"Thread Id": 439084, "Mode": 
"ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 191}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511427303.440", "dur": 1.47, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 375, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 192}, "ph": "X"}, {"name": "HostToDevice828928688127", "ph": "f", "id": "828928688127", "ts": "1704161511427303.440", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427303.440", "dur": 1.47, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427304.910", "dur": 66.5995, "ph": "X"}, {"name": "HostToDevice828928688127", "ph": "s", "cat": "HostToDevice", "id": "828928688127", "pid": 43767502, "tid": 439084, "ts": "1704161511427309.360"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511427327.340", "dur": 19.640196205560095, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 194}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511427364.681", "dur": 32.330322979926564, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 197}, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511427370.261", "dur": 7.470074626045514, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 195}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511427371.509", "dur": 1.5295, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 376, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 196}, "ph": "X"}, {"name": "HostToDevice846108557311", "ph": 
"f", "id": "846108557311", "ts": "1704161511427371.509", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427371.509", "dur": 1.5295, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427373.038", "dur": 58.6495, "ph": "X"}, {"name": "HostToDevice846108557311", "ph": "s", "cat": "HostToDevice", "id": "846108557311", "pid": 43767502, "tid": 439084, "ts": "1704161511427379.931"}, {"name": "aclnnThresholdBackward_ReluGrad_ReluGrad", "pid": 800, "tid": 3, "ts": "1704161511427431.688", "dur": 2.3, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 377, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 198}, "ph": "X"}, {"name": "HostToDevice854698491903", "ph": "f", "id": "854698491903", "ts": "1704161511427431.688", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427431.688", "dur": 2.3, "ph": "X"}, {"name": "AscendCL@aclnnThresholdBackward", "pid": 43767502, "tid": 439084, "ts": "1704161511427432.622", "dur": 9.020090110700206, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnThresholdBackward", "item_id": "0", "connection_id": 199}, "ph": "X"}, {"name": "HostToDevice854698491903", "ph": "s", "cat": "HostToDevice", "id": "854698491903", "pid": 43767502, "tid": 439084, "ts": "1704161511427433.822"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427433.988", "dur": 45.7195, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427479.708", "dur": 1.96, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 378, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 200}, "ph": "X"}, {"name": "HostToDevice863288426495", "ph": "f", "id": "863288426495", "ts": "1704161511427479.708", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": 
"Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427479.708", "dur": 1.96, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427481.828", "dur": 2.25, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 379, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 201}, "ph": "X"}, {"name": "HostToDevice867583393791", "ph": "f", "id": "867583393791", "ts": "1704161511427481.828", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427481.828", "dur": 2.25, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427481.668", "dur": 0.16, "ph": "X"}, {"name": "AscendCL@aclnnMm", "pid": 43767502, "tid": 439084, "ts": "1704161511427483.372", "dur": 32.220321881015586, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnMm", "item_id": "0", "connection_id": 207}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427484.078", "dur": 8.13, "ph": "X"}, {"name": "HostToDevice863288426495", "ph": "s", "cat": "HostToDevice", "id": "863288426495", "pid": 43767502, "tid": 439084, "ts": "1704161511427485.252"}, {"name": "HostToDevice867583393791", "ph": "s", "cat": "HostToDevice", "id": "867583393791", "pid": 43767502, "tid": 439084, "ts": "1704161511427491.112"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427492.208", "dur": 7.37, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 380, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 202}, "ph": "X"}, {"name": "HostToDevice871878361087", "ph": "f", "id": "871878361087", "ts": "1704161511427492.208", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427492.208", "dur": 7.37, "ph": "X"}, {"name": "HostToDevice871878361087", "ph": "s", "cat": "HostToDevice", "id": 
"871878361087", "pid": 43767502, "tid": 439084, "ts": "1704161511427495.072"}, {"name": "HostToDevice876173328383", "ph": "s", "cat": "HostToDevice", "id": "876173328383", "pid": 43767502, "tid": 439084, "ts": "1704161511427499.422"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427499.578", "dur": 0.15, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427499.728", "dur": 7.9, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 381, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 203}, "ph": "X"}, {"name": "HostToDevice876173328383", "ph": "f", "id": "876173328383", "ts": "1704161511427499.728", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427499.728", "dur": 7.9, "ph": "X"}, {"name": "HostToDevice880468295679", "ph": "s", "cat": "HostToDevice", "id": "880468295679", "pid": 43767502, "tid": 439084, "ts": "1704161511427503.082"}, {"name": "HostToDevice884763262975", "ph": "s", "cat": "HostToDevice", "id": "884763262975", "pid": 43767502, "tid": 439084, "ts": "1704161511427506.962"}, {"name": "aclnnMm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427507.778", "dur": 6.22, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 382, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 204}, "ph": "X"}, {"name": "HostToDevice880468295679", "ph": "f", "id": "880468295679", "ts": "1704161511427507.778", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427507.778", "dur": 6.22, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427507.628", "dur": 0.15, "ph": "X"}, {"name": "HostToDevice889058230271", "ph": "s", "cat": "HostToDevice", "id": "889058230271", "pid": 43767502, "tid": 439084, "ts": "1704161511427511.052"}, {"name": "Free", "pid": 
4376751100, "tid": 3, "ts": "1704161511427513.998", "dur": 0.15, "ph": "X"}, {"name": "aclnnMm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511427514.148", "dur": 2.82, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 383, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 205}, "ph": "X"}, {"name": "HostToDevice884763262975", "ph": "f", "id": "884763262975", "ts": "1704161511427514.148", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427514.148", "dur": 2.82, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427517.078", "dur": 4.15, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 384, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 206}, "ph": "X"}, {"name": "HostToDevice889058230271", "ph": "f", "id": "889058230271", "ts": "1704161511427517.078", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427517.078", "dur": 4.15, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427516.968", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnMm", "pid": 43767502, "tid": 439084, "ts": "1704161511427519.732", "dur": 30.78030749527188, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnMm", "item_id": "0", "connection_id": 215}, "ph": "X"}, {"name": "HostToDevice897648164863", "ph": "s", "cat": "HostToDevice", "id": "897648164863", "pid": 43767502, "tid": 439084, "ts": "1704161511427520.692"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427521.338", "dur": 1.8195, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 385, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 208}, "ph": "X"}, {"name": "HostToDevice897648164863", "ph": "f", "id": "897648164863", "ts": 
"1704161511427521.338", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427521.338", "dur": 1.8195, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427521.228", "dur": 0.11, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427523.267", "dur": 2.08, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 386, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 209}, "ph": "X"}, {"name": "HostToDevice901943132159", "ph": "f", "id": "901943132159", "ts": "1704161511427523.267", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427523.267", "dur": 2.08, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427523.157", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427525.347", "dur": 0.11, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427525.457", "dur": 7.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 387, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 210}, "ph": "X"}, {"name": "HostToDevice906238099455", "ph": "f", "id": "906238099455", "ts": "1704161511427525.457", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427525.457", "dur": 7.41, "ph": "X"}, {"name": "HostToDevice901943132159", "ph": "s", "cat": "HostToDevice", "id": "901943132159", "pid": 43767502, "tid": 439084, "ts": "1704161511427526.272"}, {"name": "HostToDevice906238099455", "ph": "s", "cat": "HostToDevice", "id": "906238099455", "pid": 43767502, "tid": 439084, "ts": "1704161511427530.252"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427532.867", "dur": 0.11, "ph": "X"}, {"name": 
"aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427532.977", "dur": 7.25, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 388, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 211}, "ph": "X"}, {"name": "HostToDevice910533066751", "ph": "f", "id": "910533066751", "ts": "1704161511427532.977", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427532.977", "dur": 7.25, "ph": "X"}, {"name": "HostToDevice910533066751", "ph": "s", "cat": "HostToDevice", "id": "910533066751", "pid": 43767502, "tid": 439084, "ts": "1704161511427534.482"}, {"name": "HostToDevice914828034047", "ph": "s", "cat": "HostToDevice", "id": "914828034047", "pid": 43767502, "tid": 439084, "ts": "1704161511427538.232"}, {"name": "aclnnMm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427540.347", "dur": 6.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 389, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 212}, "ph": "X"}, {"name": "HostToDevice914828034047", "ph": "f", "id": "914828034047", "ts": "1704161511427540.347", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427540.347", "dur": 6.53, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427540.227", "dur": 0.12, "ph": "X"}, {"name": "HostToDevice919123001343", "ph": "s", "cat": "HostToDevice", "id": "919123001343", "pid": 43767502, "tid": 439084, "ts": "1704161511427541.812"}, {"name": "HostToDevice923417968639", "ph": "s", "cat": "HostToDevice", "id": "923417968639", "pid": 43767502, "tid": 439084, "ts": "1704161511427546.513"}, {"name": "aclnnMm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511427546.987", "dur": 2.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 390, "Batch Id": 0, "Subtask 
Id": 4294967295, "connection_id": 213}, "ph": "X"}, {"name": "HostToDevice919123001343", "ph": "f", "id": "919123001343", "ts": "1704161511427546.987", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427546.987", "dur": 2.53, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427546.877", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427549.517", "dur": 0.11, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427549.627", "dur": 4.13, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 391, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 214}, "ph": "X"}, {"name": "HostToDevice923417968639", "ph": "f", "id": "923417968639", "ts": "1704161511427549.627", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427549.627", "dur": 4.13, "ph": "X"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427553.867", "dur": 6.81, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 392, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 216}, "ph": "X"}, {"name": "HostToDevice932007903231", "ph": "f", "id": "932007903231", "ts": "1704161511427553.867", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427553.867", "dur": 6.81, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427553.757", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnReduceSum", "pid": 43767502, "tid": 439084, "ts": "1704161511427554.443", "dur": 8.5500854153533, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnReduceSum", "item_id": "0", "connection_id": 218}, "ph": "X"}, {"name": "HostToDevice932007903231", "ph": 
"s", "cat": "HostToDevice", "id": "932007903231", "pid": 43767502, "tid": 439084, "ts": "1704161511427554.953"}, {"name": "HostToDevice936302870527", "ph": "s", "cat": "HostToDevice", "id": "936302870527", "pid": 43767502, "tid": 439084, "ts": "1704161511427558.863"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_ReduceSum", "pid": 800, "tid": 3, "ts": "1704161511427560.787", "dur": 11.8195, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 393, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 217}, "ph": "X"}, {"name": "HostToDevice936302870527", "ph": "f", "id": "936302870527", "ts": "1704161511427560.787", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427560.787", "dur": 11.8195, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427560.677", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511427568.703", "dur": 19.980199602194023, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 219}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427572.606", "dur": 40.25, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511427604.683", "dur": 30.740307095667887, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 222}, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511427610.213", "dur": 6.900068931688628, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 220}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511427612.856", "dur": 1.39, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, 
"Task Id": 394, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 221}, "ph": "X"}, {"name": "HostToDevice953482739711", "ph": "f", "id": "953482739711", "ts": "1704161511427612.856", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427612.856", "dur": 1.39, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427614.246", "dur": 69.229, "ph": "X"}, {"name": "HostToDevice953482739711", "ph": "s", "cat": "HostToDevice", "id": "953482739711", "pid": 43767502, "tid": 439084, "ts": "1704161511427619.394"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511427646.074", "dur": 17.870178523083446, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 223}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511427681.664", "dur": 26.54026513724872, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 226}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511427683.476", "dur": 2.57, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 395, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 225}, "ph": "X"}, {"name": "HostToDevice970662608895", "ph": "f", "id": "970662608895", "ts": "1704161511427683.476", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427683.476", "dur": 2.57, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427686.046", "dur": 19.13, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511427686.134", "dur": 5.400053946538926, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", 
"item_id": "0", "connection_id": 224}, "ph": "X"}, {"name": "HostToDevice970662608895", "ph": "s", "cat": "HostToDevice", "id": "970662608895", "pid": 43767502, "tid": 439084, "ts": "1704161511427693.494"}, {"name": "aclnnThresholdBackward_ReluGrad_ReluGrad", "pid": 800, "tid": 3, "ts": "1704161511427705.176", "dur": 2.72, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 396, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 227}, "ph": "X"}, {"name": "HostToDevice979252543487", "ph": "f", "id": "979252543487", "ts": "1704161511427705.176", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427705.176", "dur": 2.72, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427707.896", "dur": 50.1595, "ph": "X"}, {"name": "AscendCL@aclnnThresholdBackward", "pid": 43767502, "tid": 439084, "ts": "1704161511427713.924", "dur": 6.050060440103796, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnThresholdBackward", "item_id": "0", "connection_id": 228}, "ph": "X"}, {"name": "HostToDevice979252543487", "ph": "s", "cat": "HostToDevice", "id": "979252543487", "pid": 43767502, "tid": 439084, "ts": "1704161511427714.424"}, {"name": "AscendCL@aclnnMm", "pid": 43767502, "tid": 439084, "ts": "1704161511427757.805", "dur": 31.850318184678667, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnMm", "item_id": "0", "connection_id": 236}, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427758.055", "dur": 2.29, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 397, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 229}, "ph": "X"}, {"name": "HostToDevice987842478079", "ph": "f", "id": "987842478079", "ts": "1704161511427758.055", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 
4376751100, "tid": 0, "ts": "1704161511427758.055", "dur": 2.29, "ph": "X"}, {"name": "HostToDevice987842478079", "ph": "s", "cat": "HostToDevice", "id": "987842478079", "pid": 43767502, "tid": 439084, "ts": "1704161511427759.035"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427760.345", "dur": 0.16, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427760.505", "dur": 4.89, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 398, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 230}, "ph": "X"}, {"name": "HostToDevice992137445375", "ph": "f", "id": "992137445375", "ts": "1704161511427760.505", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427760.505", "dur": 4.89, "ph": "X"}, {"name": "HostToDevice992137445375", "ph": "s", "cat": "HostToDevice", "id": "992137445375", "pid": 43767502, "tid": 439084, "ts": "1704161511427764.535"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427765.395", "dur": 9.6895, "ph": "X"}, {"name": "HostToDevice996432412671", "ph": "s", "cat": "HostToDevice", "id": "996432412671", "pid": 43767502, "tid": 439084, "ts": "1704161511427768.645"}, {"name": "HostToDevice1000727379967", "ph": "s", "cat": "HostToDevice", "id": "1000727379967", "pid": 43767502, "tid": 439084, "ts": "1704161511427772.625"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427775.084", "dur": 7.39, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 399, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 231}, "ph": "X"}, {"name": "HostToDevice996432412671", "ph": "f", "id": "996432412671", "ts": "1704161511427775.084", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427775.084", "dur": 7.39, "ph": "X"}, {"name": 
"AscendCL@aclMallocMemInner", "pid": 43767502, "tid": 439228, "ts": "1704161511427775.935", "dur": 72.83072757896852, "args": {"Thread Id": 439228, "Mode": "ACL_RTS", "level": "acl", "id": "aclMallocMemInner", "item_id": "0", "connection_id": 237}, "ph": "X"}, {"name": "HostToDevice1005022347263", "ph": "s", "cat": "HostToDevice", "id": "1005022347263", "pid": 43767502, "tid": 439084, "ts": "1704161511427777.025"}, {"name": "HostToDevice1009317314559", "ph": "s", "cat": "HostToDevice", "id": "1009317314559", "pid": 43767502, "tid": 439084, "ts": "1704161511427780.625"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427782.584", "dur": 6.33, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 400, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 232}, "ph": "X"}, {"name": "HostToDevice1000727379967", "ph": "f", "id": "1000727379967", "ts": "1704161511427782.584", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427782.584", "dur": 6.33, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427782.474", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice1013612281855", "ph": "s", "cat": "HostToDevice", "id": "1013612281855", "pid": 43767502, "tid": 439084, "ts": "1704161511427784.865"}, {"name": "aclnnMm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427789.024", "dur": 6.32, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 401, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 233}, "ph": "X"}, {"name": "HostToDevice1005022347263", "ph": "f", "id": "1005022347263", "ts": "1704161511427789.024", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427789.024", "dur": 6.32, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427788.914", "dur": 0.11, "ph": 
"X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427795.344", "dur": 0.11, "ph": "X"}, {"name": "aclnnMm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511427795.454", "dur": 5.55, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 402, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 234}, "ph": "X"}, {"name": "HostToDevice1009317314559", "ph": "f", "id": "1009317314559", "ts": "1704161511427795.454", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427795.454", "dur": 5.55, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427801.114", "dur": 6.09, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 403, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 235}, "ph": "X"}, {"name": "HostToDevice1013612281855", "ph": "f", "id": "1013612281855", "ts": "1704161511427801.114", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427801.114", "dur": 6.09, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427801.004", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427807.204", "dur": 61.3095, "ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427868.514", "dur": 2.23, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 404, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 238}, "ph": "X"}, {"name": "HostToDevice1026497183743", "ph": "f", "id": "1026497183743", "ts": "1704161511427868.514", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427868.514", "dur": 2.23, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427870.744", "dur": 0.16, 
"ph": "X"}, {"name": "aclnnMm_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511427870.904", "dur": 4.7095, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 405, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 239}, "ph": "X"}, {"name": "HostToDevice1030792151039", "ph": "f", "id": "1030792151039", "ts": "1704161511427870.904", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427870.904", "dur": 4.7095, "ph": "X"}, {"name": "AscendCL@aclnnMm", "pid": 43767502, "tid": 439084, "ts": "1704161511427873.146", "dur": 30.810307794974868, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnMm", "item_id": "0", "connection_id": 245}, "ph": "X"}, {"name": "HostToDevice1026497183743", "ph": "s", "cat": "HostToDevice", "id": "1026497183743", "pid": 43767502, "tid": 439084, "ts": "1704161511427874.756"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427875.614", "dur": 8.65, "ph": "X"}, {"name": "HostToDevice1030792151039", "ph": "s", "cat": "HostToDevice", "id": "1030792151039", "pid": 43767502, "tid": 439084, "ts": "1704161511427880.236"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427884.264", "dur": 7.55, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 406, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 240}, "ph": "X"}, {"name": "HostToDevice1035087118335", "ph": "f", "id": "1035087118335", "ts": "1704161511427884.264", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "HostToDevice1035087118335", "ph": "s", "cat": "HostToDevice", "id": "1035087118335", "pid": 43767502, "tid": 439084, "ts": "1704161511427884.296"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427884.264", "dur": 7.55, "ph": "X"}, {"name": "HostToDevice1039382085631", "ph": "s", "cat": "HostToDevice", "id": "1039382085631", 
"pid": 43767502, "tid": 439084, "ts": "1704161511427888.546"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427891.814", "dur": 0.11, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427891.924", "dur": 5.45, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 407, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 241}, "ph": "X"}, {"name": "HostToDevice1039382085631", "ph": "f", "id": "1039382085631", "ts": "1704161511427891.924", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427891.924", "dur": 5.45, "ph": "X"}, {"name": "HostToDevice1043677052927", "ph": "s", "cat": "HostToDevice", "id": "1043677052927", "pid": 43767502, "tid": 439084, "ts": "1704161511427892.126"}, {"name": "HostToDevice1047972020223", "ph": "s", "cat": "HostToDevice", "id": "1047972020223", "pid": 43767502, "tid": 439084, "ts": "1704161511427895.426"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427897.374", "dur": 0.12, "ph": "X"}, {"name": "aclnnMm_MatMulCommon_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427897.494", "dur": 6.1, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 408, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 242}, "ph": "X"}, {"name": "HostToDevice1043677052927", "ph": "f", "id": "1043677052927", "ts": "1704161511427897.494", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427897.494", "dur": 6.1, "ph": "X"}, {"name": "HostToDevice1052266987519", "ph": "s", "cat": "HostToDevice", "id": "1052266987519", "pid": 43767502, "tid": 439084, "ts": "1704161511427899.886"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427903.594", "dur": 0.11, "ph": "X"}, {"name": "aclnnMm_MatMulCommon_MatMulV2", "pid": 800, "tid": 3, "ts": "1704161511427903.704", 
"dur": 4.78, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 409, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 243}, "ph": "X"}, {"name": "HostToDevice1047972020223", "ph": "f", "id": "1047972020223", "ts": "1704161511427903.704", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427903.704", "dur": 4.78, "ph": "X"}, {"name": "aclnnMm_TransData_TransData", "pid": 800, "tid": 3, "ts": "1704161511427908.594", "dur": 9.02, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 410, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 244}, "ph": "X"}, {"name": "HostToDevice1052266987519", "ph": "f", "id": "1052266987519", "ts": "1704161511427908.594", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "AscendCL@aclnnReduceSum", "pid": 43767502, "tid": 439084, "ts": "1704161511427908.476", "dur": 8.660086514264277, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnReduceSum", "item_id": "0", "connection_id": 248}, "ph": "X"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427908.594", "dur": 9.02, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427908.484", "dur": 0.11, "ph": "X"}, {"name": "HostToDevice1060856922111", "ph": "s", "cat": "HostToDevice", "id": "1060856922111", "pid": 43767502, "tid": 439084, "ts": "1704161511427908.986"}, {"name": "HostToDevice1065151889407", "ph": "s", "cat": "HostToDevice", "id": "1065151889407", "pid": 43767502, "tid": 439084, "ts": "1704161511427912.766"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427917.614", "dur": 0.11, "ph": "X"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_MemSet", "pid": 800, "tid": 3, "ts": "1704161511427917.724", "dur": 6.3495, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 411, "Batch Id": 0, "Subtask Id": 
4294967295, "connection_id": 246}, "ph": "X"}, {"name": "HostToDevice1060856922111", "ph": "f", "id": "1060856922111", "ts": "1704161511427917.724", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427917.724", "dur": 6.3495, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427924.073", "dur": 0.11, "ph": "X"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_ReduceSum", "pid": 800, "tid": 3, "ts": "1704161511427924.183", "dur": 11.24, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 412, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 247}, "ph": "X"}, {"name": "HostToDevice1065151889407", "ph": "f", "id": "1065151889407", "ts": "1704161511427924.183", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427924.183", "dur": 11.24, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427935.423", "dur": 54.6095, "ph": "X"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511427947.477", "dur": 20.73020709476888, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 249}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511427986.077", "dur": 28.140281121408403, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 252}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511427990.032", "dur": 1.56, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 413, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 251}, "ph": "X"}, {"name": "HostToDevice1082331758591", "ph": "f", "id": "1082331758591", "ts": "1704161511427990.032", "cat": 
"HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511427990.032", "dur": 1.56, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511427991.087", "dur": 5.870058641885833, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 250}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511427991.592", "dur": 72.6195, "ph": "X"}, {"name": "HostToDevice1082331758591", "ph": "s", "cat": "HostToDevice", "id": "1082331758591", "pid": 43767502, "tid": 439084, "ts": "1704161511427999.137"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511428029.438", "dur": 17.0901707308056, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 253}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511428062.508", "dur": 26.1502612411098, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 256}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511428064.212", "dur": 4.9, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 414, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 255}, "ph": "X"}, {"name": "HostToDevice1099511627775", "ph": "f", "id": "1099511627775", "ts": "1704161511428064.212", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428064.212", "dur": 4.9, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511428067.368", "dur": 5.110051049409984, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 254}, "ph": "X"}, 
{"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428069.112", "dur": 738.4625, "ph": "X"}, {"name": "HostToDevice1099511627775", "ph": "s", "cat": "HostToDevice", "id": "1099511627775", "pid": 43767502, "tid": 439084, "ts": "1704161511428074.598"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428133.548", "dur": 1.4300142858427156, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 257}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428138.449", "dur": 0.2700026973269463, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 258}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428146.938", "dur": 0.47000469534690653, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 259}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428147.989", "dur": 0.13000129871297414, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 260}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428149.339", "dur": 0.42000419584191645, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 261}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428150.369", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 262}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": 
"1704161511428151.489", "dur": 0.42000419584191645, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 263}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428152.309", "dur": 0.16000159841596817, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 264}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 439228, "ts": "1704161511428153.329", "dur": 1.0200101899017973, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 265}, "ph": "X"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511428176.399", "dur": 811.2981048680675, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 329}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511428178.259", "dur": 14.520145056249111, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 266}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428326.880", "dur": 0.8000079920798409, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 267}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428331.090", "dur": 0.2600025974259483, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 268}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428333.050", "dur": 0.42000419584191645, "args": {"Thread Id": 439228, "Mode": "ACL_OP", 
"level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 269}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428334.570", "dur": 0.16000159841596817, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 270}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428335.380", "dur": 0.39000389613892245, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 271}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428336.340", "dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 272}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428337.101", "dur": 0.39000389613892245, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 273}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428337.871", "dur": 0.12000119881197613, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 274}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 439228, "ts": "1704161511428339.040", "dur": 0.1900018981189622, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 275}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428354.520", "dur": 0.7400073926738528, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 276}, 
"ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428356.051", "dur": 0.1800017982179642, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 277}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428371.151", "dur": 0.5100050949508986, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 278}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428372.141", "dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 279}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428373.191", "dur": 0.41000409594091847, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 280}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428374.271", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 281}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428374.781", "dur": 0.44000439564391247, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 282}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428375.641", "dur": 0.11000109891097812, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 283}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 
439228, "ts": "1704161511428376.511", "dur": 0.13000129871297414, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 284}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428422.522", "dur": 0.5100050949508986, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 285}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428423.662", "dur": 0.1900018981189622, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 286}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428427.522", "dur": 0.44000439564391247, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 287}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428428.432", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 288}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428429.042", "dur": 0.3800037962379244, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 289}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428430.242", "dur": 0.17000169831696618, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 290}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 439228, "ts": "1704161511428432.012", "dur": 0.23000229772295425, "args": {"Thread 
Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 291}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceCopyGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511428501.992", "dur": 18.310182918727357, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceCopyGetWorkspaceSize", "item_id": "0", "connection_id": 292}, "ph": "X"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511428579.803", "dur": 19.42019400773814, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 293}, "ph": "X"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511428638.814", "dur": 13.5701355656543, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 294}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428685.224", "dur": 1.2700126874267474, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 295}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428688.284", "dur": 0.25000249752495024, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 296}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428689.494", "dur": 0.7200071928718568, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 297}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428690.744", "dur": 0.17000169831696618, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": 
"acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 298}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428692.334", "dur": 0.4800047952479045, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 299}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428693.404", "dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 300}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428694.284", "dur": 0.40000399603992043, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 301}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428695.264", "dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 302}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 439228, "ts": "1704161511428695.914", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 303}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428776.685", "dur": 0.8200081918818369, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 304}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428779.025", "dur": 0.1800017982179642, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 305}, "ph": 
"X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428805.935", "dur": 0.6300062937628746, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 306}, "ph": "X"}, {"name": "trans_Cast_20", "pid": 800, "tid": 3, "ts": "1704161511428807.574", "dur": 4.11, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 415, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 313}, "ph": "X"}, {"name": "HostToDevice1348619730943", "ph": "f", "id": "1348619730943", "ts": "1704161511428807.574", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428807.475", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 307}, "ph": "X"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428807.574", "dur": 4.11, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428808.675", "dur": 0.4800047952479045, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 308}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428809.795", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 309}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428810.535", "dur": 0.6000059940598806, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 310}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428811.475", 
"dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 311}, "ph": "X"}, {"name": "HostToDevice1348619730943", "ph": "s", "cat": "HostToDevice", "id": "1348619730943", "pid": 43767502, "tid": 439084, "ts": "1704161511428811.675"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428811.684", "dur": 32.7395, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 439228, "ts": "1704161511428812.166", "dur": 0.13000129871297414, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 312}, "ph": "X"}, {"name": "trans_TransData_21", "pid": 800, "tid": 3, "ts": "1704161511428844.424", "dur": 14.48, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 416, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 321}, "ph": "X"}, {"name": "HostToDevice1382979469311", "ph": "f", "id": "1382979469311", "ts": "1704161511428844.424", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428844.424", "dur": 14.48, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428844.806", "dur": 0.5900058941588827, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 314}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428846.565", "dur": 0.15000149851497016, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 315}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428850.496", "dur": 0.44000439564391247, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", 
"item_id": "0", "connection_id": 316}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428851.426", "dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 317}, "ph": "X"}, {"name": "AscendCL@aclCreateTensorDesc", "pid": 43767502, "tid": 439228, "ts": "1704161511428851.966", "dur": 0.39000389613892245, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateTensorDesc", "item_id": "0", "connection_id": 318}, "ph": "X"}, {"name": "AscendCL@aclCreateDataBuffer", "pid": 43767502, "tid": 439228, "ts": "1704161511428852.706", "dur": 0.14000139861397215, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclCreateDataBuffer", "item_id": "0", "connection_id": 319}, "ph": "X"}, {"name": "AscendCL@aclopCreateAttr", "pid": 43767502, "tid": 439228, "ts": "1704161511428853.246", "dur": 0.4600045954459085, "args": {"Thread Id": 439228, "Mode": "ACL_OP", "level": "acl", "id": "aclopCreateAttr", "item_id": "0", "connection_id": 320}, "ph": "X"}, {"name": "HostToDevice1382979469311", "ph": "s", "cat": "HostToDevice", "id": "1382979469311", "pid": 43767502, "tid": 439084, "ts": "1704161511428854.676"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428858.904", "dur": 12.76, "ph": "X"}, {"name": "trans_Cast_22", "pid": 800, "tid": 3, "ts": "1704161511428871.664", "dur": 4.0595, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 417, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 322}, "ph": "X"}, {"name": "HostToDevice1387274436607", "ph": "f", "id": "1387274436607", "ts": "1704161511428871.664", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428871.664", "dur": 4.0595, "ph": "X"}, {"name": "AscendCL@aclnnInplaceCopyGetWorkspaceSize", "pid": 43767502, 
"tid": 439228, "ts": "1704161511428874.666", "dur": 13.540135265951307, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceCopyGetWorkspaceSize", "item_id": "0", "connection_id": 323}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428875.724", "dur": 12.97, "ph": "X"}, {"name": "HostToDevice1387274436607", "ph": "s", "cat": "HostToDevice", "id": "1387274436607", "pid": 43767502, "tid": 439084, "ts": "1704161511428880.716"}, {"name": "trans_TransData_23", "pid": 800, "tid": 3, "ts": "1704161511428888.694", "dur": 13.56, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 418, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 324}, "ph": "X"}, {"name": "HostToDevice1395864371199", "ph": "f", "id": "1395864371199", "ts": "1704161511428888.694", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428888.694", "dur": 13.56, "ph": "X"}, {"name": "HostToDevice1395864371199", "ph": "s", "cat": "HostToDevice", "id": "1395864371199", "pid": 43767502, "tid": 439084, "ts": "1704161511428901.746"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428902.254", "dur": 37.9895, "ph": "X"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511428931.977", "dur": 17.80017782377646, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 326}, "ph": "X"}, {"name": "HostToDevice1400159338495", "ph": "s", "cat": "HostToDevice", "id": "1400159338495", "pid": 43767502, "tid": 439084, "ts": "1704161511428936.876"}, {"name": "MaxPoolGradWithArgmaxV14", "pid": 800, "tid": 3, "ts": "1704161511428940.243", "dur": 29.47, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 419, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 325}, "ph": "X"}, {"name": 
"HostToDevice1400159338495", "ph": "f", "id": "1400159338495", "ts": "1704161511428940.243", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428940.243", "dur": 29.47, "ph": "X"}, {"name": "HostToDevice1408749273087", "ph": "s", "cat": "HostToDevice", "id": "1408749273087", "pid": 43767502, "tid": 439084, "ts": "1704161511428960.637"}, {"name": "trans_TransData_24", "pid": 800, "tid": 3, "ts": "1704161511428969.833", "dur": 11.8695, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 420, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 327}, "ph": "X"}, {"name": "HostToDevice1408749273087", "ph": "f", "id": "1408749273087", "ts": "1704161511428969.833", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428969.833", "dur": 11.8695, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428969.713", "dur": 0.12, "ph": "X"}, {"name": "HostToDevice1413044240383", "ph": "s", "cat": "HostToDevice", "id": "1413044240383", "pid": 43767502, "tid": 439084, "ts": "1704161511428977.147"}, {"name": "AscendCL@aclnnAddGetWorkspaceSize", "pid": 43767502, "tid": 439228, "ts": "1704161511428980.067", "dur": 12.380123677435538, "args": {"Thread Id": 439228, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddGetWorkspaceSize", "item_id": "0", "connection_id": 330}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428981.702", "dur": 0.19, "ph": "X"}, {"name": "trans_Cast_25", "pid": 800, "tid": 3, "ts": "1704161511428981.892", "dur": 4.8, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 421, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 328}, "ph": "X"}, {"name": "HostToDevice1413044240383", "ph": "f", "id": "1413044240383", "ts": "1704161511428981.892", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": 
"e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428981.892", "dur": 4.8, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511428986.692", "dur": 10.77, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511428995.497", "dur": 7.84007832238244, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 331}, "ph": "X"}, {"name": "aclnnThresholdBackward_ReluGrad_ReluGrad", "pid": 800, "tid": 3, "ts": "1704161511428997.462", "dur": 6.06, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 422, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 332}, "ph": "X"}, {"name": "HostToDevice1430224109567", "ph": "f", "id": "1430224109567", "ts": "1704161511428997.462", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511428997.462", "dur": 6.06, "ph": "X"}, {"name": "AscendCL@aclnnThresholdBackward", "pid": 43767502, "tid": 439084, "ts": "1704161511429000.177", "dur": 13.700136864367275, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnThresholdBackward", "item_id": "0", "connection_id": 335}, "ph": "X"}, {"name": "HostToDevice1430224109567", "ph": "s", "cat": "HostToDevice", "id": "1430224109567", "pid": 43767502, "tid": 439084, "ts": "1704161511429001.247"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429003.522", "dur": 723.8325, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429005.307", "dur": 2.8000279722794432, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 333}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429010.867", "dur": 1.1300112888127751, "args": {"Thread Id": 
439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 334}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429012.617", "dur": 1.3300132868327355, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 336}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429018.467", "dur": 0.8800087912878249, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 337}, "ph": "X"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511429020.697", "dur": 898.1689727080374, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 352}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511429022.177", "dur": 15.19015174961598, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 338}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511429338.161", "dur": 53.51053457024035, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 339}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511429456.792", "dur": 42.2304218819146, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 340}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511429627.794", "dur": 25.590255646653908, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": 
"aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 341}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511429707.764", "dur": 27.690276625863493, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 342}, "ph": "X"}, {"name": "trans_Cast_26", "pid": 800, "tid": 3, "ts": "1704161511429727.355", "dur": 2.26, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 423, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 343}, "ph": "X"}, {"name": "HostToDevice1477468749823", "ph": "f", "id": "1477468749823", "ts": "1704161511429727.355", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429727.355", "dur": 2.26, "ph": "X"}, {"name": "HostToDevice1477468749823", "ph": "s", "cat": "HostToDevice", "id": "1477468749823", "pid": 43767502, "tid": 439084, "ts": "1704161511429727.564"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429729.615", "dur": 25.98, "ph": "X"}, {"name": "trans_TransData_27_MemSet", "pid": 800, "tid": 3, "ts": "1704161511429755.595", "dur": 9.77, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 424, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 344}, "ph": "X"}, {"name": "HostToDevice1481763717119", "ph": "f", "id": "1481763717119", "ts": "1704161511429755.595", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429755.595", "dur": 9.77, "ph": "X"}, {"name": "HostToDevice1481763717119", "ph": "s", "cat": "HostToDevice", "id": "1481763717119", "pid": 43767502, "tid": 439084, "ts": "1704161511429762.925"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429765.365", "dur": 0.11, "ph": "X"}, {"name": "trans_TransData_27", "pid": 800, "tid": 3, "ts": 
"1704161511429765.475", "dur": 6.28, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 425, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 345}, "ph": "X"}, {"name": "HostToDevice1486058684415", "ph": "f", "id": "1486058684415", "ts": "1704161511429765.475", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429765.475", "dur": 6.28, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429771.755", "dur": 6.3195, "ph": "X"}, {"name": "HostToDevice1486058684415", "ph": "s", "cat": "HostToDevice", "id": "1486058684415", "pid": 43767502, "tid": 439084, "ts": "1704161511429772.385"}, {"name": "trans_Cast_28", "pid": 800, "tid": 3, "ts": "1704161511429778.074", "dur": 4.77, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 426, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 346}, "ph": "X"}, {"name": "HostToDevice1490353651711", "ph": "f", "id": "1490353651711", "ts": "1704161511429778.074", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429778.074", "dur": 4.77, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429782.844", "dur": 33.2, "ph": "X"}, {"name": "HostToDevice1490353651711", "ph": "s", "cat": "HostToDevice", "id": "1490353651711", "pid": 43767502, "tid": 439084, "ts": "1704161511429796.665"}, {"name": "trans_TransData_29", "pid": 800, "tid": 3, "ts": "1704161511429816.044", "dur": 13.7395, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 427, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 347}, "ph": "X"}, {"name": "HostToDevice1494648619007", "ph": "f", "id": "1494648619007", "ts": "1704161511429816.044", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": 
"1704161511429816.044", "dur": 13.7395, "ph": "X"}, {"name": "HostToDevice1494648619007", "ph": "s", "cat": "HostToDevice", "id": "1494648619007", "pid": 43767502, "tid": 439084, "ts": "1704161511429826.896"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429829.784", "dur": 26.46, "ph": "X"}, {"name": "Conv2DBackpropInput5", "pid": 800, "tid": 3, "ts": "1704161511429856.244", "dur": 23.6695, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 428, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 348}, "ph": "X"}, {"name": "HostToDevice1498943586303", "ph": "f", "id": "1498943586303", "ts": "1704161511429856.244", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429856.244", "dur": 23.6695, "ph": "X"}, {"name": "HostToDevice1498943586303", "ph": "s", "cat": "HostToDevice", "id": "1498943586303", "pid": 43767502, "tid": 439084, "ts": "1704161511429866.016"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511429873.516", "dur": 26.880268533882653, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 350}, "ph": "X"}, {"name": "trans_TransData_30", "pid": 800, "tid": 3, "ts": "1704161511429880.104", "dur": 10.79, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 429, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 349}, "ph": "X"}, {"name": "HostToDevice1503238553599", "ph": "f", "id": "1503238553599", "ts": "1704161511429880.104", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429880.104", "dur": 10.79, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429879.914", "dur": 0.19, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429890.894", "dur": 
7.58, "ph": "X"}, {"name": "HostToDevice1503238553599", "ph": "s", "cat": "HostToDevice", "id": "1503238553599", "pid": 43767502, "tid": 439084, "ts": "1704161511429891.336"}, {"name": "trans_Cast_31", "pid": 800, "tid": 3, "ts": "1704161511429898.474", "dur": 4.81, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 430, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 351}, "ph": "X"}, {"name": "HostToDevice1511828488191", "ph": "f", "id": "1511828488191", "ts": "1704161511429898.474", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511429898.474", "dur": 4.81, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511429903.284", "dur": 685.463, "ph": "X"}, {"name": "HostToDevice1511828488191", "ph": "s", "cat": "HostToDevice", "id": "1511828488191", "pid": 43767502, "tid": 439084, "ts": "1704161511429907.486"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511429934.466", "dur": 825.9382511231287, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 374}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511429936.376", "dur": 14.830148153180051, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 357}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511429936.736", "dur": 6.020060140400802, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 353}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429944.467", "dur": 1.66001658356567, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", 
"connection_id": 354}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429946.577", "dur": 2.0800207794075862, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 355}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429948.867", "dur": 1.830018281882636, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 356}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511429952.217", "dur": 0.6000059940598806, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 358}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511429952.387", "dur": 29.320292909726167, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 359}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430100.448", "dur": 22.42022397803754, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 360}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430170.659", "dur": 24.730247055168082, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 361}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430319.690", "dur": 24.740247155069078, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 
362}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430392.901", "dur": 28.830288014577267, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 363}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430533.042", "dur": 20.820207993877858, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 364}, "ph": "X"}, {"name": "trans_Cast_32", "pid": 800, "tid": 3, "ts": "1704161511430588.746", "dur": 4.68, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 431, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 365}, "ph": "X"}, {"name": "HostToDevice1571958030335", "ph": "f", "id": "1571958030335", "ts": "1704161511430588.746", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430588.746", "dur": 4.68, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430593.426", "dur": 28.77, "ph": "X"}, {"name": "HostToDevice1571958030335", "ph": "s", "cat": "HostToDevice", "id": "1571958030335", "pid": 43767502, "tid": 439084, "ts": "1704161511430594.283"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430600.623", "dur": 24.220241960217184, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 366}, "ph": "X"}, {"name": "trans_TransData_33", "pid": 800, "tid": 3, "ts": "1704161511430622.196", "dur": 9.3995, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 432, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 367}, "ph": "X"}, {"name": "HostToDevice1580547964927", "ph": "f", "id": "1580547964927", 
"ts": "1704161511430622.196", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430622.196", "dur": 9.3995, "ph": "X"}, {"name": "HostToDevice1580547964927", "ph": "s", "cat": "HostToDevice", "id": "1580547964927", "pid": 43767502, "tid": 439084, "ts": "1704161511430630.843"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430631.596", "dur": 12.93, "ph": "X"}, {"name": "trans_Cast_34", "pid": 800, "tid": 3, "ts": "1704161511430644.526", "dur": 4.06, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 433, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 368}, "ph": "X"}, {"name": "HostToDevice1584842932223", "ph": "f", "id": "1584842932223", "ts": "1704161511430644.526", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430644.526", "dur": 4.06, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430648.586", "dur": 14.14, "ph": "X"}, {"name": "HostToDevice1584842932223", "ph": "s", "cat": "HostToDevice", "id": "1584842932223", "pid": 43767502, "tid": 439084, "ts": "1704161511430656.704"}, {"name": "trans_TransData_35", "pid": 800, "tid": 3, "ts": "1704161511430662.726", "dur": 13.7795, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 434, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 369}, "ph": "X"}, {"name": "HostToDevice1589137899519", "ph": "f", "id": "1589137899519", "ts": "1704161511430662.726", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430662.726", "dur": 13.7795, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430676.506", "dur": 28.48, "ph": "X"}, {"name": "HostToDevice1589137899519", "ph": "s", "cat": "HostToDevice", "id": "1589137899519", "pid": 43767502, "tid": 439084, "ts": 
"1704161511430677.454"}, {"name": "Conv2DBackpropFilter6_MemSet", "pid": 800, "tid": 3, "ts": "1704161511430704.986", "dur": 8.29, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 435, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 370}, "ph": "X"}, {"name": "HostToDevice1593432866815", "ph": "f", "id": "1593432866815", "ts": "1704161511430704.986", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430704.986", "dur": 8.29, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430713.276", "dur": 0.2, "ph": "X"}, {"name": "Conv2DBackpropFilter6", "pid": 800, "tid": 3, "ts": "1704161511430713.476", "dur": 15.0695, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 436, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 371}, "ph": "X"}, {"name": "HostToDevice1597727834111", "ph": "f", "id": "1597727834111", "ts": "1704161511430713.476", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430713.476", "dur": 15.0695, "ph": "X"}, {"name": "HostToDevice1593432866815", "ph": "s", "cat": "HostToDevice", "id": "1593432866815", "pid": 43767502, "tid": 439084, "ts": "1704161511430715.814"}, {"name": "HostToDevice1597727834111", "ph": "s", "cat": "HostToDevice", "id": "1597727834111", "pid": 43767502, "tid": 439084, "ts": "1704161511430727.975"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430728.545", "dur": 10.69, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430738.035", "dur": 21.550215286650715, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 373}, "ph": "X"}, {"name": "trans_TransData_36", "pid": 800, "tid": 3, "ts": "1704161511430739.235", "dur": 11.42, 
"args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 437, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 372}, "ph": "X"}, {"name": "HostToDevice1602022801407", "ph": "f", "id": "1602022801407", "ts": "1704161511430739.235", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511430739.235", "dur": 11.42, "ph": "X"}, {"name": "HostToDevice1602022801407", "ph": "s", "cat": "HostToDevice", "id": "1602022801407", "pid": 43767502, "tid": 439084, "ts": "1704161511430749.075"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511430750.655", "dur": 301.677, "ph": "X"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511430773.035", "dur": 317.3531703581719, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 386}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511430774.535", "dur": 8.70008691386827, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 375}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430803.885", "dur": 27.110270831605607, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 381}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511430808.775", "dur": 7.130071229411582, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 376}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511430817.885", "dur": 1.86001858158563, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": 
"aclDestroyTensorDesc", "item_id": "0", "connection_id": 377}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511430820.445", "dur": 1.2300122878227553, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 378}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511430822.505", "dur": 0.8700086913868269, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 379}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511430824.365", "dur": 0.6100060939608787, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 380}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511430938.186", "dur": 20.490204697144925, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 382}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431003.617", "dur": 23.540235166949316, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 383}, "ph": "X"}, {"name": "ReduceSum7_MemSet", "pid": 800, "tid": 3, "ts": "1704161511431052.332", "dur": 7.96, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 438, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 384}, "ph": "X"}, {"name": "HostToDevice1653562408959", "ph": "f", "id": "1653562408959", "ts": "1704161511431052.332", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431052.332", "dur": 7.96, "ph": 
"X"}, {"name": "HostToDevice1653562408959", "ph": "s", "cat": "HostToDevice", "id": "1653562408959", "pid": 43767502, "tid": 439084, "ts": "1704161511431058.928"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431060.292", "dur": 10.97, "ph": "X"}, {"name": "ReduceSum7", "pid": 800, "tid": 3, "ts": "1704161511431071.262", "dur": 19.3195, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 439, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 385}, "ph": "X"}, {"name": "HostToDevice1657857376255", "ph": "f", "id": "1657857376255", "ts": "1704161511431071.262", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431071.262", "dur": 19.3195, "ph": "X"}, {"name": "HostToDevice1657857376255", "ph": "s", "cat": "HostToDevice", "id": "1657857376255", "pid": 43767502, "tid": 439084, "ts": "1704161511431077.838"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431090.582", "dur": 77.1695, "ph": "X"}, {"name": "AscendCL@aclnnInplaceCopy", "pid": 43767502, "tid": 439084, "ts": "1704161511431108.718", "dur": 20.640206195659893, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceCopy", "item_id": "0", "connection_id": 387}, "ph": "X"}, {"name": "MEMCPY_ASYNC", "pid": 800, "tid": 3, "ts": "1704161511431121.892", "dur": 1.11, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 440, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": -1}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511431138.119", "dur": 57.58057522994655, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 395}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431141.639", "dur": 22.200221780215585, "args": {"Thread Id": 
437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 392}, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511431147.878", "dur": 16.160161440012786, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 393}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511431150.119", "dur": 1.720017182971658, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 388}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511431154.039", "dur": 1.5600155845556898, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 389}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511431156.019", "dur": 0.7900078921788428, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 390}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511431157.159", "dur": 0.42000419584191645, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 391}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511431167.751", "dur": 2.83, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 441, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 394}, "ph": "X"}, {"name": "HostToDevice1696512081919", "ph": "f", "id": "1696512081919", "ts": "1704161511431167.751", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431167.751", "dur": 2.83, 
"ph": "X"}, {"name": "HostToDevice1696512081919", "ph": "s", "cat": "HostToDevice", "id": "1696512081919", "pid": 43767502, "tid": 439084, "ts": "1704161511431168.359"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431170.581", "dur": 37.7295, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511431202.059", "dur": 29.400293708934154, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 398}, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511431207.049", "dur": 6.260062538024755, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 396}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511431208.310", "dur": 1.42, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 442, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 397}, "ph": "X"}, {"name": "HostToDevice1709396983807", "ph": "f", "id": "1709396983807", "ts": "1704161511431208.310", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431208.310", "dur": 1.42, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431209.730", "dur": 648.9735, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431211.750", "dur": 27.55027522724952, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 399}, "ph": "X"}, {"name": "HostToDevice1709396983807", "ph": "s", "cat": "HostToDevice", "id": "1709396983807", "pid": 43767502, "tid": 439084, "ts": "1704161511431215.129"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511431236.740", "dur": 
790.5478975734967, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 412}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511431238.000", "dur": 14.48014465664512, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 400}, "ph": "X"}, {"name": "AscendCL@aclnnDivsGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431349.821", "dur": 20.870208493382847, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivsGetWorkspaceSize", "item_id": "0", "connection_id": 401}, "ph": "X"}, {"name": "AscendCL@aclnnAddcdivGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431415.551", "dur": 23.07023047160241, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnAddcdivGetWorkspaceSize", "item_id": "0", "connection_id": 402}, "ph": "X"}, {"name": "AscendCL@aclnnReduceSumGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431631.564", "dur": 62.8906282773765, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnReduceSumGetWorkspaceSize", "item_id": "0", "connection_id": 403}, "ph": "X"}, {"name": "AscendCL@aclnnCastGetWorkspaceSize", "pid": 43767502, "tid": 437675, "ts": "1704161511431736.575", "dur": 16.070160540903803, "args": {"Thread Id": 437675, "Mode": "ACL_NN", "level": "acl", "id": "aclnnCastGetWorkspaceSize", "item_id": "0", "connection_id": 404}, "ph": "X"}, {"name": "trans_Cast_37", "pid": 800, "tid": 3, "ts": "1704161511431858.704", "dur": 5.43, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 443, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 405}, "ph": "X"}, {"name": "HostToDevice1743756722175", "ph": "f", "id": "1743756722175", "ts": "1704161511431858.704", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": 
"Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431858.704", "dur": 5.43, "ph": "X"}, {"name": "HostToDevice1743756722175", "ph": "s", "cat": "HostToDevice", "id": "1743756722175", "pid": 43767502, "tid": 439084, "ts": "1704161511431862.766"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431864.134", "dur": 27.5995, "ph": "X"}, {"name": "trans_TransData_38", "pid": 800, "tid": 3, "ts": "1704161511431891.734", "dur": 8.44, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 444, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 406}, "ph": "X"}, {"name": "HostToDevice1748051689471", "ph": "f", "id": "1748051689471", "ts": "1704161511431891.734", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431891.734", "dur": 8.44, "ph": "X"}, {"name": "HostToDevice1748051689471", "ph": "s", "cat": "HostToDevice", "id": "1748051689471", "pid": 43767502, "tid": 439084, "ts": "1704161511431900.226"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431900.174", "dur": 14.11, "ph": "X"}, {"name": "trans_Cast_39", "pid": 800, "tid": 3, "ts": "1704161511431914.284", "dur": 4.11, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 445, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 407}, "ph": "X"}, {"name": "HostToDevice1752346656767", "ph": "f", "id": "1752346656767", "ts": "1704161511431914.284", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431914.284", "dur": 4.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431918.394", "dur": 18.8995, "ph": "X"}, {"name": "HostToDevice1752346656767", "ph": "s", "cat": "HostToDevice", "id": "1752346656767", "pid": 43767502, "tid": 439084, "ts": "1704161511431926.356"}, {"name": "trans_TransData_40", "pid": 800, "tid": 3, "ts": 
"1704161511431937.293", "dur": 8.9, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 446, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 408}, "ph": "X"}, {"name": "HostToDevice1756641624063", "ph": "f", "id": "1756641624063", "ts": "1704161511431937.293", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431937.293", "dur": 8.9, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431946.193", "dur": 15.29, "ph": "X"}, {"name": "HostToDevice1756641624063", "ph": "s", "cat": "HostToDevice", "id": "1756641624063", "pid": 43767502, "tid": 439084, "ts": "1704161511431947.836"}, {"name": "MaxPoolGradWithArgmaxV18", "pid": 800, "tid": 3, "ts": "1704161511431961.483", "dur": 24.7395, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 447, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 409}, "ph": "X"}, {"name": "HostToDevice1760936591359", "ph": "f", "id": "1760936591359", "ts": "1704161511431961.483", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511431961.483", "dur": 24.7395, "ph": "X"}, {"name": "HostToDevice1760936591359", "ph": "s", "cat": "HostToDevice", "id": "1760936591359", "pid": 43767502, "tid": 439084, "ts": "1704161511431976.447"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431986.222", "dur": 0.29, "ph": "X"}, {"name": "trans_TransData_41", "pid": 800, "tid": 3, "ts": "1704161511431986.512", "dur": 7.71, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 448, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 410}, "ph": "X"}, {"name": "HostToDevice1765231558655", "ph": "f", "id": "1765231558655", "ts": "1704161511431986.512", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": 
"1704161511431986.512", "dur": 7.71, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511431994.222", "dur": 7.44, "ph": "X"}, {"name": "HostToDevice1765231558655", "ph": "s", "cat": "HostToDevice", "id": "1765231558655", "pid": 43767502, "tid": 439084, "ts": "1704161511431999.017"}, {"name": "trans_Cast_42", "pid": 800, "tid": 3, "ts": "1704161511432001.662", "dur": 5.71, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 449, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 411}, "ph": "X"}, {"name": "HostToDevice1769526525951", "ph": "f", "id": "1769526525951", "ts": "1704161511432001.662", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432001.662", "dur": 5.71, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432007.372", "dur": 25.1095, "ph": "X"}, {"name": "HostToDevice1769526525951", "ph": "s", "cat": "HostToDevice", "id": "1769526525951", "pid": 43767502, "tid": 439084, "ts": "1704161511432015.887"}, {"name": "aclnnThresholdBackward_ReluGrad_ReluGrad", "pid": 800, "tid": 3, "ts": "1704161511432032.482", "dur": 11.59, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 450, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 413}, "ph": "X"}, {"name": "HostToDevice1778116460543", "ph": "f", "id": "1778116460543", "ts": "1704161511432032.482", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432032.482", "dur": 11.59, "ph": "X"}, {"name": "AscendCL@aclnnThresholdBackward", "pid": 43767502, "tid": 439084, "ts": "1704161511432039.808", "dur": 10.080100700205994, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnThresholdBackward", "item_id": "0", "connection_id": 414}, "ph": "X"}, {"name": "HostToDevice1778116460543", "ph": "s", "cat": "HostToDevice", "id": 
"1778116460543", "pid": 43767502, "tid": 439084, "ts": "1704161511432040.698"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432044.072", "dur": 39.2995, "ph": "X"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511432055.558", "dur": 170.31170141389714, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 428}, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511432056.938", "dur": 12.01011998109861, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 415}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511432076.118", "dur": 8.230082218521362, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 416}, "ph": "X"}, {"name": "trans_Cast_32", "pid": 800, "tid": 3, "ts": "1704161511432083.372", "dur": 4.7, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 451, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 421}, "ph": "X"}, {"name": "HostToDevice1812476198911", "ph": "f", "id": "1812476198911", "ts": "1704161511432083.372", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432083.372", "dur": 4.7, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432086.128", "dur": 1.9100190810906204, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 417}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432088.072", "dur": 21.32, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432088.258", "dur": 
1.6300162838626757, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 418}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432090.318", "dur": 1.0800107893077853, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 419}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432092.108", "dur": 1.0900108892087832, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 420}, "ph": "X"}, {"name": "HostToDevice1812476198911", "ph": "s", "cat": "HostToDevice", "id": "1812476198911", "pid": 43767502, "tid": 439084, "ts": "1704161511432092.728"}, {"name": "trans_TransData_33", "pid": 800, "tid": 3, "ts": "1704161511432109.392", "dur": 9.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 452, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 422}, "ph": "X"}, {"name": "HostToDevice1816771166207", "ph": "f", "id": "1816771166207", "ts": "1704161511432109.392", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432109.392", "dur": 9.53, "ph": "X"}, {"name": "HostToDevice1816771166207", "ph": "s", "cat": "HostToDevice", "id": "1816771166207", "pid": 43767502, "tid": 439084, "ts": "1704161511432116.988"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432118.922", "dur": 7.64, "ph": "X"}, {"name": "trans_Cast_34", "pid": 800, "tid": 3, "ts": "1704161511432126.562", "dur": 5.7795, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 453, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 423}, "ph": "X"}, {"name": "HostToDevice1821066133503", "ph": "f", "id": "1821066133503", "ts": 
"1704161511432126.562", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432126.562", "dur": 5.7795, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432132.341", "dur": 12.78, "ph": "X"}, {"name": "HostToDevice1821066133503", "ph": "s", "cat": "HostToDevice", "id": "1821066133503", "pid": 43767502, "tid": 439084, "ts": "1704161511432138.228"}, {"name": "trans_TransData_35", "pid": 800, "tid": 3, "ts": "1704161511432145.121", "dur": 8.87, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 454, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 424}, "ph": "X"}, {"name": "HostToDevice1825361100799", "ph": "f", "id": "1825361100799", "ts": "1704161511432145.121", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432145.121", "dur": 8.87, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432153.991", "dur": 25.4195, "ph": "X"}, {"name": "HostToDevice1825361100799", "ph": "s", "cat": "HostToDevice", "id": "1825361100799", "pid": 43767502, "tid": 439084, "ts": "1704161511432156.669"}, {"name": "Conv2DBackpropFilter6_MemSet", "pid": 800, "tid": 3, "ts": "1704161511432179.410", "dur": 9.84, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 455, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 425}, "ph": "X"}, {"name": "HostToDevice1829656068095", "ph": "f", "id": "1829656068095", "ts": "1704161511432179.410", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432179.410", "dur": 9.84, "ph": "X"}, {"name": "HostToDevice1829656068095", "ph": "s", "cat": "HostToDevice", "id": "1829656068095", "pid": 43767502, "tid": 439084, "ts": "1704161511432188.079"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511432189.250", "dur": 0.15, "ph": "X"}, {"name": "Conv2DBackpropFilter6", "pid": 800, "tid": 3, "ts": "1704161511432189.400", "dur": 35.18, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 456, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 426}, "ph": "X"}, {"name": "HostToDevice1833951035391", "ph": "f", "id": "1833951035391", "ts": "1704161511432189.400", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432189.400", "dur": 35.18, "ph": "X"}, {"name": "HostToDevice1833951035391", "ph": "s", "cat": "HostToDevice", "id": "1833951035391", "pid": 43767502, "tid": 439084, "ts": "1704161511432199.149"}, {"name": "HostToDevice1838246002687", "ph": "s", "cat": "HostToDevice", "id": "1838246002687", "pid": 43767502, "tid": 439084, "ts": "1704161511432216.440"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432224.580", "dur": 0.15, "ph": "X"}, {"name": "trans_TransData_36", "pid": 800, "tid": 3, "ts": "1704161511432224.730", "dur": 9.4295, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 457, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 427}, "ph": "X"}, {"name": "HostToDevice1838246002687", "ph": "f", "id": "1838246002687", "ts": "1704161511432224.730", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432224.730", "dur": 9.4295, "ph": "X"}, {"name": "AscendCL@aclopCompileAndExecute", "pid": 43767502, "tid": 439084, "ts": "1704161511432234.100", "dur": 63.06062997569346, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "aclopCompileAndExecute", "item_id": "0", "connection_id": 437}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432234.160", "dur": 26.77, "ph": "X"}, {"name": "AscendCL@opCompile", "pid": 43767502, "tid": 439084, "ts": "1704161511432235.290", 
"dur": 5.850058442083837, "args": {"Thread Id": 439084, "Mode": "ACL_OP", "level": "acl", "id": "opCompile", "item_id": "0", "connection_id": 429}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511432257.490", "dur": 9.010090010799207, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 430}, "ph": "X"}, {"name": "ReduceSum7_MemSet", "pid": 800, "tid": 3, "ts": "1704161511432260.930", "dur": 9.23, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 458, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 433}, "ph": "X"}, {"name": "HostToDevice1864015806463", "ph": "f", "id": "1864015806463", "ts": "1704161511432260.930", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432260.930", "dur": 9.23, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432268.150", "dur": 2.930029270992417, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 431}, "ph": "X"}, {"name": "HostToDevice1864015806463", "ph": "s", "cat": "HostToDevice", "id": "1864015806463", "pid": 43767502, "tid": 439084, "ts": "1704161511432269.360"}, {"name": "ReduceSum7", "pid": 800, "tid": 3, "ts": "1704161511432270.310", "dur": 14.4895, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 459, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 436}, "ph": "X"}, {"name": "HostToDevice1876900708351", "ph": "f", "id": "1876900708351", "ts": "1704161511432270.310", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432270.310", "dur": 14.4895, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432270.160", "dur": 0.15, "ph": 
"X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432271.510", "dur": 1.6000159841596817, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 432}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432274.190", "dur": 1.6000159841596817, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 434}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432279.580", "dur": 0.4300042957429145, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 435}, "ph": "X"}, {"name": "HostToDevice1876900708351", "ph": "s", "cat": "HostToDevice", "id": "1876900708351", "pid": 43767502, "tid": 439084, "ts": "1704161511432284.190"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432284.800", "dur": 69.8395, "ph": "X"}, {"name": "MEMCPY_ASYNC", "pid": 800, "tid": 3, "ts": "1704161511432297.430", "dur": 1.08, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 460, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": -1}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceCopy", "pid": 43767502, "tid": 439084, "ts": "1704161511432306.670", "dur": 12.000119881197614, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceCopy", "item_id": "0", "connection_id": 438}, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432325.940", "dur": 49.130490813603224, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 445}, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432334.270", "dur": 
13.850138362882246, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 443}, "ph": "X"}, {"name": "AscendCL@aclopDestroyAttr", "pid": 43767502, "tid": 439085, "ts": "1704161511432336.340", "dur": 1.890018881288624, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclopDestroyAttr", "item_id": "0", "connection_id": 439}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432340.310", "dur": 1.3000129871297414, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 440}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432342.331", "dur": 1.4300142858427156, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 441}, "ph": "X"}, {"name": "AscendCL@aclDestroyTensorDesc", "pid": 43767502, "tid": 439085, "ts": "1704161511432344.681", "dur": 0.39000389613892245, "args": {"Thread Id": 439085, "Mode": "ACL_OP", "level": "acl", "id": "aclDestroyTensorDesc", "item_id": "0", "connection_id": 442}, "ph": "X"}, {"name": "HostToDevice1911260446719", "ph": "s", "cat": "HostToDevice", "id": "1911260446719", "pid": 43767502, "tid": 439084, "ts": "1704161511432352.371"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511432354.639", "dur": 1.61, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 461, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 444}, "ph": "X"}, {"name": "HostToDevice1911260446719", "ph": "f", "id": "1911260446719", "ts": "1704161511432354.639", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432354.639", "dur": 1.61, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511432356.249", "dur": 30.3995, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432382.661", "dur": 30.36030329942996, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 448}, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511432386.648", "dur": 1.4, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 462, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 447}, "ph": "X"}, {"name": "HostToDevice1924145348607", "ph": "f", "id": "1924145348607", "ts": "1704161511432386.648", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432386.648", "dur": 1.4, "ph": "X"}, {"name": "AscendCL@Add_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432387.551", "dur": 6.740067333272659, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Add_Tiling", "item_id": "0", "connection_id": 446}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432388.048", "dur": 25.71, "ph": "X"}, {"name": "HostToDevice1924145348607", "ph": "s", "cat": "HostToDevice", "id": "1924145348607", "pid": 43767502, "tid": 439084, "ts": "1704161511432396.041"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432413.758", "dur": 1.59, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 463, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 449}, "ph": "X"}, {"name": "HostToDevice1932735283199", "ph": "f", "id": "1932735283199", "ts": "1704161511432413.758", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432413.758", "dur": 1.59, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432415.348", "dur": 6.92, "ph": "X"}, 
{"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432422.011", "dur": 8.500084915848308, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 450}, "ph": "X"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511432422.268", "dur": 1.63, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 464, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 451}, "ph": "X"}, {"name": "HostToDevice1941325217791", "ph": "f", "id": "1941325217791", "ts": "1704161511432422.268", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432422.268", "dur": 1.63, "ph": "X"}, {"name": "HostToDevice1932735283199", "ph": "s", "cat": "HostToDevice", "id": "1932735283199", "pid": 43767502, "tid": 439084, "ts": "1704161511432422.761"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432423.898", "dur": 10.6495, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432434.548", "dur": 1.59, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 465, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 453}, "ph": "X"}, {"name": "HostToDevice1949915152383", "ph": "f", "id": "1949915152383", "ts": "1704161511432434.548", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432434.548", "dur": 1.59, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432434.691", "dur": 6.360063537034735, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 452}, "ph": "X"}, {"name": "HostToDevice1941325217791", "ph": "s", "cat": "HostToDevice", "id": "1941325217791", "pid": 43767502, "tid": 439084, "ts": 
"1704161511432435.122"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432436.138", "dur": 8.4, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432444.372", "dur": 6.510065035549705, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 454}, "ph": "X"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511432444.538", "dur": 4.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 466, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 455}, "ph": "X"}, {"name": "HostToDevice1958505086975", "ph": "f", "id": "1958505086975", "ts": "1704161511432444.538", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432444.538", "dur": 4.41, "ph": "X"}, {"name": "HostToDevice1949915152383", "ph": "s", "cat": "HostToDevice", "id": "1949915152383", "pid": 43767502, "tid": 439084, "ts": "1704161511432445.191"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432448.948", "dur": 6.36, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511432455.308", "dur": 1.7, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 467, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 457}, "ph": "X"}, {"name": "HostToDevice1967095021567", "ph": "f", "id": "1967095021567", "ts": "1704161511432455.308", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432455.308", "dur": 1.7, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511432457.072", "dur": 6.9900698307976095, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 456}, "ph": "X"}, 
{"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432457.008", "dur": 39.9695, "ph": "X"}, {"name": "HostToDevice1958505086975", "ph": "s", "cat": "HostToDevice", "id": "1958505086975", "pid": 43767502, "tid": 439084, "ts": "1704161511432457.782"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511432471.662", "dur": 6.140061339212778, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 458}, "ph": "X"}, {"name": "HostToDevice1967095021567", "ph": "s", "cat": "HostToDevice", "id": "1967095021567", "pid": 43767502, "tid": 439084, "ts": "1704161511432472.222"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511432485.042", "dur": 37.10037063270262, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 461}, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432493.122", "dur": 8.080080720006393, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 459}, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511432496.978", "dur": 1.87, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 468, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 460}, "ph": "X"}, {"name": "HostToDevice1979979923455", "ph": "f", "id": "1979979923455", "ts": "1704161511432496.978", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432496.978", "dur": 1.87, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432498.848", "dur": 16.15, "ph": "X"}, {"name": "HostToDevice1979979923455", "ph": "s", "cat": "HostToDevice", "id": "1979979923455", "pid": 43767502, "tid": 439084, "ts": "1704161511432503.672"}, {"name": 
"aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511432514.998", "dur": 1.73, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 469, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 462}, "ph": "X"}, {"name": "HostToDevice1988569858047", "ph": "f", "id": "1988569858047", "ts": "1704161511432514.998", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432514.998", "dur": 1.73, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432516.728", "dur": 38.8295, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511432529.373", "dur": 7.170071629015574, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 463}, "ph": "X"}, {"name": "HostToDevice1988569858047", "ph": "s", "cat": "HostToDevice", "id": "1988569858047", "pid": 43767502, "tid": 439084, "ts": "1704161511432529.962"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511432542.152", "dur": 40.230401901715, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 466}, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432551.852", "dur": 9.10009090990819, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 464}, "ph": "X"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511432555.557", "dur": 4.03, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 470, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 465}, "ph": "X"}, {"name": "HostToDevice2001454759935", "ph": "f", "id": "2001454759935", "ts": "1704161511432555.557", "cat": "HostToDevice", "pid": 800, 
"tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432555.557", "dur": 4.03, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432559.587", "dur": 19.5795, "ph": "X"}, {"name": "HostToDevice2001454759935", "ph": "s", "cat": "HostToDevice", "id": "2001454759935", "pid": 43767502, "tid": 439084, "ts": "1704161511432563.503"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432579.166", "dur": 1.51, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 471, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 467}, "ph": "X"}, {"name": "HostToDevice2010044694527", "ph": "f", "id": "2010044694527", "ts": "1704161511432579.166", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432579.166", "dur": 1.51, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432580.676", "dur": 6.74, "ph": "X"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511432587.416", "dur": 1.47, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 472, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 469}, "ph": "X"}, {"name": "HostToDevice2018634629119", "ph": "f", "id": "2018634629119", "ts": "1704161511432587.416", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432587.416", "dur": 1.47, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432588.743", "dur": 7.24007232832256, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 468}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432588.886", "dur": 8.7, "ph": "X"}, {"name": "HostToDevice2010044694527", "ph": "s", "cat": "HostToDevice", 
"id": "2010044694527", "pid": 43767502, "tid": 439084, "ts": "1704161511432589.223"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432597.586", "dur": 1.48, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 473, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 471}, "ph": "X"}, {"name": "HostToDevice2027224563711", "ph": "f", "id": "2027224563711", "ts": "1704161511432597.586", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432597.586", "dur": 1.48, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432599.066", "dur": 7.29, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432599.183", "dur": 6.650066434163677, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 470}, "ph": "X"}, {"name": "HostToDevice2018634629119", "ph": "s", "cat": "HostToDevice", "id": "2018634629119", "pid": 43767502, "tid": 439084, "ts": "1704161511432599.713"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511432606.356", "dur": 3.87, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 474, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 473}, "ph": "X"}, {"name": "HostToDevice2035814498303", "ph": "f", "id": "2035814498303", "ts": "1704161511432606.356", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432606.356", "dur": 3.87, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432609.163", "dur": 6.340063337232739, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 472}, "ph": "X"}, {"name": "HostToDevice2027224563711", "ph": 
"s", "cat": "HostToDevice", "id": "2027224563711", "pid": 43767502, "tid": 439084, "ts": "1704161511432609.573"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432610.226", "dur": 4.76, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511432614.986", "dur": 1.4, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 475, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 475}, "ph": "X"}, {"name": "HostToDevice2044404432895", "ph": "f", "id": "2044404432895", "ts": "1704161511432614.986", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432614.986", "dur": 1.4, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432616.386", "dur": 24.6695, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511432618.983", "dur": 6.310063037529745, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 474}, "ph": "X"}, {"name": "HostToDevice2035814498303", "ph": "s", "cat": "HostToDevice", "id": "2035814498303", "pid": 43767502, "tid": 439084, "ts": "1704161511432619.544"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511432629.343", "dur": 5.760057542974854, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 476}, "ph": "X"}, {"name": "HostToDevice2044404432895", "ph": "s", "cat": "HostToDevice", "id": "2044404432895", "pid": 43767502, "tid": 439084, "ts": "1704161511432629.903"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511432638.704", "dur": 26.270262439921776, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 479}, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, 
"tid": 3, "ts": "1704161511432641.056", "dur": 1.46, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 476, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 478}, "ph": "X"}, {"name": "HostToDevice2057289334783", "ph": "f", "id": "2057289334783", "ts": "1704161511432641.056", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432641.056", "dur": 1.46, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432642.516", "dur": 12.48, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432643.404", "dur": 5.370053646835932, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 477}, "ph": "X"}, {"name": "HostToDevice2057289334783", "ph": "s", "cat": "HostToDevice", "id": "2057289334783", "pid": 43767502, "tid": 439084, "ts": "1704161511432650.344"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511432654.996", "dur": 1.49, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 477, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 480}, "ph": "X"}, {"name": "HostToDevice2065879269375", "ph": "f", "id": "2065879269375", "ts": "1704161511432654.996", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432654.996", "dur": 1.49, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432656.486", "dur": 26.4495, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511432669.504", "dur": 6.620066134460683, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 481}, "ph": "X"}, {"name": "HostToDevice2065879269375", "ph": "s", "cat": "HostToDevice", "id": "2065879269375", 
"pid": 43767502, "tid": 439084, "ts": "1704161511432669.944"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511432679.574", "dur": 27.85027822427946, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 484}, "ph": "X"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511432682.936", "dur": 3.47, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 478, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 483}, "ph": "X"}, {"name": "HostToDevice2078764171263", "ph": "f", "id": "2078764171263", "ts": "1704161511432682.936", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432682.936", "dur": 3.47, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432684.574", "dur": 5.740057343172858, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 482}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432686.406", "dur": 13.41, "ph": "X"}, {"name": "HostToDevice2078764171263", "ph": "s", "cat": "HostToDevice", "id": "2078764171263", "pid": 43767502, "tid": 439084, "ts": "1704161511432692.384"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432699.816", "dur": 2.96, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 479, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 485}, "ph": "X"}, {"name": "HostToDevice2087354105855", "ph": "f", "id": "2087354105855", "ts": "1704161511432699.816", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432699.816", "dur": 2.96, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511432702.776", "dur": 12.42, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432712.334", "dur": 6.9900698307976095, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 486}, "ph": "X"}, {"name": "HostToDevice2087354105855", "ph": "s", "cat": "HostToDevice", "id": "2087354105855", "pid": 43767502, "tid": 439084, "ts": "1704161511432713.154"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511432715.196", "dur": 2.95, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 480, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 487}, "ph": "X"}, {"name": "HostToDevice2095944040447", "ph": "f", "id": "2095944040447", "ts": "1704161511432715.196", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432715.196", "dur": 2.95, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432718.256", "dur": 3.49, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 481, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 489}, "ph": "X"}, {"name": "HostToDevice2104533975039", "ph": "f", "id": "2104533975039", "ts": "1704161511432718.256", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432718.256", "dur": 3.49, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432718.146", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432721.746", "dur": 12.2195, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432722.354", "dur": 6.6800667338666715, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 
488}, "ph": "X"}, {"name": "HostToDevice2095944040447", "ph": "s", "cat": "HostToDevice", "id": "2095944040447", "pid": 43767502, "tid": 439084, "ts": "1704161511432722.884"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432732.094", "dur": 5.73005724327186, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 490}, "ph": "X"}, {"name": "HostToDevice2104533975039", "ph": "s", "cat": "HostToDevice", "id": "2104533975039", "pid": 43767502, "tid": 439084, "ts": "1704161511432732.555"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511432733.965", "dur": 6.45, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 482, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 491}, "ph": "X"}, {"name": "HostToDevice2113123909631", "ph": "f", "id": "2113123909631", "ts": "1704161511432733.965", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432733.965", "dur": 6.45, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511432740.525", "dur": 2.91, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 483, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 493}, "ph": "X"}, {"name": "HostToDevice2121713844223", "ph": "f", "id": "2121713844223", "ts": "1704161511432740.525", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432740.525", "dur": 2.91, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432740.415", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511432741.285", "dur": 6.160061539014775, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", 
"item_id": "0", "connection_id": 492}, "ph": "X"}, {"name": "HostToDevice2113123909631", "ph": "s", "cat": "HostToDevice", "id": "2113123909631", "pid": 43767502, "tid": 439084, "ts": "1704161511432741.885"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432743.435", "dur": 24.18, "ph": "X"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511432751.084", "dur": 5.450054446043916, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 494}, "ph": "X"}, {"name": "HostToDevice2121713844223", "ph": "s", "cat": "HostToDevice", "id": "2121713844223", "pid": 43767502, "tid": 439084, "ts": "1704161511432751.435"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511432759.735", "dur": 24.910248853386044, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 497}, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432763.755", "dur": 5.350053447033935, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 495}, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511432767.615", "dur": 3.67, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 484, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 496}, "ph": "X"}, {"name": "HostToDevice2134598746111", "ph": "f", "id": "2134598746111", "ts": "1704161511432767.615", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432767.615", "dur": 3.67, "ph": "X"}, {"name": "HostToDevice2134598746111", "ph": "s", "cat": "HostToDevice", "id": "2134598746111", "pid": 43767502, "tid": 439084, "ts": "1704161511432770.755"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511432771.285", "dur": 5.9795, "ph": "X"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511432777.264", "dur": 3.59, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 485, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 498}, "ph": "X"}, {"name": "HostToDevice2143188680703", "ph": "f", "id": "2143188680703", "ts": "1704161511432777.264", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432777.264", "dur": 3.59, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432780.854", "dur": 16.23, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511432789.215", "dur": 7.44007432634252, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 499}, "ph": "X"}, {"name": "HostToDevice2143188680703", "ph": "s", "cat": "HostToDevice", "id": "2143188680703", "pid": 43767502, "tid": 439084, "ts": "1704161511432789.805"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511432797.084", "dur": 5.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 486, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 501}, "ph": "X"}, {"name": "HostToDevice2156073582591", "ph": "f", "id": "2156073582591", "ts": "1704161511432797.084", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432797.084", "dur": 5.41, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511432800.145", "dur": 27.330273029427566, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 502}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432802.494", 
"dur": 14.19, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432804.605", "dur": 5.710057043469864, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 500}, "ph": "X"}, {"name": "HostToDevice2156073582591", "ph": "s", "cat": "HostToDevice", "id": "2156073582591", "pid": 43767502, "tid": 439084, "ts": "1704161511432812.045"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432816.684", "dur": 1.43, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 487, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 503}, "ph": "X"}, {"name": "HostToDevice2164663517183", "ph": "f", "id": "2164663517183", "ts": "1704161511432816.684", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432816.684", "dur": 1.43, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432818.114", "dur": 18.0095, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432832.396", "dur": 7.180071728916571, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 504}, "ph": "X"}, {"name": "HostToDevice2164663517183", "ph": "s", "cat": "HostToDevice", "id": "2164663517183", "pid": 43767502, "tid": 439084, "ts": "1704161511432832.706"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511432836.124", "dur": 1.43, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 488, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 505}, "ph": "X"}, {"name": "HostToDevice2173253451775", "ph": "f", "id": "2173253451775", "ts": "1704161511432836.124", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": 
"1704161511432836.124", "dur": 1.43, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432837.554", "dur": 0.12, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432837.674", "dur": 1.34, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 489, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 507}, "ph": "X"}, {"name": "HostToDevice2181843386367", "ph": "f", "id": "2181843386367", "ts": "1704161511432837.674", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432837.674", "dur": 1.34, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432839.014", "dur": 11.15, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432842.866", "dur": 6.220062138420763, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 506}, "ph": "X"}, {"name": "HostToDevice2173253451775", "ph": "s", "cat": "HostToDevice", "id": "2173253451775", "pid": 43767502, "tid": 439084, "ts": "1704161511432843.206"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511432850.164", "dur": 3.55, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 490, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 509}, "ph": "X"}, {"name": "HostToDevice2190433320959", "ph": "f", "id": "2190433320959", "ts": "1704161511432850.164", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432850.164", "dur": 3.55, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432852.536", "dur": 5.660056543964875, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 
508}, "ph": "X"}, {"name": "HostToDevice2181843386367", "ph": "s", "cat": "HostToDevice", "id": "2181843386367", "pid": 43767502, "tid": 439084, "ts": "1704161511432852.966"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432853.714", "dur": 13.2, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511432861.446", "dur": 6.170061638915772, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 510}, "ph": "X"}, {"name": "HostToDevice2190433320959", "ph": "s", "cat": "HostToDevice", "id": "2190433320959", "pid": 43767502, "tid": 439084, "ts": "1704161511432861.956"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511432866.914", "dur": 1.33, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 491, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 511}, "ph": "X"}, {"name": "HostToDevice2199023255551", "ph": "f", "id": "2199023255551", "ts": "1704161511432866.914", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432866.914", "dur": 1.33, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432868.244", "dur": 26.0795, "ph": "X"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511432871.026", "dur": 17.930179122489434, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 512}, "ph": "X"}, {"name": "HostToDevice2199023255551", "ph": "s", "cat": "HostToDevice", "id": "2199023255551", "pid": 43767502, "tid": 439084, "ts": "1704161511432871.696"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511432892.606", "dur": 26.270262439921776, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 515}, 
"ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511432894.324", "dur": 1.51, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 492, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 514}, "ph": "X"}, {"name": "HostToDevice2211908157439", "ph": "f", "id": "2211908157439", "ts": "1704161511432894.324", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432894.324", "dur": 1.51, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432895.834", "dur": 15.04, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432896.976", "dur": 5.440054346142918, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 513}, "ph": "X"}, {"name": "HostToDevice2211908157439", "ph": "s", "cat": "HostToDevice", "id": "2211908157439", "pid": 43767502, "tid": 439084, "ts": "1704161511432904.086"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511432910.874", "dur": 1.39, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 493, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 516}, "ph": "X"}, {"name": "HostToDevice2220498092031", "ph": "f", "id": "2220498092031", "ts": "1704161511432910.874", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432910.874", "dur": 1.39, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432912.264", "dur": 24.0095, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511432923.727", "dur": 6.660066534064675, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 517}, "ph": "X"}, {"name": 
"HostToDevice2220498092031", "ph": "s", "cat": "HostToDevice", "id": "2220498092031", "pid": 43767502, "tid": 439084, "ts": "1704161511432924.376"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511432934.727", "dur": 26.710266835565687, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 520}, "ph": "X"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511432936.273", "dur": 3.4, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 494, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 519}, "ph": "X"}, {"name": "HostToDevice2233382993919", "ph": "f", "id": "2233382993919", "ts": "1704161511432936.273", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432936.273", "dur": 3.4, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511432939.436", "dur": 5.250052448023956, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 518}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432939.673", "dur": 11.91, "ph": "X"}, {"name": "HostToDevice2233382993919", "ph": "s", "cat": "HostToDevice", "id": "2233382993919", "pid": 43767502, "tid": 439084, "ts": "1704161511432946.407"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432951.583", "dur": 6.22, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 495, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 521}, "ph": "X"}, {"name": "HostToDevice2241972928511", "ph": "f", "id": "2241972928511", "ts": "1704161511432951.583", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432951.583", 
"dur": 6.22, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432957.803", "dur": 7.15, "ph": "X"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511432964.953", "dur": 5.26, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 496, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 523}, "ph": "X"}, {"name": "HostToDevice2250562863103", "ph": "f", "id": "2250562863103", "ts": "1704161511432964.953", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432964.953", "dur": 5.26, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432965.897", "dur": 6.8000679326786475, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 522}, "ph": "X"}, {"name": "HostToDevice2241972928511", "ph": "s", "cat": "HostToDevice", "id": "2241972928511", "pid": 43767502, "tid": 439084, "ts": "1704161511432966.387"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511432970.373", "dur": 5.94, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 497, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 525}, "ph": "X"}, {"name": "HostToDevice2259152797695", "ph": "f", "id": "2259152797695", "ts": "1704161511432970.373", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432970.373", "dur": 5.94, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432970.213", "dur": 0.16, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511432975.617", "dur": 5.470054645845912, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 524}, "ph": "X"}, {"name": 
"HostToDevice2250562863103", "ph": "s", "cat": "HostToDevice", "id": "2250562863103", "pid": 43767502, "tid": 439084, "ts": "1704161511432976.057"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432976.313", "dur": 7.7395, "ph": "X"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511432984.052", "dur": 10.05, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 498, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 527}, "ph": "X"}, {"name": "HostToDevice2267742732287", "ph": "f", "id": "2267742732287", "ts": "1704161511432984.052", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432984.052", "dur": 10.05, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511432985.217", "dur": 5.790057842677848, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 526}, "ph": "X"}, {"name": "HostToDevice2259152797695", "ph": "s", "cat": "HostToDevice", "id": "2259152797695", "pid": 43767502, "tid": 439084, "ts": "1704161511432985.647"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511432993.937", "dur": 6.450064436143718, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 528}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432994.102", "dur": 0.11, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511432994.212", "dur": 5.24, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 499, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 529}, "ph": "X"}, {"name": "HostToDevice2276332666879", "ph": "f", "id": "2276332666879", "ts": "1704161511432994.212", "cat": "HostToDevice", "pid": 800, 
"tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511432994.212", "dur": 5.24, "ph": "X"}, {"name": "HostToDevice2267742732287", "ph": "s", "cat": "HostToDevice", "id": "2267742732287", "pid": 43767502, "tid": 439084, "ts": "1704161511432994.787"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511432999.452", "dur": 12.54, "ph": "X"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511433003.517", "dur": 5.400053946538926, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 530}, "ph": "X"}, {"name": "HostToDevice2276332666879", "ph": "s", "cat": "HostToDevice", "id": "2276332666879", "pid": 43767502, "tid": 439084, "ts": "1704161511433003.857"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433011.747", "dur": 23.72023696516728, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 533}, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511433011.992", "dur": 6.86, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 500, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 532}, "ph": "X"}, {"name": "HostToDevice2289217568767", "ph": "f", "id": "2289217568767", "ts": "1704161511433011.992", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433011.992", "dur": 6.86, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433015.637", "dur": 4.790047852578047, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 531}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433018.852", "dur": 6.4, "ph": "X"}, {"name": "HostToDevice2289217568767", "ph": 
"s", "cat": "HostToDevice", "id": "2289217568767", "pid": 43767502, "tid": 439084, "ts": "1704161511433021.998"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433025.252", "dur": 6.5495, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 501, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 534}, "ph": "X"}, {"name": "HostToDevice2297807503359", "ph": "f", "id": "2297807503359", "ts": "1704161511433025.252", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433025.252", "dur": 6.5495, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433031.802", "dur": 13.27, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511433039.978", "dur": 6.550065435153697, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 535}, "ph": "X"}, {"name": "HostToDevice2297807503359", "ph": "s", "cat": "HostToDevice", "id": "2297807503359", "pid": 43767502, "tid": 439084, "ts": "1704161511433040.627"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511433045.072", "dur": 8.44, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 502, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 537}, "ph": "X"}, {"name": "HostToDevice2310692405247", "ph": "f", "id": "2310692405247", "ts": "1704161511433045.072", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433045.072", "dur": 8.44, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511433050.428", "dur": 26.6502662361597, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 538}, "ph": "X"}, {"name": 
"Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433053.512", "dur": 26.7895, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433054.728", "dur": 5.260052547924953, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 536}, "ph": "X"}, {"name": "HostToDevice2310692405247", "ph": "s", "cat": "HostToDevice", "id": "2310692405247", "pid": 43767502, "tid": 439084, "ts": "1704161511433061.928"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433080.302", "dur": 1.6, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 503, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 539}, "ph": "X"}, {"name": "HostToDevice2319282339839", "ph": "f", "id": "2319282339839", "ts": "1704161511433080.302", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433080.302", "dur": 1.6, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433081.688", "dur": 6.530065235351701, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 540}, "ph": "X"}, {"name": "HostToDevice2319282339839", "ph": "s", "cat": "HostToDevice", "id": "2319282339839", "pid": 43767502, "tid": 439084, "ts": "1704161511433082.108"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433081.902", "dur": 11.03, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511433091.558", "dur": 5.610056044459884, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 542}, "ph": "X"}, {"name": "HostToDevice2327872274431", "ph": "s", "cat": "HostToDevice", "id": "2327872274431", "pid": 43767502, "tid": 439084, "ts": "1704161511433092.128"}, 
{"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511433092.932", "dur": 1.59, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 504, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 541}, "ph": "X"}, {"name": "HostToDevice2327872274431", "ph": "f", "id": "2327872274431", "ts": "1704161511433092.932", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433092.932", "dur": 1.59, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433094.522", "dur": 0.12, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433094.642", "dur": 1.57, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 505, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 543}, "ph": "X"}, {"name": "HostToDevice2336462209023", "ph": "f", "id": "2336462209023", "ts": "1704161511433094.642", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433094.642", "dur": 1.57, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433096.212", "dur": 25.53, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433101.018", "dur": 5.590055844657888, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 544}, "ph": "X"}, {"name": "HostToDevice2336462209023", "ph": "s", "cat": "HostToDevice", "id": "2336462209023", "pid": 43767502, "tid": 439084, "ts": "1704161511433101.498"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511433110.648", "dur": 6.0800607398067905, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 546}, "ph": "X"}, {"name": 
"HostToDevice2345052143615", "ph": "s", "cat": "HostToDevice", "id": "2345052143615", "pid": 43767502, "tid": 439084, "ts": "1704161511433111.138"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511433119.938", "dur": 5.030050250202, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 548}, "ph": "X"}, {"name": "HostToDevice2353642078207", "ph": "s", "cat": "HostToDevice", "id": "2353642078207", "pid": 43767502, "tid": 439084, "ts": "1704161511433120.438"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511433121.742", "dur": 4.02, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 506, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 545}, "ph": "X"}, {"name": "HostToDevice2345052143615", "ph": "f", "id": "2345052143615", "ts": "1704161511433121.742", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433121.742", "dur": 4.02, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511433125.872", "dur": 1.58, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 507, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 547}, "ph": "X"}, {"name": "HostToDevice2353642078207", "ph": "f", "id": "2353642078207", "ts": "1704161511433125.872", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433125.872", "dur": 1.58, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433125.762", "dur": 0.11, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433127.452", "dur": 5.2195, "ph": "X"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433127.738", "dur": 23.170231470612393, "args": {"Thread Id": 439084, "Mode": "ACL_NN", 
"level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 551}, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433131.579", "dur": 4.780047752677049, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 549}, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511433132.671", "dur": 1.64, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 508, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 550}, "ph": "X"}, {"name": "HostToDevice2366526980095", "ph": "f", "id": "2366526980095", "ts": "1704161511433132.671", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433132.671", "dur": 1.64, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433134.311", "dur": 5.06, "ph": "X"}, {"name": "HostToDevice2366526980095", "ph": "s", "cat": "HostToDevice", "id": "2366526980095", "pid": 43767502, "tid": 439084, "ts": "1704161511433138.008"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433139.371", "dur": 1.58, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 509, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 552}, "ph": "X"}, {"name": "HostToDevice2375116914687", "ph": "f", "id": "2375116914687", "ts": "1704161511433139.371", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433139.371", "dur": 1.58, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433140.951", "dur": 20.81, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511433155.559", "dur": 6.64006633426268, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", 
"item_id": "0", "connection_id": 553}, "ph": "X"}, {"name": "HostToDevice2375116914687", "ph": "s", "cat": "HostToDevice", "id": "2375116914687", "pid": 43767502, "tid": 439084, "ts": "1704161511433155.998"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511433161.761", "dur": 3.94, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 510, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 555}, "ph": "X"}, {"name": "HostToDevice2388001816575", "ph": "f", "id": "2388001816575", "ts": "1704161511433161.761", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433161.761", "dur": 3.94, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433165.701", "dur": 18.2895, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511433166.169", "dur": 26.35026323912976, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 556}, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433170.589", "dur": 5.200051948518966, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 554}, "ph": "X"}, {"name": "HostToDevice2388001816575", "ph": "s", "cat": "HostToDevice", "id": "2388001816575", "pid": 43767502, "tid": 439084, "ts": "1704161511433177.299"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433183.990", "dur": 2.48, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 511, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 557}, "ph": "X"}, {"name": "HostToDevice2396591751167", "ph": "f", "id": "2396591751167", "ts": "1704161511433183.990", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", 
"pid": 4376751100, "tid": 0, "ts": "1704161511433183.990", "dur": 2.48, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433186.470", "dur": 34.51, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433197.159", "dur": 12.300122878227555, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 558}, "ph": "X"}, {"name": "HostToDevice2396591751167", "ph": "s", "cat": "HostToDevice", "id": "2396591751167", "pid": 43767502, "tid": 439084, "ts": "1704161511433197.529"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511433213.029", "dur": 7.19007182881757, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 560}, "ph": "X"}, {"name": "HostToDevice2405181685759", "ph": "s", "cat": "HostToDevice", "id": "2405181685759", "pid": 43767502, "tid": 439084, "ts": "1704161511433213.509"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511433220.980", "dur": 2.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 513, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 559}, "ph": "X"}, {"name": "HostToDevice2405181685759", "ph": "f", "id": "2405181685759", "ts": "1704161511433220.980", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433220.980", "dur": 2.53, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433223.620", "dur": 2.87, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 514, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 561}, "ph": "X"}, {"name": "HostToDevice2413771620351", "ph": "f", "id": "2413771620351", "ts": "1704161511433223.620", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, 
{"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433223.620", "dur": 2.87, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433223.510", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433223.799", "dur": 5.790057842677848, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 562}, "ph": "X"}, {"name": "HostToDevice2413771620351", "ph": "s", "cat": "HostToDevice", "id": "2413771620351", "pid": 43767502, "tid": 439084, "ts": "1704161511433224.460"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433226.490", "dur": 7.2395, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511433233.049", "dur": 6.2000619386187665, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 564}, "ph": "X"}, {"name": "HostToDevice2422361554943", "ph": "s", "cat": "HostToDevice", "id": "2422361554943", "pid": 43767502, "tid": 439084, "ts": "1704161511433233.549"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511433233.730", "dur": 5.71, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 515, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 563}, "ph": "X"}, {"name": "HostToDevice2422361554943", "ph": "f", "id": "2422361554943", "ts": "1704161511433233.730", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433233.730", "dur": 5.71, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511433239.550", "dur": 2.61, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 516, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 565}, "ph": "X"}, {"name": 
"HostToDevice2430951489535", "ph": "f", "id": "2430951489535", "ts": "1704161511433239.550", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433239.550", "dur": 2.61, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433239.440", "dur": 0.11, "ph": "X"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511433242.339", "dur": 5.18005174871697, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 566}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433242.160", "dur": 5.95, "ph": "X"}, {"name": "HostToDevice2430951489535", "ph": "s", "cat": "HostToDevice", "id": "2430951489535", "pid": 43767502, "tid": 439084, "ts": "1704161511433242.800"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511433248.110", "dur": 3.03, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 517, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 568}, "ph": "X"}, {"name": "HostToDevice2443836391423", "ph": "f", "id": "2443836391423", "ts": "1704161511433248.110", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433248.110", "dur": 3.03, "ph": "X"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433250.669", "dur": 25.30025274952497, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 569}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433251.140", "dur": 12.71, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433254.920", "dur": 5.510055045449905, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", 
"connection_id": 567}, "ph": "X"}, {"name": "HostToDevice2443836391423", "ph": "s", "cat": "HostToDevice", "id": "2443836391423", "pid": 43767502, "tid": 439084, "ts": "1704161511433262.100"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433263.850", "dur": 2.88, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 518, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 570}, "ph": "X"}, {"name": "HostToDevice2452426326015", "ph": "f", "id": "2452426326015", "ts": "1704161511433263.850", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433263.850", "dur": 2.88, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433266.730", "dur": 25.3095, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511433280.750", "dur": 6.480064735846711, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 571}, "ph": "X"}, {"name": "HostToDevice2452426326015", "ph": "s", "cat": "HostToDevice", "id": "2452426326015", "pid": 43767502, "tid": 439084, "ts": "1704161511433281.280"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511433290.490", "dur": 26.680266535862692, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 574}, "ph": "X"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511433292.040", "dur": 5.13, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 519, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 573}, "ph": "X"}, {"name": "HostToDevice2465311227903", "ph": "f", "id": "2465311227903", "ts": "1704161511433292.040", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, 
"tid": 0, "ts": "1704161511433292.040", "dur": 5.13, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433294.770", "dur": 5.310053047429943, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 572}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433297.170", "dur": 13.21, "ph": "X"}, {"name": "HostToDevice2465311227903", "ph": "s", "cat": "HostToDevice", "id": "2465311227903", "pid": 43767502, "tid": 439084, "ts": "1704161511433301.850"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433310.380", "dur": 1.42, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 520, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 575}, "ph": "X"}, {"name": "HostToDevice2473901162495", "ph": "f", "id": "2473901162495", "ts": "1704161511433310.380", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433310.380", "dur": 1.42, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433311.800", "dur": 12.84, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433321.660", "dur": 6.550065435153697, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 576}, "ph": "X"}, {"name": "HostToDevice2473901162495", "ph": "s", "cat": "HostToDevice", "id": "2473901162495", "pid": 43767502, "tid": 439084, "ts": "1704161511433322.130"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511433324.640", "dur": 1.48, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 521, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 577}, "ph": "X"}, {"name": "HostToDevice2482491097087", "ph": "f", "id": "2482491097087", "ts": 
"1704161511433324.640", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433324.640", "dur": 1.48, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433326.120", "dur": 0.11, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433326.230", "dur": 1.39, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 522, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 579}, "ph": "X"}, {"name": "HostToDevice2491081031679", "ph": "f", "id": "2491081031679", "ts": "1704161511433326.230", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433326.230", "dur": 1.39, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433327.620", "dur": 7.3895, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511433331.750", "dur": 6.0800607398067905, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 578}, "ph": "X"}, {"name": "HostToDevice2482491097087", "ph": "s", "cat": "HostToDevice", "id": "2482491097087", "pid": 43767502, "tid": 439084, "ts": "1704161511433332.371"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511433335.009", "dur": 3.65, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 523, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 581}, "ph": "X"}, {"name": "HostToDevice2499670966271", "ph": "f", "id": "2499670966271", "ts": "1704161511433335.009", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433335.009", "dur": 3.65, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433338.659", "dur": 13.56, "ph": "X"}, 
{"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433341.101", "dur": 5.73005724327186, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 580}, "ph": "X"}, {"name": "HostToDevice2491081031679", "ph": "s", "cat": "HostToDevice", "id": "2491081031679", "pid": 43767502, "tid": 439084, "ts": "1704161511433341.721"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511433349.980", "dur": 6.190061838717769, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 582}, "ph": "X"}, {"name": "HostToDevice2499670966271", "ph": "s", "cat": "HostToDevice", "id": "2499670966271", "pid": 43767502, "tid": 439084, "ts": "1704161511433350.871"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511433352.219", "dur": 1.41, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 524, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 583}, "ph": "X"}, {"name": "HostToDevice2508260900863", "ph": "f", "id": "2508260900863", "ts": "1704161511433352.219", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433352.219", "dur": 1.41, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433353.629", "dur": 8.66, "ph": "X"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511433359.201", "dur": 5.150051449013976, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 584}, "ph": "X"}, {"name": "HostToDevice2508260900863", "ph": "s", "cat": "HostToDevice", "id": "2508260900863", "pid": 43767502, "tid": 439084, "ts": "1704161511433359.661"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511433362.289", 
"dur": 1.48, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 525, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 586}, "ph": "X"}, {"name": "HostToDevice2521145802751", "ph": "f", "id": "2521145802751", "ts": "1704161511433362.289", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433362.289", "dur": 1.48, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433363.769", "dur": 20.7295, "ph": "X"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433367.111", "dur": 23.68023656556329, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 587}, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433371.031", "dur": 4.770047652776051, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 585}, "ph": "X"}, {"name": "HostToDevice2521145802751", "ph": "s", "cat": "HostToDevice", "id": "2521145802751", "pid": 43767502, "tid": 439084, "ts": "1704161511433377.291"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433384.498", "dur": 1.38, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 526, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 588}, "ph": "X"}, {"name": "HostToDevice2529735737343", "ph": "f", "id": "2529735737343", "ts": "1704161511433384.498", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433384.498", "dur": 1.38, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433385.878", "dur": 16.82, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511433395.881", "dur": 6.410064036539725, "args": {"Thread Id": 
439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 589}, "ph": "X"}, {"name": "HostToDevice2529735737343", "ph": "s", "cat": "HostToDevice", "id": "2529735737343", "pid": 43767502, "tid": 439084, "ts": "1704161511433396.381"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511433402.698", "dur": 3.45, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 527, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 591}, "ph": "X"}, {"name": "HostToDevice2542620639231", "ph": "f", "id": "2542620639231", "ts": "1704161511433402.698", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433402.698", "dur": 3.45, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511433405.991", "dur": 26.300262739624767, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 592}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433406.148", "dur": 15.76, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433410.411", "dur": 5.4300542462419195, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 590}, "ph": "X"}, {"name": "HostToDevice2542620639231", "ph": "s", "cat": "HostToDevice", "id": "2542620639231", "pid": 43767502, "tid": 439084, "ts": "1704161511433417.361"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433421.908", "dur": 1.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 528, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 593}, "ph": "X"}, {"name": "HostToDevice2551210573823", "ph": "f", "id": "2551210573823", "ts": "1704161511433421.908", "cat": 
"HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433421.908", "dur": 1.53, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433423.438", "dur": 17.9795, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433437.102", "dur": 6.4900648357477095, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 594}, "ph": "X"}, {"name": "HostToDevice2551210573823", "ph": "s", "cat": "HostToDevice", "id": "2551210573823", "pid": 43767502, "tid": 439084, "ts": "1704161511433437.532"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511433441.418", "dur": 1.55, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 529, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 595}, "ph": "X"}, {"name": "HostToDevice2559800508415", "ph": "f", "id": "2559800508415", "ts": "1704161511433441.418", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433441.418", "dur": 1.55, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433442.968", "dur": 0.28, "ph": "X"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433443.248", "dur": 1.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 530, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 597}, "ph": "X"}, {"name": "HostToDevice2568390443007", "ph": "f", "id": "2568390443007", "ts": "1704161511433443.248", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433443.248", "dur": 1.53, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433444.778", "dur": 9.63, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 
43767502, "tid": 439084, "ts": "1704161511433447.702", "dur": 5.820058142380843, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 596}, "ph": "X"}, {"name": "HostToDevice2559800508415", "ph": "s", "cat": "HostToDevice", "id": "2559800508415", "pid": 43767502, "tid": 439084, "ts": "1704161511433448.242"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511433454.408", "dur": 3.92, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 531, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 599}, "ph": "X"}, {"name": "HostToDevice2576980377599", "ph": "f", "id": "2576980377599", "ts": "1704161511433454.408", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433454.408", "dur": 3.92, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433456.632", "dur": 5.160051548914973, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 598}, "ph": "X"}, {"name": "HostToDevice2568390443007", "ph": "s", "cat": "HostToDevice", "id": "2568390443007", "pid": 43767502, "tid": 439084, "ts": "1704161511433456.972"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433458.328", "dur": 7.01, "ph": "X"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511433465.338", "dur": 1.63, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 532, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 601}, "ph": "X"}, {"name": "HostToDevice2585570312191", "ph": "f", "id": "2585570312191", "ts": "1704161511433465.338", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433465.338", "dur": 1.63, "ph": "X"}, {"name": "Free", "pid": 
4376751100, "tid": 3, "ts": "1704161511433466.968", "dur": 15.7595, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511433469.622", "dur": 6.590065834757689, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 600}, "ph": "X"}, {"name": "HostToDevice2576980377599", "ph": "s", "cat": "HostToDevice", "id": "2576980377599", "pid": 43767502, "tid": 439084, "ts": "1704161511433470.042"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511433479.902", "dur": 5.390053846637928, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 602}, "ph": "X"}, {"name": "HostToDevice2585570312191", "ph": "s", "cat": "HostToDevice", "id": "2585570312191", "pid": 43767502, "tid": 439084, "ts": "1704161511433480.702"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511433482.728", "dur": 1.77, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 533, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 604}, "ph": "X"}, {"name": "HostToDevice2598455214079", "ph": "f", "id": "2598455214079", "ts": "1704161511433482.728", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433482.728", "dur": 1.77, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433484.498", "dur": 20.93, "ph": "X"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433488.382", "dur": 25.320252949326964, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 605}, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433492.322", "dur": 5.030050250202, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", 
"id": "RealDiv_Tiling", "item_id": "0", "connection_id": 603}, "ph": "X"}, {"name": "HostToDevice2598455214079", "ph": "s", "cat": "HostToDevice", "id": "2598455214079", "pid": 43767502, "tid": 439084, "ts": "1704161511433498.942"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433505.428", "dur": 1.53, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 534, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 606}, "ph": "X"}, {"name": "HostToDevice2607045148671", "ph": "f", "id": "2607045148671", "ts": "1704161511433505.428", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433505.428", "dur": 1.53, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433506.958", "dur": 18.09, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": "1704161511433518.652", "dur": 6.220062138420763, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 607}, "ph": "X"}, {"name": "HostToDevice2607045148671", "ph": "s", "cat": "HostToDevice", "id": "2607045148671", "pid": 43767502, "tid": 439084, "ts": "1704161511433519.022"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511433525.048", "dur": 3.7595, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 535, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 609}, "ph": "X"}, {"name": "HostToDevice2619930050559", "ph": "f", "id": "2619930050559", "ts": "1704161511433525.048", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433525.048", "dur": 3.7595, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511433528.692", "dur": 26.700266735664687, "args": {"Thread Id": 439084, 
"Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 610}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433528.807", "dur": 17.73, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433533.072", "dur": 5.33005324723194, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 608}, "ph": "X"}, {"name": "HostToDevice2619930050559", "ph": "s", "cat": "HostToDevice", "id": "2619930050559", "pid": 43767502, "tid": 439084, "ts": "1704161511433539.973"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433546.537", "dur": 1.44, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 536, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 611}, "ph": "X"}, {"name": "HostToDevice2628519985151", "ph": "f", "id": "2628519985151", "ts": "1704161511433546.537", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433546.537", "dur": 1.44, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433547.977", "dur": 6.52, "ph": "X"}, {"name": "aclnnAdd_AxpyAiCore_Axpy", "pid": 800, "tid": 3, "ts": "1704161511433554.497", "dur": 1.49, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 537, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 613}, "ph": "X"}, {"name": "HostToDevice2637109919743", "ph": "f", "id": "2637109919743", "ts": "1704161511433554.497", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433554.497", "dur": 1.49, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433555.987", "dur": 8.98, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": 
"1704161511433560.413", "dur": 6.540065335252699, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 612}, "ph": "X"}, {"name": "HostToDevice2628519985151", "ph": "s", "cat": "HostToDevice", "id": "2628519985151", "pid": 43767502, "tid": 439084, "ts": "1704161511433560.883"}, {"name": "aclnnInplaceMuls_MulAiCore_Mul", "pid": 800, "tid": 3, "ts": "1704161511433564.967", "dur": 1.45, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 538, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 615}, "ph": "X"}, {"name": "HostToDevice2645699854335", "ph": "f", "id": "2645699854335", "ts": "1704161511433564.967", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433564.967", "dur": 1.45, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433566.417", "dur": 7.8, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511433570.623", "dur": 5.700056943568867, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 614}, "ph": "X"}, {"name": "HostToDevice2637109919743", "ph": "s", "cat": "HostToDevice", "id": "2637109919743", "pid": 43767502, "tid": 439084, "ts": "1704161511433571.063"}, {"name": "aclnnAddcmul_AddcmulAiCore_Addcmul", "pid": 800, "tid": 3, "ts": "1704161511433574.217", "dur": 3.62, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 539, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 617}, "ph": "X"}, {"name": "HostToDevice2654289788927", "ph": "f", "id": "2654289788927", "ts": "1704161511433574.217", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433574.217", "dur": 3.62, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": 
"1704161511433577.837", "dur": 4.8895, "ph": "X"}, {"name": "AscendCL@aclnnInplaceMuls", "pid": 43767502, "tid": 439084, "ts": "1704161511433579.363", "dur": 5.040050350102997, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceMuls", "item_id": "0", "connection_id": 616}, "ph": "X"}, {"name": "HostToDevice2645699854335", "ph": "s", "cat": "HostToDevice", "id": "2645699854335", "pid": 43767502, "tid": 439084, "ts": "1704161511433579.873"}, {"name": "aclnnSqrt_SqrtAiCore_Sqrt", "pid": 800, "tid": 3, "ts": "1704161511433582.726", "dur": 1.39, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 540, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 619}, "ph": "X"}, {"name": "HostToDevice2662879723519", "ph": "f", "id": "2662879723519", "ts": "1704161511433582.726", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433582.726", "dur": 1.39, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433584.116", "dur": 25.17, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAddcmul", "pid": 43767502, "tid": 439084, "ts": "1704161511433587.783", "dur": 5.4300542462419195, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcmul", "item_id": "0", "connection_id": 618}, "ph": "X"}, {"name": "HostToDevice2654289788927", "ph": "s", "cat": "HostToDevice", "id": "2654289788927", "pid": 43767502, "tid": 439084, "ts": "1704161511433588.083"}, {"name": "AscendCL@aclnnSqrt", "pid": 43767502, "tid": 439084, "ts": "1704161511433599.233", "dur": 5.2400523481229575, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnSqrt", "item_id": "0", "connection_id": 620}, "ph": "X"}, {"name": "HostToDevice2662879723519", "ph": "s", "cat": "HostToDevice", "id": "2662879723519", "pid": 43767502, "tid": 439084, "ts": "1704161511433599.563"}, {"name": "AscendCL@aclnnDivs", "pid": 
43767502, "tid": 439084, "ts": "1704161511433608.033", "dur": 26.100260741604806, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 623}, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511433609.286", "dur": 1.65, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 541, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 622}, "ph": "X"}, {"name": "HostToDevice2675764625407", "ph": "f", "id": "2675764625407", "ts": "1704161511433609.286", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433609.286", "dur": 1.65, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433610.936", "dur": 12.36, "ph": "X"}, {"name": "AscendCL@RealDiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433612.794", "dur": 4.750047452974055, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "RealDiv_Tiling", "item_id": "0", "connection_id": 621}, "ph": "X"}, {"name": "HostToDevice2675764625407", "ph": "s", "cat": "HostToDevice", "id": "2675764625407", "pid": 43767502, "tid": 439084, "ts": "1704161511433619.093"}, {"name": "aclnnAdds_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433623.296", "dur": 1.45, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 542, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 624}, "ph": "X"}, {"name": "HostToDevice2684354559999", "ph": "f", "id": "2684354559999", "ts": "1704161511433623.296", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433623.296", "dur": 1.45, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433624.746", "dur": 25.1195, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdds", "pid": 43767502, "tid": 439084, "ts": 
"1704161511433639.284", "dur": 6.110061039509785, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdds", "item_id": "0", "connection_id": 625}, "ph": "X"}, {"name": "HostToDevice2684354559999", "ph": "s", "cat": "HostToDevice", "id": "2684354559999", "pid": 43767502, "tid": 439084, "ts": "1704161511433639.713"}, {"name": "AscendCL@aclnnInplaceAddcdiv", "pid": 43767502, "tid": 439084, "ts": "1704161511433648.634", "dur": 25.6402561461589, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAddcdiv", "item_id": "0", "connection_id": 628}, "ph": "X"}, {"name": "aclnnAddcdiv_AddcdivAiCore_Addcdiv", "pid": 800, "tid": 3, "ts": "1704161511433649.866", "dur": 3.43, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 543, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 627}, "ph": "X"}, {"name": "HostToDevice2697239461887", "ph": "f", "id": "2697239461887", "ts": "1704161511433649.866", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433649.866", "dur": 3.43, "ph": "X"}, {"name": "AscendCL@Addcdiv_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433652.894", "dur": 5.610056044459884, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Addcdiv_Tiling", "item_id": "0", "connection_id": 626}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433653.296", "dur": 12.88, "ph": "X"}, {"name": "HostToDevice2697239461887", "ph": "s", "cat": "HostToDevice", "id": "2697239461887", "pid": 43767502, "tid": 439084, "ts": "1704161511433660.104"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511433666.176", "dur": 1.46, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 544, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 629}, "ph": "X"}, {"name": "HostToDevice2705829396479", "ph": "f", 
"id": "2705829396479", "ts": "1704161511433666.176", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433666.176", "dur": 1.46, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433667.636", "dur": 19.1495, "ph": "X"}, {"name": "AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511433678.514", "dur": 6.220062138420763, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 630}, "ph": "X"}, {"name": "HostToDevice2705829396479", "ph": "s", "cat": "HostToDevice", "id": "2705829396479", "pid": 43767502, "tid": 439084, "ts": "1704161511433678.994"}, {"name": "aclnnArgMax_ArgMaxV2AiCore_ArgMaxV2", "pid": 800, "tid": 3, "ts": "1704161511433686.786", "dur": 4.7, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 545, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 631}, "ph": "X"}, {"name": "HostToDevice2714419331071", "ph": "f", "id": "2714419331071", "ts": "1704161511433686.786", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433686.786", "dur": 4.7, "ph": "X"}, {"name": "AscendCL@aclnnArgMax", "pid": 43767502, "tid": 439084, "ts": "1704161511433691.454", "dur": 10.030100200701005, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnArgMax", "item_id": "0", "connection_id": 633}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433691.486", "dur": 0.16, "ph": "X"}, {"name": "aclnnArgMax_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": "1704161511433691.646", "dur": 5.99, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 546, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 632}, "ph": "X"}, {"name": "HostToDevice2718714298367", "ph": "f", "id": "2718714298367", "ts": 
"1704161511433691.646", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433691.646", "dur": 5.99, "ph": "X"}, {"name": "HostToDevice2714419331071", "ph": "s", "cat": "HostToDevice", "id": "2714419331071", "pid": 43767502, "tid": 439084, "ts": "1704161511433692.034"}, {"name": "HostToDevice2718714298367", "ph": "s", "cat": "HostToDevice", "id": "2718714298367", "pid": 43767502, "tid": 439084, "ts": "1704161511433697.304"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433697.636", "dur": 3.56, "ph": "X"}, {"name": "aclnnEqTensor_EqualAiCore_Equal", "pid": 800, "tid": 3, "ts": "1704161511433701.196", "dur": 3.42, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 547, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 634}, "ph": "X"}, {"name": "HostToDevice2727304232959", "ph": "f", "id": "2727304232959", "ts": "1704161511433701.196", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433701.196", "dur": 3.42, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433704.616", "dur": 20.98, "ph": "X"}, {"name": "AscendCL@aclnnEqTensor", "pid": 43767502, "tid": 439084, "ts": "1704161511433706.914", "dur": 6.6300662343616805, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnEqTensor", "item_id": "0", "connection_id": 635}, "ph": "X"}, {"name": "HostToDevice2727304232959", "ph": "s", "cat": "HostToDevice", "id": "2727304232959", "pid": 43767502, "tid": 439084, "ts": "1704161511433707.594"}, {"name": "AscendCL@aclnnReduceSum", "pid": 43767502, "tid": 439084, "ts": "1704161511433717.564", "dur": 117.74117623435058, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnReduceSum", "item_id": "0", "connection_id": 642}, "ph": "X"}, {"name": "aclnnReduceSum_CastAiCore_Cast", "pid": 800, "tid": 3, "ts": 
"1704161511433725.596", "dur": 1.63, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 548, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 637}, "ph": "X"}, {"name": "HostToDevice2740189134847", "ph": "f", "id": "2740189134847", "ts": "1704161511433725.596", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433725.596", "dur": 1.63, "ph": "X"}, {"name": "AscendCL@Cast_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433727.185", "dur": 5.300052947528946, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "Cast_Tiling", "item_id": "0", "connection_id": 636}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433727.226", "dur": 60.229, "ph": "X"}, {"name": "HostToDevice2740189134847", "ph": "s", "cat": "HostToDevice", "id": "2740189134847", "pid": 43767502, "tid": 439084, "ts": "1704161511433735.634"}, {"name": "AscendCL@ReduceSum_Tiling", "pid": 43767502, "tid": 439084, "ts": "1704161511433754.325", "dur": 19.35019330843115, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "ReduceSum_Tiling", "item_id": "0", "connection_id": 638}, "ph": "X"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_MemSet", "pid": 800, "tid": 3, "ts": "1704161511433787.454", "dur": 6.65, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 549, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 639}, "ph": "X"}, {"name": "HostToDevice2748779069439", "ph": "f", "id": "2748779069439", "ts": "1704161511433787.454", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433787.454", "dur": 6.65, "ph": "X"}, {"name": "HostToDevice2748779069439", "ph": "s", "cat": "HostToDevice", "id": "2748779069439", "pid": 43767502, "tid": 439084, "ts": "1704161511433790.805"}, {"name": "Free", "pid": 4376751100, "tid": 3, 
"ts": "1704161511433794.104", "dur": 0.16, "ph": "X"}, {"name": "aclnnReduceSum_ReduceSumOpAiCore_ReduceSum", "pid": 800, "tid": 3, "ts": "1704161511433794.264", "dur": 2.17, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 550, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 640}, "ph": "X"}, {"name": "HostToDevice2753074036735", "ph": "f", "id": "2753074036735", "ts": "1704161511433794.264", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433794.264", "dur": 2.17, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511433796.434", "dur": 19.67, "ph": "X"}, {"name": "HostToDevice2753074036735", "ph": "s", "cat": "HostToDevice", "id": "2753074036735", "pid": 43767502, "tid": 439084, "ts": "1704161511433802.405"}, {"name": "HostToDevice2757369004031", "ph": "s", "cat": "HostToDevice", "id": "2757369004031", "pid": 43767502, "tid": 439084, "ts": "1704161511433815.135"}, {"name": "aclnnReduceSum_CastAiCpu_Cast", "pid": 800, "tid": 3, "ts": "1704161511433816.104", "dur": 188.14802169799805, "args": {"Model Id": 4294967295, "Task Type": "AI_CPU", "Stream Id": 3, "Task Id": 551, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 641}, "ph": "X"}, {"name": "HostToDevice2757369004031", "ph": "f", "id": "2757369004031", "ts": "1704161511433816.104", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511433816.104", "dur": 188.148, "ph": "X"}, {"name": "AscendCL@aclnnCast", "pid": 43767502, "tid": 439084, "ts": "1704161511433852.085", "dur": 11.62011608495969, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnCast", "item_id": "0", "connection_id": 644}, "ph": "X"}, {"name": "HostToDevice2765958938623", "ph": "s", "cat": "HostToDevice", "id": "2765958938623", "pid": 43767502, "tid": 439084, "ts": "1704161511433854.696"}, {"name": 
"AscendCL@aclnnInplaceAdd", "pid": 43767502, "tid": 439084, "ts": "1704161511433868.746", "dur": 7.150071429213577, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnInplaceAdd", "item_id": "0", "connection_id": 646}, "ph": "X"}, {"name": "HostToDevice2774548873215", "ph": "s", "cat": "HostToDevice", "id": "2774548873215", "pid": 43767502, "tid": 439084, "ts": "1704161511433869.086"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433879.846", "dur": 5.7700576428758525, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 648}, "ph": "X"}, {"name": "HostToDevice2783138807807", "ph": "s", "cat": "HostToDevice", "id": "2783138807807", "pid": 43767502, "tid": 439084, "ts": "1704161511433880.276"}, {"name": "AscendCL@aclnnDivs", "pid": 43767502, "tid": 439084, "ts": "1704161511433888.746", "dur": 5.010050050400003, "args": {"Thread Id": 439084, "Mode": "ACL_NN", "level": "acl", "id": "aclnnDivs", "item_id": "0", "connection_id": 650}, "ph": "X"}, {"name": "HostToDevice2791728742399", "ph": "s", "cat": "HostToDevice", "id": "2791728742399", "pid": 43767502, "tid": 439084, "ts": "1704161511433888.986"}, {"name": "AscendCL@aclrtSynchronizeStreamWithTimeout", "pid": 43767502, "tid": 437675, "ts": "1704161511433911.036", "dur": 309.6830937341064, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeStreamWithTimeout", "item_id": "0", "connection_id": 651}, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511434004.252", "dur": 14.72, "ph": "X"}, {"name": "aclnnCast_CastAiCpu_Cast", "pid": 800, "tid": 3, "ts": "1704161511434018.972", "dur": 139.4186019897461, "args": {"Model Id": 4294967295, "Task Type": "AI_CPU", "Stream Id": 3, "Task Id": 552, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 643}, "ph": "X"}, {"name": "HostToDevice2765958938623", "ph": "f", "id": "2765958938623", "ts": 
"1704161511434018.972", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511434018.972", "dur": 139.4185, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511434158.391", "dur": 11.23, "ph": "X"}, {"name": "aclnnAdd_AddAiCore_Add", "pid": 800, "tid": 3, "ts": "1704161511434169.621", "dur": 1.4, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 553, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 645}, "ph": "X"}, {"name": "HostToDevice2774548873215", "ph": "f", "id": "2774548873215", "ts": "1704161511434169.621", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511434169.621", "dur": 1.4, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511434171.021", "dur": 0.16, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511434171.181", "dur": 1.34, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 554, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 647}, "ph": "X"}, {"name": "HostToDevice2783138807807", "ph": "f", "id": "2783138807807", "ts": "1704161511434171.181", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": "Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511434171.181", "dur": 1.34, "ph": "X"}, {"name": "Free", "pid": 4376751100, "tid": 3, "ts": "1704161511434172.521", "dur": 0.18, "ph": "X"}, {"name": "aclnnDivs_RealDivAiCore_RealDiv", "pid": 800, "tid": 3, "ts": "1704161511434172.701", "dur": 1.42, "args": {"Model Id": 4294967295, "Task Type": "Other", "Stream Id": 3, "Task Id": 555, "Batch Id": 0, "Subtask Id": 4294967295, "connection_id": 649}, "ph": "X"}, {"name": "HostToDevice2791728742399", "ph": "f", "id": "2791728742399", "ts": "1704161511434172.701", "cat": "HostToDevice", "pid": 800, "tid": 3, "bp": "e"}, {"name": 
"Computing", "pid": 4376751100, "tid": 0, "ts": "1704161511434172.701", "dur": 1.42, "ph": "X"}, {"name": "AscendCL@aclrtMemcpy", "pid": 43767502, "tid": 437675, "ts": "1704161511434222.419", "dur": 30.57030539735092, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtMemcpy", "item_id": "0", "connection_id": 652}, "ph": "X"}, {"name": "AscendCL@aclrtSynchronizeStreamWithTimeout", "pid": 43767502, "tid": 437675, "ts": "1704161511434273.420", "dur": 3.47003466564631, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeStreamWithTimeout", "item_id": "0", "connection_id": 653}, "ph": "X"}, {"name": "AscendCL@aclrtMemcpy", "pid": 43767502, "tid": 437675, "ts": "1704161511434277.340", "dur": 16.570165535953706, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtMemcpy", "item_id": "0", "connection_id": 654}, "ph": "X"}, {"name": "AscendCL@aclrtSynchronizeDevice", "pid": 43767502, "tid": 437675, "ts": "1704161511434416.011", "dur": 6.430064236341721, "args": {"Thread Id": 437675, "Mode": "ACL_RTS", "level": "acl", "id": "aclrtSynchronizeDevice", "item_id": "0", "connection_id": 655}, "ph": "X"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422904204, "pid": 437675, "tid": 437675, "ts": "1704161511422855.116", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422904204, "pid": 800, "tid": 3, "ts": "1704161511422904.204", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422917264, "pid": 437675, "tid": 437675, "ts": "1704161511422892.817", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422917264, "pid": 800, "tid": 3, "ts": "1704161511422917.264", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422950043, "pid": 437675, "tid": 437675, "ts": "1704161511422907.207", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": 
"torch_to_npu", "id": 1704161511422950043, "pid": 800, "tid": 3, "ts": "1704161511422950.043", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422952673, "pid": 437675, "tid": 437675, "ts": "1704161511422920.807", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422952673, "pid": 800, "tid": 3, "ts": "1704161511422952.673", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422954043, "pid": 437675, "tid": 437675, "ts": "1704161511422933.197", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422954043, "pid": 800, "tid": 3, "ts": "1704161511422954.043", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422962253, "pid": 437675, "tid": 437675, "ts": "1704161511422945.917", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422962253, "pid": 800, "tid": 3, "ts": "1704161511422962.253", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422968132, "pid": 437675, "tid": 437675, "ts": "1704161511422958.477", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422968132, "pid": 800, "tid": 3, "ts": "1704161511422968.132", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422983032, "pid": 437675, "tid": 437675, "ts": "1704161511422970.767", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422983032, "pid": 800, "tid": 3, "ts": "1704161511422983.032", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511422984392, "pid": 437675, "tid": 437675, "ts": "1704161511422984.268", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422984392, "pid": 800, "tid": 3, "ts": "1704161511422984.392", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511422991032, "pid": 437675, "tid": 437675, "ts": "1704161511422996.918", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511422991032, "pid": 800, "tid": 3, "ts": "1704161511422991.032", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424085712, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424085712, "pid": 800, "tid": 3, "ts": "1704161511424085.712", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424137431, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424137431, "pid": 800, "tid": 3, "ts": "1704161511424137.431", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424159001, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424159001, "pid": 800, "tid": 3, "ts": "1704161511424159.001", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424197230, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424197230, "pid": 800, "tid": 3, "ts": "1704161511424197.230", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424215770, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424215770, "pid": 800, "tid": 3, "ts": "1704161511424215.770", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424223930, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424223930, "pid": 800, 
"tid": 3, "ts": "1704161511424223.930", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424257870, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424257870, "pid": 800, "tid": 3, "ts": "1704161511424257.870", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424286260, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424286260, "pid": 800, "tid": 3, "ts": "1704161511424286.260", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424294210, "pid": 437675, "tid": 437675, "ts": "1704161511423205.030", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424294210, "pid": 800, "tid": 3, "ts": "1704161511424294.210", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424333429, "pid": 437675, "tid": 437675, "ts": "1704161511423257.470", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424333429, "pid": 800, "tid": 3, "ts": "1704161511424333.429", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424837944, "pid": 437675, "tid": 437675, "ts": "1704161511423396.302", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424837944, "pid": 800, "tid": 3, "ts": "1704161511424837.944", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424871414, "pid": 437675, "tid": 437675, "ts": "1704161511423396.302", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424871414, "pid": 800, "tid": 3, "ts": "1704161511424871.414", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424907934, "pid": 437675, "tid": 437675, "ts": 
"1704161511423396.302", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424907934, "pid": 800, "tid": 3, "ts": "1704161511424907.934", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424929423, "pid": 437675, "tid": 437675, "ts": "1704161511423396.302", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424929423, "pid": 800, "tid": 3, "ts": "1704161511424929.423", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511424946003, "pid": 437675, "tid": 437675, "ts": "1704161511423396.302", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511424946003, "pid": 800, "tid": 3, "ts": "1704161511424946.003", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425697246, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425697246, "pid": 800, "tid": 3, "ts": "1704161511425697.246", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425733735, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425733735, "pid": 800, "tid": 3, "ts": "1704161511425733.735", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425760075, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425760075, "pid": 800, "tid": 3, "ts": "1704161511425760.075", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425777564, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425777564, "pid": 800, "tid": 3, "ts": "1704161511425777.564", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425786644, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425786644, "pid": 800, "tid": 3, "ts": "1704161511425786.644", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425803044, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425803044, "pid": 800, "tid": 3, "ts": "1704161511425803.044", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425830794, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425830794, "pid": 800, "tid": 3, "ts": "1704161511425830.794", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425853424, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425853424, "pid": 800, "tid": 3, "ts": "1704161511425853.424", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425869524, "pid": 437675, "tid": 437675, "ts": "1704161511423468.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425869524, "pid": 800, "tid": 3, "ts": "1704161511425869.524", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425895714, "pid": 437675, "tid": 437675, "ts": "1704161511423495.183", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425895714, "pid": 800, "tid": 3, "ts": "1704161511425895.714", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425955963, "pid": 437675, "tid": 437675, "ts": "1704161511423553.623", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511425955963, "pid": 800, "tid": 3, "ts": "1704161511425955.963", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425975312, "pid": 437675, "tid": 437675, "ts": "1704161511423553.623", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425975312, "pid": 800, "tid": 3, "ts": "1704161511425975.312", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511425997052, "pid": 437675, "tid": 437675, "ts": "1704161511423553.623", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511425997052, "pid": 800, "tid": 3, "ts": "1704161511425997.052", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426019682, "pid": 437675, "tid": 437675, "ts": "1704161511423553.623", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426019682, "pid": 800, "tid": 3, "ts": "1704161511426019.682", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426033542, "pid": 437675, "tid": 437675, "ts": "1704161511423553.623", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426033542, "pid": 800, "tid": 3, "ts": "1704161511426033.542", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426052702, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426052702, "pid": 800, "tid": 3, "ts": "1704161511426052.702", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426062642, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426062642, "pid": 800, "tid": 3, "ts": "1704161511426062.642", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511426073612, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426073612, "pid": 800, "tid": 3, "ts": "1704161511426073.612", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426078342, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426078342, "pid": 800, "tid": 3, "ts": "1704161511426078.342", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426083692, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426083692, "pid": 800, "tid": 3, "ts": "1704161511426083.692", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426090172, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426090172, "pid": 800, "tid": 3, "ts": "1704161511426090.172", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426097302, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426097302, "pid": 800, "tid": 3, "ts": "1704161511426097.302", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426105822, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426105822, "pid": 800, "tid": 3, "ts": "1704161511426105.822", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426110062, "pid": 437675, "tid": 437675, "ts": "1704161511423669.464", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426110062, "pid": 800, 
"tid": 3, "ts": "1704161511426110.062", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426117962, "pid": 437675, "tid": 437675, "ts": "1704161511423691.065", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426117962, "pid": 800, "tid": 3, "ts": "1704161511426117.962", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426120132, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426120132, "pid": 800, "tid": 3, "ts": "1704161511426120.132", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426132241, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426132241, "pid": 800, "tid": 3, "ts": "1704161511426132.241", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426138671, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426138671, "pid": 800, "tid": 3, "ts": "1704161511426138.671", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426146341, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426146341, "pid": 800, "tid": 3, "ts": "1704161511426146.341", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426154841, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426154841, "pid": 800, "tid": 3, "ts": "1704161511426154.841", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426165581, "pid": 437675, "tid": 437675, "ts": 
"1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426165581, "pid": 800, "tid": 3, "ts": "1704161511426165.581", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426172020, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426172020, "pid": 800, "tid": 3, "ts": "1704161511426172.020", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426181520, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426181520, "pid": 800, "tid": 3, "ts": "1704161511426181.520", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426189750, "pid": 437675, "tid": 437675, "ts": "1704161511423743.715", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426189750, "pid": 800, "tid": 3, "ts": "1704161511426189.750", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426198830, "pid": 437675, "tid": 437675, "ts": "1704161511423760.635", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426198830, "pid": 800, "tid": 3, "ts": "1704161511426198.830", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426200860, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426200860, "pid": 800, "tid": 3, "ts": "1704161511426200.860", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426202380, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426202380, "pid": 800, "tid": 3, "ts": "1704161511426202.380", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426223540, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426223540, "pid": 800, "tid": 3, "ts": "1704161511426223.540", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426225140, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426225140, "pid": 800, "tid": 3, "ts": "1704161511426225.140", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426232660, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426232660, "pid": 800, "tid": 3, "ts": "1704161511426232.660", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426239650, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426239650, "pid": 800, "tid": 3, "ts": "1704161511426239.650", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426245990, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426245990, "pid": 800, "tid": 3, "ts": "1704161511426245.990", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426248450, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426248450, "pid": 800, "tid": 3, "ts": "1704161511426248.450", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426254600, "pid": 437675, "tid": 437675, "ts": "1704161511423810.736", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511426254600, "pid": 800, "tid": 3, "ts": "1704161511426254.600", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426463338, "pid": 437675, "tid": 437675, "ts": "1704161511426449.252", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426463338, "pid": 800, "tid": 3, "ts": "1704161511426463.338", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426537697, "pid": 437675, "tid": 437675, "ts": "1704161511426532.533", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426537697, "pid": 800, "tid": 3, "ts": "1704161511426537.697", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426547667, "pid": 437675, "tid": 437675, "ts": "1704161511426550.453", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426547667, "pid": 800, "tid": 3, "ts": "1704161511426547.667", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426550747, "pid": 437675, "tid": 437675, "ts": "1704161511426550.453", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426550747, "pid": 800, "tid": 3, "ts": "1704161511426550.747", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426564487, "pid": 437675, "tid": 437675, "ts": "1704161511426550.453", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426564487, "pid": 800, "tid": 3, "ts": "1704161511426564.487", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426573236, "pid": 437675, "tid": 437675, "ts": "1704161511426550.453", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426573236, "pid": 800, "tid": 3, "ts": "1704161511426573.236", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511426619016, "pid": 437675, "tid": 437675, "ts": "1704161511426617.294", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426619016, "pid": 800, "tid": 3, "ts": "1704161511426619.016", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426824554, "pid": 437675, "tid": 439228, "ts": "1704161511426816.966", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426824554, "pid": 800, "tid": 3, "ts": "1704161511426824.554", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426861404, "pid": 437675, "tid": 439228, "ts": "1704161511426846.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426861404, "pid": 800, "tid": 3, "ts": "1704161511426861.404", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426869724, "pid": 437675, "tid": 439228, "ts": "1704161511426846.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426869724, "pid": 800, "tid": 3, "ts": "1704161511426869.724", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511426936243, "pid": 437675, "tid": 439228, "ts": "1704161511426924.287", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511426936243, "pid": 800, "tid": 3, "ts": "1704161511426936.243", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427004822, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427004822, "pid": 800, "tid": 3, "ts": "1704161511427004.822", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427006582, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427006582, "pid": 800, 
"tid": 3, "ts": "1704161511427006.582", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427017562, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427017562, "pid": 800, "tid": 3, "ts": "1704161511427017.562", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427023362, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427023362, "pid": 800, "tid": 3, "ts": "1704161511427023.362", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427031202, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427031202, "pid": 800, "tid": 3, "ts": "1704161511427031.202", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427037582, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427037582, "pid": 800, "tid": 3, "ts": "1704161511427037.582", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427040462, "pid": 437675, "tid": 439228, "ts": "1704161511426997.638", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427040462, "pid": 800, "tid": 3, "ts": "1704161511427040.462", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427048582, "pid": 437675, "tid": 439228, "ts": "1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427048582, "pid": 800, "tid": 3, "ts": "1704161511427048.582", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427050192, "pid": 437675, "tid": 439228, "ts": 
"1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427050192, "pid": 800, "tid": 3, "ts": "1704161511427050.192", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427055462, "pid": 437675, "tid": 439228, "ts": "1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427055462, "pid": 800, "tid": 3, "ts": "1704161511427055.462", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427060822, "pid": 437675, "tid": 439228, "ts": "1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427060822, "pid": 800, "tid": 3, "ts": "1704161511427060.822", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427068772, "pid": 437675, "tid": 439228, "ts": "1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427068772, "pid": 800, "tid": 3, "ts": "1704161511427068.772", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427075512, "pid": 437675, "tid": 439228, "ts": "1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427075512, "pid": 800, "tid": 3, "ts": "1704161511427075.512", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427077712, "pid": 437675, "tid": 439228, "ts": "1704161511427030.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427077712, "pid": 800, "tid": 3, "ts": "1704161511427077.712", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427085432, "pid": 437675, "tid": 439228, "ts": "1704161511427082.058", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427085432, "pid": 800, "tid": 3, "ts": "1704161511427085.432", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427092352, "pid": 437675, "tid": 439228, "ts": "1704161511427082.058", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427092352, "pid": 800, "tid": 3, "ts": "1704161511427092.352", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427303440, "pid": 437675, "tid": 439228, "ts": "1704161511427259.190", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427303440, "pid": 800, "tid": 3, "ts": "1704161511427303.440", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427371509, "pid": 437675, "tid": 439228, "ts": "1704161511427351.201", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427371509, "pid": 800, "tid": 3, "ts": "1704161511427371.509", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427431688, "pid": 437675, "tid": 439228, "ts": "1704161511427415.592", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427431688, "pid": 800, "tid": 3, "ts": "1704161511427431.688", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427479708, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427479708, "pid": 800, "tid": 3, "ts": "1704161511427479.708", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427481828, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427481828, "pid": 800, "tid": 3, "ts": "1704161511427481.828", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427492208, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511427492208, "pid": 800, "tid": 3, "ts": "1704161511427492.208", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427499728, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427499728, "pid": 800, "tid": 3, "ts": "1704161511427499.728", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427507778, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427507778, "pid": 800, "tid": 3, "ts": "1704161511427507.778", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427514148, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427514148, "pid": 800, "tid": 3, "ts": "1704161511427514.148", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427517078, "pid": 437675, "tid": 439228, "ts": "1704161511427471.612", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427517078, "pid": 800, "tid": 3, "ts": "1704161511427517.078", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427521338, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427521338, "pid": 800, "tid": 3, "ts": "1704161511427521.338", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427523267, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427523267, "pid": 800, "tid": 3, "ts": "1704161511427523.267", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511427525457, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427525457, "pid": 800, "tid": 3, "ts": "1704161511427525.457", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427532977, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427532977, "pid": 800, "tid": 3, "ts": "1704161511427532.977", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427540347, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427540347, "pid": 800, "tid": 3, "ts": "1704161511427540.347", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427546987, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427546987, "pid": 800, "tid": 3, "ts": "1704161511427546.987", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427549627, "pid": 437675, "tid": 439228, "ts": "1704161511427501.723", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427549627, "pid": 800, "tid": 3, "ts": "1704161511427549.627", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427553867, "pid": 437675, "tid": 439228, "ts": "1704161511427522.133", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427553867, "pid": 800, "tid": 3, "ts": "1704161511427553.867", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427560787, "pid": 437675, "tid": 439228, "ts": "1704161511427522.133", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427560787, "pid": 800, 
"tid": 3, "ts": "1704161511427560.787", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427612856, "pid": 437675, "tid": 439228, "ts": "1704161511427593.064", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427612856, "pid": 800, "tid": 3, "ts": "1704161511427612.856", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427683476, "pid": 437675, "tid": 439228, "ts": "1704161511427667.914", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427683476, "pid": 800, "tid": 3, "ts": "1704161511427683.476", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427705176, "pid": 437675, "tid": 439228, "ts": "1704161511427704.725", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427705176, "pid": 800, "tid": 3, "ts": "1704161511427705.176", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427758055, "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427758055, "pid": 800, "tid": 3, "ts": "1704161511427758.055", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427760505, "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427760505, "pid": 800, "tid": 3, "ts": "1704161511427760.505", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427775084, "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427775084, "pid": 800, "tid": 3, "ts": "1704161511427775.084", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427782584, "pid": 437675, "tid": 439228, "ts": 
"1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427782584, "pid": 800, "tid": 3, "ts": "1704161511427782.584", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427789024, "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427789024, "pid": 800, "tid": 3, "ts": "1704161511427789.024", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427795454, "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427795454, "pid": 800, "tid": 3, "ts": "1704161511427795.454", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427801114, "pid": 437675, "tid": 439228, "ts": "1704161511427746.585", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427801114, "pid": 800, "tid": 3, "ts": "1704161511427801.114", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427868514, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427868514, "pid": 800, "tid": 3, "ts": "1704161511427868.514", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427870904, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427870904, "pid": 800, "tid": 3, "ts": "1704161511427870.904", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427884264, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427884264, "pid": 800, "tid": 3, "ts": "1704161511427884.264", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427891924, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427891924, "pid": 800, "tid": 3, "ts": "1704161511427891.924", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427897494, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427897494, "pid": 800, "tid": 3, "ts": "1704161511427897.494", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427903704, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427903704, "pid": 800, "tid": 3, "ts": "1704161511427903.704", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427908594, "pid": 437675, "tid": 439228, "ts": "1704161511427861.856", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427908594, "pid": 800, "tid": 3, "ts": "1704161511427908.594", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427917724, "pid": 437675, "tid": 439228, "ts": "1704161511427894.447", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427917724, "pid": 800, "tid": 3, "ts": "1704161511427917.724", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427924183, "pid": 437675, "tid": 439228, "ts": "1704161511427894.447", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511427924183, "pid": 800, "tid": 3, "ts": "1704161511427924.183", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511427990032, "pid": 437675, "tid": 439228, "ts": "1704161511427972.097", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511427990032, "pid": 800, "tid": 3, "ts": "1704161511427990.032", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428064212, "pid": 437675, "tid": 439228, "ts": "1704161511428050.588", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428064212, "pid": 800, "tid": 3, "ts": "1704161511428064.212", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428807574, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428807574, "pid": 800, "tid": 3, "ts": "1704161511428807.574", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428844424, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428844424, "pid": 800, "tid": 3, "ts": "1704161511428844.424", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428871664, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428871664, "pid": 800, "tid": 3, "ts": "1704161511428871.664", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428888694, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428888694, "pid": 800, "tid": 3, "ts": "1704161511428888.694", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428940243, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428940243, "pid": 800, "tid": 3, "ts": "1704161511428940.243", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511428969833, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428969833, "pid": 800, "tid": 3, "ts": "1704161511428969.833", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428981892, "pid": 437675, "tid": 439228, "ts": "1704161511428164.169", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428981892, "pid": 800, "tid": 3, "ts": "1704161511428981.892", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511428997462, "pid": 437675, "tid": 439228, "ts": "1704161511428213.150", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511428997462, "pid": 800, "tid": 3, "ts": "1704161511428997.462", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429727355, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429727355, "pid": 800, "tid": 3, "ts": "1704161511429727.355", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429755595, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429755595, "pid": 800, "tid": 3, "ts": "1704161511429755.595", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429765475, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429765475, "pid": 800, "tid": 3, "ts": "1704161511429765.475", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429778074, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429778074, "pid": 800, 
"tid": 3, "ts": "1704161511429778.074", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429816044, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429816044, "pid": 800, "tid": 3, "ts": "1704161511429816.044", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429856244, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429856244, "pid": 800, "tid": 3, "ts": "1704161511429856.244", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429880104, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429880104, "pid": 800, "tid": 3, "ts": "1704161511429880.104", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511429898474, "pid": 437675, "tid": 439228, "ts": "1704161511428347.071", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511429898474, "pid": 800, "tid": 3, "ts": "1704161511429898.474", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430588746, "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430588746, "pid": 800, "tid": 3, "ts": "1704161511430588.746", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430622196, "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430622196, "pid": 800, "tid": 3, "ts": "1704161511430622.196", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430644526, "pid": 437675, "tid": 439228, "ts": 
"1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430644526, "pid": 800, "tid": 3, "ts": "1704161511430644.526", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430662726, "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430662726, "pid": 800, "tid": 3, "ts": "1704161511430662.726", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430704986, "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430704986, "pid": 800, "tid": 3, "ts": "1704161511430704.986", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430713476, "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430713476, "pid": 800, "tid": 3, "ts": "1704161511430713.476", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511430739235, "pid": 437675, "tid": 439228, "ts": "1704161511428382.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511430739235, "pid": 800, "tid": 3, "ts": "1704161511430739.235", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431052332, "pid": 437675, "tid": 439228, "ts": "1704161511428434.892", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431052332, "pid": 800, "tid": 3, "ts": "1704161511431052.332", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431071262, "pid": 437675, "tid": 439228, "ts": "1704161511428434.892", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431071262, "pid": 800, "tid": 3, "ts": "1704161511431071.262", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431167751, "pid": 437675, "tid": 439228, "ts": "1704161511428602.694", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431167751, "pid": 800, "tid": 3, "ts": "1704161511431167.751", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431208310, "pid": 437675, "tid": 439228, "ts": "1704161511428655.254", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431208310, "pid": 800, "tid": 3, "ts": "1704161511431208.310", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431858704, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431858704, "pid": 800, "tid": 3, "ts": "1704161511431858.704", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431891734, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431891734, "pid": 800, "tid": 3, "ts": "1704161511431891.734", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431914284, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431914284, "pid": 800, "tid": 3, "ts": "1704161511431914.284", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431937293, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431937293, "pid": 800, "tid": 3, "ts": "1704161511431937.293", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431961483, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511431961483, "pid": 800, "tid": 3, "ts": "1704161511431961.483", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511431986512, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511431986512, "pid": 800, "tid": 3, "ts": "1704161511431986.512", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432001662, "pid": 437675, "tid": 439228, "ts": "1704161511428703.785", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432001662, "pid": 800, "tid": 3, "ts": "1704161511432001.662", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432032482, "pid": 437675, "tid": 439228, "ts": "1704161511428743.655", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432032482, "pid": 800, "tid": 3, "ts": "1704161511432032.482", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432083372, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432083372, "pid": 800, "tid": 3, "ts": "1704161511432083.372", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432109392, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432109392, "pid": 800, "tid": 3, "ts": "1704161511432109.392", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432126562, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432126562, "pid": 800, "tid": 3, "ts": "1704161511432126.562", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511432145121, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432145121, "pid": 800, "tid": 3, "ts": "1704161511432145.121", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432179410, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432179410, "pid": 800, "tid": 3, "ts": "1704161511432179.410", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432189400, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432189400, "pid": 800, "tid": 3, "ts": "1704161511432189.400", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432224730, "pid": 437675, "tid": 439228, "ts": "1704161511428819.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432224730, "pid": 800, "tid": 3, "ts": "1704161511432224.730", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432260930, "pid": 437675, "tid": 439228, "ts": "1704161511428855.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432260930, "pid": 800, "tid": 3, "ts": "1704161511432260.930", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432270310, "pid": 437675, "tid": 439228, "ts": "1704161511428855.646", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432270310, "pid": 800, "tid": 3, "ts": "1704161511432270.310", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432354639, "pid": 437675, "tid": 439228, "ts": "1704161511428953.477", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432354639, "pid": 800, 
"tid": 3, "ts": "1704161511432354.639", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432386648, "pid": 437675, "tid": 439228, "ts": "1704161511428995.778", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432386648, "pid": 800, "tid": 3, "ts": "1704161511432386.648", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432413758, "pid": 437675, "tid": 437675, "ts": "1704161511429190.490", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432413758, "pid": 800, "tid": 3, "ts": "1704161511432413.758", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432422268, "pid": 437675, "tid": 437675, "ts": "1704161511429220.330", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432422268, "pid": 800, "tid": 3, "ts": "1704161511432422.268", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432434548, "pid": 437675, "tid": 437675, "ts": "1704161511429237.520", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432434548, "pid": 800, "tid": 3, "ts": "1704161511432434.548", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432444538, "pid": 437675, "tid": 437675, "ts": "1704161511429259.120", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432444538, "pid": 800, "tid": 3, "ts": "1704161511432444.538", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432455308, "pid": 437675, "tid": 437675, "ts": "1704161511429284.691", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432455308, "pid": 800, "tid": 3, "ts": "1704161511432455.308", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432496978, "pid": 437675, "tid": 437675, "ts": 
"1704161511429396.712", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432496978, "pid": 800, "tid": 3, "ts": "1704161511432496.978", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432514998, "pid": 437675, "tid": 437675, "ts": "1704161511429427.092", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432514998, "pid": 800, "tid": 3, "ts": "1704161511432514.998", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432555557, "pid": 437675, "tid": 437675, "ts": "1704161511429503.263", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432555557, "pid": 800, "tid": 3, "ts": "1704161511432555.557", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432579166, "pid": 437675, "tid": 437675, "ts": "1704161511429530.753", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432579166, "pid": 800, "tid": 3, "ts": "1704161511432579.166", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432587416, "pid": 437675, "tid": 437675, "ts": "1704161511429551.083", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432587416, "pid": 800, "tid": 3, "ts": "1704161511432587.416", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432597586, "pid": 437675, "tid": 437675, "ts": "1704161511429566.633", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432597586, "pid": 800, "tid": 3, "ts": "1704161511432597.586", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432606356, "pid": 437675, "tid": 437675, "ts": "1704161511429580.403", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432606356, "pid": 800, "tid": 3, "ts": "1704161511432606.356", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432614986, "pid": 437675, "tid": 437675, "ts": "1704161511429597.614", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432614986, "pid": 800, "tid": 3, "ts": "1704161511432614.986", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432641056, "pid": 437675, "tid": 437675, "ts": "1704161511429657.214", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432641056, "pid": 800, "tid": 3, "ts": "1704161511432641.056", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432654996, "pid": 437675, "tid": 437675, "ts": "1704161511429683.444", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432654996, "pid": 800, "tid": 3, "ts": "1704161511432654.996", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432682936, "pid": 437675, "tid": 437675, "ts": "1704161511429739.045", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432682936, "pid": 800, "tid": 3, "ts": "1704161511432682.936", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432699816, "pid": 437675, "tid": 437675, "ts": "1704161511429764.755", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432699816, "pid": 800, "tid": 3, "ts": "1704161511432699.816", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432715196, "pid": 437675, "tid": 437675, "ts": "1704161511429783.905", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432715196, "pid": 800, "tid": 3, "ts": "1704161511432715.196", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432718256, "pid": 437675, "tid": 437675, "ts": "1704161511429807.856", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511432718256, "pid": 800, "tid": 3, "ts": "1704161511432718.256", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432733965, "pid": 437675, "tid": 437675, "ts": "1704161511429826.966", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432733965, "pid": 800, "tid": 3, "ts": "1704161511432733.965", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432740525, "pid": 437675, "tid": 437675, "ts": "1704161511429844.736", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432740525, "pid": 800, "tid": 3, "ts": "1704161511432740.525", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432767615, "pid": 437675, "tid": 437675, "ts": "1704161511429904.687", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432767615, "pid": 800, "tid": 3, "ts": "1704161511432767.615", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432777264, "pid": 437675, "tid": 437675, "ts": "1704161511429930.557", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432777264, "pid": 800, "tid": 3, "ts": "1704161511432777.264", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432797084, "pid": 437675, "tid": 437675, "ts": "1704161511429986.048", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432797084, "pid": 800, "tid": 3, "ts": "1704161511432797.084", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432816684, "pid": 437675, "tid": 437675, "ts": "1704161511430008.578", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432816684, "pid": 800, "tid": 3, "ts": "1704161511432816.684", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511432836124, "pid": 437675, "tid": 437675, "ts": "1704161511430021.828", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432836124, "pid": 800, "tid": 3, "ts": "1704161511432836.124", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432837674, "pid": 437675, "tid": 437675, "ts": "1704161511430043.898", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432837674, "pid": 800, "tid": 3, "ts": "1704161511432837.674", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432850164, "pid": 437675, "tid": 437675, "ts": "1704161511430057.468", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432850164, "pid": 800, "tid": 3, "ts": "1704161511432850.164", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432866914, "pid": 437675, "tid": 437675, "ts": "1704161511430073.098", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432866914, "pid": 800, "tid": 3, "ts": "1704161511432866.914", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432894324, "pid": 437675, "tid": 437675, "ts": "1704161511430126.759", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432894324, "pid": 800, "tid": 3, "ts": "1704161511432894.324", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432910874, "pid": 437675, "tid": 437675, "ts": "1704161511430149.279", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432910874, "pid": 800, "tid": 3, "ts": "1704161511432910.874", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432936273, "pid": 437675, "tid": 437675, "ts": "1704161511430198.880", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432936273, "pid": 800, 
"tid": 3, "ts": "1704161511432936.273", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432951583, "pid": 437675, "tid": 437675, "ts": "1704161511430231.680", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432951583, "pid": 800, "tid": 3, "ts": "1704161511432951.583", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432964953, "pid": 437675, "tid": 437675, "ts": "1704161511430245.440", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432964953, "pid": 800, "tid": 3, "ts": "1704161511432964.953", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432970373, "pid": 437675, "tid": 437675, "ts": "1704161511430263.740", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432970373, "pid": 800, "tid": 3, "ts": "1704161511432970.373", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432984052, "pid": 437675, "tid": 437675, "ts": "1704161511430277.350", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432984052, "pid": 800, "tid": 3, "ts": "1704161511432984.052", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511432994212, "pid": 437675, "tid": 437675, "ts": "1704161511430293.391", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511432994212, "pid": 800, "tid": 3, "ts": "1704161511432994.212", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433011992, "pid": 437675, "tid": 437675, "ts": "1704161511430348.241", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433011992, "pid": 800, "tid": 3, "ts": "1704161511433011.992", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433025252, "pid": 437675, "tid": 437675, "ts": 
"1704161511430372.141", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433025252, "pid": 800, "tid": 3, "ts": "1704161511433025.252", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433045072, "pid": 437675, "tid": 437675, "ts": "1704161511430424.952", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433045072, "pid": 800, "tid": 3, "ts": "1704161511433045.072", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433080302, "pid": 437675, "tid": 437675, "ts": "1704161511430447.902", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433080302, "pid": 800, "tid": 3, "ts": "1704161511433080.302", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433092932, "pid": 437675, "tid": 437675, "ts": "1704161511430461.112", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433092932, "pid": 800, "tid": 3, "ts": "1704161511433092.932", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433094642, "pid": 437675, "tid": 437675, "ts": "1704161511430478.862", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433094642, "pid": 800, "tid": 3, "ts": "1704161511433094.642", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433121742, "pid": 437675, "tid": 437675, "ts": "1704161511430491.463", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433121742, "pid": 800, "tid": 3, "ts": "1704161511433121.742", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433125872, "pid": 437675, "tid": 437675, "ts": "1704161511430507.133", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433125872, "pid": 800, "tid": 3, "ts": "1704161511433125.872", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433132671, "pid": 437675, "tid": 437675, "ts": "1704161511430557.473", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433132671, "pid": 800, "tid": 3, "ts": "1704161511433132.671", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433139371, "pid": 437675, "tid": 437675, "ts": "1704161511430579.823", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433139371, "pid": 800, "tid": 3, "ts": "1704161511433139.371", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433161761, "pid": 437675, "tid": 437675, "ts": "1704161511430628.544", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433161761, "pid": 800, "tid": 3, "ts": "1704161511433161.761", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433183990, "pid": 437675, "tid": 437675, "ts": "1704161511430650.654", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433183990, "pid": 800, "tid": 3, "ts": "1704161511433183.990", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433220980, "pid": 437675, "tid": 437675, "ts": "1704161511430663.114", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433220980, "pid": 800, "tid": 3, "ts": "1704161511433220.980", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433223620, "pid": 437675, "tid": 437675, "ts": "1704161511430682.284", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433223620, "pid": 800, "tid": 3, "ts": "1704161511433223.620", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433233730, "pid": 437675, "tid": 437675, "ts": "1704161511430696.425", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511433233730, "pid": 800, "tid": 3, "ts": "1704161511433233.730", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433239550, "pid": 437675, "tid": 437675, "ts": "1704161511430712.515", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433239550, "pid": 800, "tid": 3, "ts": "1704161511433239.550", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433248110, "pid": 437675, "tid": 437675, "ts": "1704161511430763.255", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433248110, "pid": 800, "tid": 3, "ts": "1704161511433248.110", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433263850, "pid": 437675, "tid": 437675, "ts": "1704161511430784.465", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433263850, "pid": 800, "tid": 3, "ts": "1704161511433263.850", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433292040, "pid": 437675, "tid": 437675, "ts": "1704161511430835.566", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433292040, "pid": 800, "tid": 3, "ts": "1704161511433292.040", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433310380, "pid": 437675, "tid": 437675, "ts": "1704161511430856.916", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433310380, "pid": 800, "tid": 3, "ts": "1704161511433310.380", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433324640, "pid": 437675, "tid": 437675, "ts": "1704161511430868.846", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433324640, "pid": 800, "tid": 3, "ts": "1704161511433324.640", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511433326230, "pid": 437675, "tid": 437675, "ts": "1704161511430882.316", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433326230, "pid": 800, "tid": 3, "ts": "1704161511433326.230", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433335009, "pid": 437675, "tid": 437675, "ts": "1704161511430898.757", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433335009, "pid": 800, "tid": 3, "ts": "1704161511433335.009", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433352219, "pid": 437675, "tid": 437675, "ts": "1704161511430912.737", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433352219, "pid": 800, "tid": 3, "ts": "1704161511433352.219", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433362289, "pid": 437675, "tid": 437675, "ts": "1704161511430962.007", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433362289, "pid": 800, "tid": 3, "ts": "1704161511433362.289", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433384498, "pid": 437675, "tid": 437675, "ts": "1704161511430983.637", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433384498, "pid": 800, "tid": 3, "ts": "1704161511433384.498", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433402698, "pid": 437675, "tid": 437675, "ts": "1704161511431030.818", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433402698, "pid": 800, "tid": 3, "ts": "1704161511433402.698", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433421908, "pid": 437675, "tid": 437675, "ts": "1704161511431052.738", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433421908, "pid": 800, 
"tid": 3, "ts": "1704161511433421.908", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433441418, "pid": 437675, "tid": 437675, "ts": "1704161511431065.368", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433441418, "pid": 800, "tid": 3, "ts": "1704161511433441.418", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433443248, "pid": 437675, "tid": 437675, "ts": "1704161511431078.608", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433443248, "pid": 800, "tid": 3, "ts": "1704161511433443.248", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433454408, "pid": 437675, "tid": 437675, "ts": "1704161511431099.609", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433454408, "pid": 800, "tid": 3, "ts": "1704161511433454.408", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433465338, "pid": 437675, "tid": 437675, "ts": "1704161511431115.269", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433465338, "pid": 800, "tid": 3, "ts": "1704161511433465.338", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433482728, "pid": 437675, "tid": 437675, "ts": "1704161511431168.769", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433482728, "pid": 800, "tid": 3, "ts": "1704161511433482.728", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433505428, "pid": 437675, "tid": 437675, "ts": "1704161511431191.670", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433505428, "pid": 800, "tid": 3, "ts": "1704161511433505.428", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433525048, "pid": 437675, "tid": 437675, "ts": 
"1704161511431243.120", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433525048, "pid": 800, "tid": 3, "ts": "1704161511433525.048", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433546537, "pid": 437675, "tid": 437675, "ts": "1704161511431266.000", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433546537, "pid": 800, "tid": 3, "ts": "1704161511433546.537", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433554497, "pid": 437675, "tid": 437675, "ts": "1704161511431279.690", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433554497, "pid": 800, "tid": 3, "ts": "1704161511433554.497", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433564967, "pid": 437675, "tid": 437675, "ts": "1704161511431292.811", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433564967, "pid": 800, "tid": 3, "ts": "1704161511433564.967", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433574217, "pid": 437675, "tid": 437675, "ts": "1704161511431308.501", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433574217, "pid": 800, "tid": 3, "ts": "1704161511433574.217", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433582726, "pid": 437675, "tid": 437675, "ts": "1704161511431323.381", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433582726, "pid": 800, "tid": 3, "ts": "1704161511433582.726", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433609286, "pid": 437675, "tid": 437675, "ts": "1704161511431374.301", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433609286, "pid": 800, "tid": 3, "ts": "1704161511433609.286", "cat": 
"async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433623296, "pid": 437675, "tid": 437675, "ts": "1704161511431395.582", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433623296, "pid": 800, "tid": 3, "ts": "1704161511433623.296", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433649866, "pid": 437675, "tid": 437675, "ts": "1704161511431442.142", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433649866, "pid": 800, "tid": 3, "ts": "1704161511433649.866", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433666176, "pid": 437675, "tid": 437675, "ts": "1704161511431534.493", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433666176, "pid": 800, "tid": 3, "ts": "1704161511433666.176", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433686786, "pid": 437675, "tid": 437675, "ts": "1704161511431573.203", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433686786, "pid": 800, "tid": 3, "ts": "1704161511433686.786", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433691646, "pid": 437675, "tid": 437675, "ts": "1704161511431573.203", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433691646, "pid": 800, "tid": 3, "ts": "1704161511433691.646", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433701196, "pid": 437675, "tid": 437675, "ts": "1704161511431594.914", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433701196, "pid": 800, "tid": 3, "ts": "1704161511433701.196", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433725596, "pid": 437675, "tid": 437675, "ts": "1704161511431704.105", "cat": "async_npu"}, {"ph": "f", "bp": 
"e", "name": "torch_to_npu", "id": 1704161511433725596, "pid": 800, "tid": 3, "ts": "1704161511433725.596", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433787454, "pid": 437675, "tid": 437675, "ts": "1704161511431704.105", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433787454, "pid": 800, "tid": 3, "ts": "1704161511433787.454", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433794264, "pid": 437675, "tid": 437675, "ts": "1704161511431704.105", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433794264, "pid": 800, "tid": 3, "ts": "1704161511433794.264", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511433816104, "pid": 437675, "tid": 437675, "ts": "1704161511431704.105", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511433816104, "pid": 800, "tid": 3, "ts": "1704161511433816.104", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511434018972, "pid": 437675, "tid": 437675, "ts": "1704161511431757.055", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511434018972, "pid": 800, "tid": 3, "ts": "1704161511434018.972", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511434169621, "pid": 437675, "tid": 437675, "ts": "1704161511431778.495", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511434169621, "pid": 800, "tid": 3, "ts": "1704161511434169.621", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 1704161511434171181, "pid": 437675, "tid": 437675, "ts": "1704161511431820.886", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511434171181, "pid": 800, "tid": 3, "ts": "1704161511434171.181", "cat": "async_npu"}, {"ph": "s", "bp": "e", "name": "torch_to_npu", "id": 
1704161511434172701, "pid": 437675, "tid": 437675, "ts": "1704161511431840.796", "cat": "async_npu"}, {"ph": "f", "bp": "e", "name": "torch_to_npu", "id": 1704161511434172701, "pid": 800, "tid": 3, "ts": "1704161511434172.701", "cat": "async_npu"}] \ No newline at end of file diff --git a/profiler/test/ut/cluster_analyse/common_func/test_path_manager.py b/profiler/test/ut/cluster_analyse/common_func/test_path_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..8693ed33de20a5f85c352259dcedc839b4b696d3 --- /dev/null +++ b/profiler/test/ut/cluster_analyse/common_func/test_path_manager.py @@ -0,0 +1,107 @@ +import unittest +import os +import time +import pytest + +from common_func.path_manager import PathManager + + +PATH_DIR = "resource" +PATH_FILE = "resource/test.csv" +PATH_TEMP = "temp" + + +class TestPathManager(unittest.TestCase): + + def test_check_input_directory_path(self): + with pytest.raises(RuntimeError) as error: + PathManager.check_input_directory_path(PATH_FILE) + PathManager.check_input_directory_path(PATH_DIR) + + def test_check_input_file_path(self): + with pytest.raises(RuntimeError) as error: + PathManager.check_input_file_path(PATH_DIR) + PathManager.check_input_file_path(PATH_FILE) + + def test_check_path_length(self): + path_max = "a" * 4097 + name_max = "a" * 257 + path_with_name_max = "a/" + name_max + with pytest.raises(RuntimeError) as error: + PathManager.check_input_directory_path(path_max) + with pytest.raises(RuntimeError) as error: + PathManager.check_input_directory_path(path_with_name_max) + PathManager.check_path_length(PATH_FILE) + + def test_input_path_common_check(self): + path_max = "a" * 4097 + name_max = "a" * 257 + path_with_name_max = "a/" + name_max + with pytest.raises(RuntimeError) as error: + PathManager.input_path_common_check(path_max) + with pytest.raises(RuntimeError) as error: + PathManager.input_path_common_check(path_with_name_max) + with pytest.raises(RuntimeError) as error: + 
PathManager.input_path_common_check(PATH_DIR + "!@~#$%") + PathManager.input_path_common_check(PATH_FILE) + + def test_check_path_owner_consistent(self): + PathManager.check_path_owner_consistent(PATH_DIR) + + def test_check_path_writeable(self): + link_name = "test_link" + str(time.time()) + os.symlink(PATH_FILE, link_name) + with pytest.raises(RuntimeError) as error: + PathManager.check_path_writeable(link_name) + PathManager.check_path_writeable(PATH_DIR) + os.unlink(link_name) + + def test_check_path_readable(self): + link_name = "test_link" + str(time.time()) + os.symlink(PATH_FILE, link_name) + with pytest.raises(RuntimeError) as error: + PathManager.check_path_readable(link_name) + PathManager.check_path_readable(PATH_DIR) + os.unlink(link_name) + + def test_remove_path_safety(self): + path = PATH_TEMP + str(time.time()) + os.makedirs(path) + link_name = "test_link" + str(time.time()) + os.symlink(PATH_FILE, link_name) + with pytest.raises(RuntimeError) as error: + PathManager.remove_path_safety(link_name) + PathManager.remove_path_safety(path + "not_exist") + PathManager.remove_path_safety(path) + os.unlink(link_name) + + def test_make_dir_safety(self): + path = PATH_TEMP + str(time.time()) + link_name = "test_link" + str(time.time()) + os.symlink(PATH_FILE, link_name) + with pytest.raises(RuntimeError) as error: + PathManager.make_dir_safety(link_name) + PathManager.make_dir_safety(path) + os.removedirs(path) + os.unlink(link_name) + + def test_create_file_safety(self): + path = PATH_TEMP + str(time.time()) + link_name = "test_link" + str(time.time()) + os.symlink(PATH_FILE, link_name) + with pytest.raises(RuntimeError) as error: + PathManager.create_file_safety(link_name) + PathManager.create_file_safety(path) + os.remove(path) + os.unlink(link_name) + + def test_get_realpath(self): + path = PATH_TEMP + str(time.time()) + real_path = PathManager.get_realpath(path) + link_name = "test_link" + str(time.time()) + os.symlink(PATH_FILE, link_name) + with 
pytest.raises(RuntimeError) as error: + PathManager.get_realpath(link_name) + self.assertTrue(real_path.endswith(path)) + os.unlink(link_name) + diff --git a/profiler/test/ut/cluster_analyse/prof_bean/test_step_trace_time_bean.py b/profiler/test/ut/cluster_analyse/prof_bean/test_step_trace_time_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..e369df48421f0bcc50017e6f03583771c29ea076 --- /dev/null +++ b/profiler/test/ut/cluster_analyse/prof_bean/test_step_trace_time_bean.py @@ -0,0 +1,13 @@ +import unittest + +from prof_bean.step_trace_time_bean import StepTraceTimeBean + + +class TestStepTraceTimeBean(unittest.TestCase): + + def test(self): + data = {"Step": 0, "Attr1": 1, "Attr2": 2} + bean = StepTraceTimeBean(data) + self.assertEqual(bean.row, [1.0, 2.0]) + self.assertEqual(bean.step, 0) + self.assertEqual(bean.all_headers, ['Step', 'Type', 'Index', 'Attr1', 'Attr2']) diff --git a/profiler/test/ut/compare_tools/__init__.py b/profiler/test/ut/compare_tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/ut/compare_tools/comparator/test_communication_comparator.py b/profiler/test/ut/compare_tools/comparator/test_communication_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..cd40453e1922173e1a5b43f900331081d17e9f91 --- /dev/null +++ b/profiler/test/ut/compare_tools/comparator/test_communication_comparator.py @@ -0,0 +1,40 @@ +import unittest + +from comparator.communication_comparator import CommunicationComparator +from compare_bean.communication_bean import CommunicationBean + + +class TestCommunicationComparator(unittest.TestCase): + ORIGIN_DATA = { + "base_data": { + "allreduce": {"comm_list": [0.5, 7], "comm_task": {"Notify Wait": [1, 2, 3], "Memcpy": [5]}}, + "allgather": {"comm_list": [1, 4], "comm_task": {}} + }, + "comparison_data": { + "allreduce": {"comm_list": [4, 5], "comm_task": {"Notify 
Wait": [1, 2, 3]}}, + "gather": {"comm_list": [1], "comm_task": {"Notify Wait": [1, 2, 3]}} + } + } + RESULT_DATA = [[1, 'allreduce', None, 2, 7.5, 3.75, 7, 0.5, 'allreduce', None, 2, 9, 4.5, 5, 4, 1.5, 1.2], + [2, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, None, None], + [3, '|', 'Memcpy', 1, 5, 5.0, 5, 5, None, None, None, 0, None, None, None, None, None], + [4, 'allgather', None, 2, 5, 2.5, 4, 1, None, None, None, 0, None, None, None, -5, 0.0], + [5, None, None, None, 0, None, None, None, 'gather', None, 1, 1, 1.0, 1, 1, 1, float('inf')], + [6, None, None, None, 0, None, None, None, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, None, None]] + + def test_compare_when_valid_data(self): + comm_comparator = CommunicationComparator(self.ORIGIN_DATA, CommunicationBean) + comm_comparator._compare() + self.assertEqual(comm_comparator._rows, self.RESULT_DATA) + + def test_compare_when_invalid_data(self): + comm_comparator = CommunicationComparator({}, CommunicationBean) + comm_comparator._compare() + self.assertEqual(comm_comparator._rows, []) + + def test_compare_when_invalid_base_data(self): + data = {"comparison_data": {"allreduce": {"comm_list": [4, 5], "comm_task": {}}}} + result = [[1, None, None, None, 0, None, None, None, 'allreduce', None, 2, 9, 4.5, 5, 4, 9, float('inf')]] + comm_comparator = CommunicationComparator(data, CommunicationBean) + comm_comparator._compare() + self.assertEqual(comm_comparator._rows, result) diff --git a/profiler/test/ut/compare_tools/comparator/test_operator_comparator.py b/profiler/test/ut/compare_tools/comparator/test_operator_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..c26a308ae1119f30368ad9394c7580a9df1702b0 --- /dev/null +++ b/profiler/test/ut/compare_tools/comparator/test_operator_comparator.py @@ -0,0 +1,32 @@ +import unittest + +from comparator.operator_comparator import OperatorComparator + + +class MockBean: + TABLE_NAME = "TEST" + HEADERS = ["INDEX", "VALUE1", 
"VALUE2"] + OVERHEAD = [] + + def __init__(self, index, base_op, comparison_op): + self._index = index + self._base_op = base_op + self._comparison_op = comparison_op + + @property + def row(self): + return [self._index + 1, 1, 1] + + +class TestOperatorComparator(unittest.TestCase): + def test_compare_when_valid_data(self): + data = [[1, 1]] * 3 + result = [[1, 1, 1], [2, 1, 1], [3, 1, 1]] + comparator = OperatorComparator(data, MockBean) + comparator._compare() + self.assertEqual(comparator._rows, result) + + def test_compare_when_invalid_data(self): + comparator = OperatorComparator({}, MockBean) + comparator._compare() + self.assertEqual(comparator._rows, []) diff --git a/profiler/test/ut/compare_tools/comparator/test_operator_statistic_comparator.py b/profiler/test/ut/compare_tools/comparator/test_operator_statistic_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..75f2da77f6c851bb6eb27d58f0844b45fe094157 --- /dev/null +++ b/profiler/test/ut/compare_tools/comparator/test_operator_statistic_comparator.py @@ -0,0 +1,53 @@ +import unittest +from unittest.mock import patch + +from comparator.operator_statistic_comparator import OperatorStatisticComparator + + +class MockBean: + TABLE_NAME = "TEST" + HEADERS = ["INDEX", "VALUE1", "VALUE2"] + OVERHEAD = [] + + def __init__(self, name, base_data, comparison_data): + self._name = name + self._base_data = 0 if not base_data else 1 + self._comparison_data = 0 if not comparison_data else 1 + + @property + def row(self): + return [self._name, self._base_data, self._comparison_data] + + +class TestOperatorStatisticComparator(unittest.TestCase): + def test_compare_when_valid_data(self): + base_dict = {"add": [1], "matmul": [1]} + comparison_dict = {"add": [1], "reduce": [1]} + with patch("comparator.operator_statistic_comparator.OperatorStatisticComparator._group_by_op_name", + return_value=(base_dict, comparison_dict)): + comparator = OperatorStatisticComparator({1: 2}, MockBean) + 
comparator._compare() + self.assertEqual(comparator._rows, [[1, 1, 1], [2, 1, 0], [3, 0, 1]]) + + def test_compare_when_invalid_data(self): + comparator = OperatorStatisticComparator({}, MockBean) + comparator._compare() + self.assertEqual(comparator._rows, []) + + def test_group_by_op_name_when_valid_data(self): + class Node: + def __init__(self, name): + self.name = name + + data = [[Node("add"), Node("add")], [None, Node("reduce")], [Node("matmul"), None], + [Node("matmul"), Node("matmul")], [Node("reduce"), Node("reduce")]] + comparator = OperatorStatisticComparator(data, MockBean) + base_dict, comparison_dict = comparator._group_by_op_name() + self.assertEqual(len(base_dict.get("matmul")), 2) + self.assertEqual(len(comparison_dict.get("reduce")), 2) + + def test_group_by_op_name_when_invalid_data(self): + comparator = OperatorStatisticComparator([], MockBean) + base_dict, comparison_dict = comparator._group_by_op_name() + self.assertEqual(base_dict, {}) + self.assertEqual(comparison_dict, {}) diff --git a/profiler/test/ut/compare_tools/compare_bean/__init__.py b/profiler/test/ut/compare_tools/compare_bean/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/__init__.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_compare_event.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_compare_event.py new file mode 100644 index 0000000000000000000000000000000000000000..356895f1742ec11e6ac60b66083ec99b642767b9 --- /dev/null +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_compare_event.py @@ -0,0 +1,24 @@ +import unittest + +from 
compare_bean.origin_data_bean.compare_event import KernelEvent, MemoryEvent +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean + + +class TestKernelEvent(unittest.TestCase): + event = {"name": "Matmul", "dur": 5, "args": {"Task Id": 5, "Task Type": "AI_CORE"}} + + def test_kernel_details_when_gpu_type(self): + kernel = KernelEvent(TraceEventBean(self.event), "GPU") + self.assertEqual(kernel.kernel_details, "Matmul [duration: 5.0]\n") + + def test_kernel_details_when_npu_type(self): + kernel = KernelEvent(TraceEventBean(self.event), "NPU") + self.assertEqual(kernel.kernel_details, "Matmul, 5, AI_CORE [duration: 5.0]\n") + + +class TestMemoryEvent(unittest.TestCase): + event = {"Size(KB)": 512, "ts": 1, "Allocation Time(us)": 1, "Release Time(us)": 5, "Name": "aten::add"} + + def test_memory_details(self): + memory = MemoryEvent(self.event) + self.assertEqual(memory.memory_details, 'aten::add, (1, 5), [duration: 4.0], [size: 512]\n') diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..78e8e3c39cd9f1b3ff99e030ae59ffa6582b0b8e --- /dev/null +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py @@ -0,0 +1,51 @@ +import unittest + +from compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean + + +class TestKernelDetailsBean(unittest.TestCase): + kernel_bean1 = KernelDetailsBean( + {'Type': "memcopy", 'Name': "aclnninplacecopy_tensormove", 'aiv_vec_time(us)': "N/A", 'mac_time(us)': 5.7, + 'Duration(us)': 5}) + kernel_bean2 = KernelDetailsBean({'Type': "matmul", 'Name': "matmul", 'Duration(us)': 5}) + kernel_bean3 = KernelDetailsBean( + {'Type': "Add", 'Name': "Add", 'aiv_vec_time(us)': 1.2, 'mac_time(us)': 5.7, 'Duration(us)': 5}) + kernel_bean4 = KernelDetailsBean( + {'Type': 
"flashattention_bwd_grad", 'Name': "flashattention", 'mac_time(us)': 0, 'Duration(us)': 5}) + + @staticmethod + def _get_property_str(bean: KernelDetailsBean): + return f"{bean.name}-{bean.op_type}-{bean.duration}-{bean.aiv_vec_time}-{bean.mac_time}" + + def test_property(self): + self.assertEqual(self._get_property_str(self.kernel_bean2), "matmul-matmul-5.0-nan-nan") + self.assertEqual(self._get_property_str(self.kernel_bean3), "Add-Add-5.0-1.2-5.7") + + def test_is_hide_op_pmu(self): + self.assertTrue(self.kernel_bean2.is_hide_op_pmu()) + self.assertFalse(self.kernel_bean1.is_hide_op_pmu()) + + def test_is_vector(self): + self.assertTrue(self.kernel_bean3.is_vector()) + self.assertTrue(self.kernel_bean4.is_vector()) + self.assertFalse(self.kernel_bean1.is_vector()) + + def test_is_invalid(self): + self.assertTrue(self.kernel_bean2.is_invalid()) + self.assertFalse(self.kernel_bean1.is_invalid()) + + def test_is_fa_bwd(self): + self.assertTrue(self.kernel_bean4.is_fa_bwd()) + self.assertFalse(self.kernel_bean1.is_fa_bwd()) + + def test_is_sdma(self): + self.assertTrue(self.kernel_bean1.is_sdma()) + self.assertFalse(self.kernel_bean2.is_sdma()) + + def test_is_flash_attention(self): + self.assertTrue(self.kernel_bean4.is_flash_attention()) + self.assertFalse(self.kernel_bean2.is_flash_attention()) + + def test_is_cube(self): + self.assertTrue(self.kernel_bean2.is_cube()) + self.assertFalse(self.kernel_bean3.is_cube()) diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_memory_record_bean.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_memory_record_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..30d40a48d16e664dbba5292a7162802a026e64e4 --- /dev/null +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_memory_record_bean.py @@ -0,0 +1,9 @@ +import unittest + +from compare_bean.origin_data_bean.memory_record_bean import MemoryRecordBean + + +class 
TestMemoryRecordBean(unittest.TestCase): + def test_total_reserved_mb(self): + self.assertEqual(MemoryRecordBean({"Total Reserved(MB)": 5}).total_reserved_mb, 5) + self.assertEqual(MemoryRecordBean({}).total_reserved_mb, 0) diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_operator_memory_bean.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_operator_memory_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..48175153c3391c79cf6cfd6e74e0b2e6c3d2e7b3 --- /dev/null +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_operator_memory_bean.py @@ -0,0 +1,20 @@ +import unittest + +from compare_bean.origin_data_bean.operator_memory_bean import OperatorMemoryBean + + +class TestOperatorMemoryBean(unittest.TestCase): + bean1 = OperatorMemoryBean({"Name": "cann::add", "Size(KB)": 512, "Allocation Time(us)": 1, "Release Time(us)": 5}) + bean2 = OperatorMemoryBean({"Name": "aten::add", "Size(KB)": 512}) + + @staticmethod + def _get_property_str(bean: OperatorMemoryBean): + return f"{bean.name}-{bean.size}-{bean.allocation_time}-{bean.release_time}" + + def test_property(self): + self.assertEqual(self._get_property_str(self.bean1), "cann::add-512.0-1-5") + self.assertEqual(self._get_property_str(self.bean2), "aten::add-512.0-0-0") + + def test_is_cann_op(self): + self.assertTrue(self.bean1.is_cann_op()) + self.assertFalse(self.bean2.is_cann_op()) diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_trace_event_bean.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_trace_event_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..8f42ff04c9bd896a39f6a45cd867579a00f257b8 --- /dev/null +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_trace_event_bean.py @@ -0,0 +1,114 @@ +import unittest + +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean + + +class 
class TestTraceEventBean(unittest.TestCase):
    """Unit tests for the properties and predicate helpers of TraceEventBean.

    Each predicate test builds the bean from a minimal trace-event dict and
    checks one positive and one negative input.
    """

    def test_property(self):
        """Populated fields are exposed; fields missing from the dict fall back to defaults."""
        data = {"pid": 1, "tid": 1, "ts": 1, "dur": 2, "ph": "X", "cat": "CPU_OP", "name": "Add", "args": {}}
        event = TraceEventBean(data)
        check_value = f"{event.pid}-{event.tid}-{event.dur}-{event.start_time}-{event.end_time}-{event.name}-" \
                      f"{event.lower_name}-{event.lower_cat}-{event.id}-{event.args}-{event.process_name}"
        self.assertEqual(check_value, "1-1-2.0-1-3-Add-add-cpu_op-None-{}-")
        # "args" is empty, so every args-backed property is expected to be None.
        none_property_list = [event.stream_id, event.stream, event.task_type, event.task_id, event.corr_id, event.addr]
        for property_value in none_property_list:
            self.assertIsNone(property_value)
        self.assertEqual(event.device_id, -1)
        self.assertEqual(event.total_reserved, 0)
        self.assertEqual(event.bytes_kb, 0)

    def test_is_m_mode(self):
        self.assertTrue(TraceEventBean({"ph": "M"}).is_m_mode())
        self.assertFalse(TraceEventBean({"ph": "X"}).is_m_mode())

    def test_is_x_mode(self):
        self.assertTrue(TraceEventBean({"ph": "X"}).is_x_mode())
        self.assertFalse(TraceEventBean({"ph": "M"}).is_x_mode())

    def test_is_flow_start(self):
        self.assertTrue(TraceEventBean({"ph": "s"}).is_flow_start())
        self.assertFalse(TraceEventBean({"ph": "f"}).is_flow_start())

    def test_is_flow_end(self):
        self.assertTrue(TraceEventBean({"ph": "f"}).is_flow_end())
        self.assertFalse(TraceEventBean({"ph": "s"}).is_flow_end())

    def test_is_enqueue(self):
        self.assertTrue(TraceEventBean({"cat": "Enqueue"}).is_enqueue())
        self.assertFalse(TraceEventBean({"cat": "cpu_op"}).is_enqueue())

    def test_is_dequeue(self):
        self.assertTrue(TraceEventBean({"cat": "Dequeue"}).is_dequeue())
        self.assertFalse(TraceEventBean({"cat": "cpu_op"}).is_dequeue())

    def test_is_process_meta(self):
        self.assertTrue(TraceEventBean({"ph": "M", "name": "process_name"}).is_process_meta())
        self.assertFalse(TraceEventBean({"cat": "cpu_op"}).is_process_meta())

    def test_is_thread_meta(self):
        self.assertTrue(TraceEventBean({"ph": "M", "name": "thread_name"}).is_thread_meta())
        self.assertFalse(TraceEventBean({"cat": "cpu_op"}).is_thread_meta())

    def test_is_communication_op_thread(self):
        self.assertTrue(TraceEventBean({"args": {"name": "Communication1"}}).is_communication_op_thread())
        self.assertFalse(TraceEventBean({"args": {"name": "add"}}).is_communication_op_thread())

    def test_is_hccl_process_name(self):
        self.assertTrue(TraceEventBean({"args": {"name": "HCCL"}}).is_hccl_process_name())
        self.assertFalse(TraceEventBean({"args": {"name": "Ascend Hardware"}}).is_hccl_process_name())

    def test_is_overlap_process_name(self):
        self.assertTrue(TraceEventBean({"args": {"name": "Overlap Analysis"}}).is_overlap_process_name())
        self.assertFalse(TraceEventBean({"args": {"name": "Ascend Hardware"}}).is_overlap_process_name())

    def test_is_npu_process_name(self):
        self.assertTrue(TraceEventBean({"args": {"name": "Ascend Hardware"}}).is_npu_process_name())
        self.assertFalse(TraceEventBean({"args": {"name": "Ascend"}}).is_npu_process_name())

    def test_is_computing_event(self):
        self.assertTrue(TraceEventBean({"name": "Computing"}).is_computing_event())
        self.assertFalse(TraceEventBean({"name": "add"}).is_computing_event())

    def test_is_comm_not_overlap(self):
        self.assertTrue(TraceEventBean({"name": "Communication(Not Overlapped)"}).is_comm_not_overlap())
        self.assertFalse(TraceEventBean({"name": "add"}).is_comm_not_overlap())

    def test_is_dict(self):
        self.assertTrue(TraceEventBean({}).is_dict())
        self.assertFalse(TraceEventBean([]).is_dict())

    def test_is_kernel_cat(self):
        self.assertTrue(TraceEventBean({"cat": "Kernel"}).is_kernel_cat())
        self.assertFalse(TraceEventBean({"cat": "cpu_op"}).is_kernel_cat())

    def test_is_nccl_name(self):
        self.assertTrue(TraceEventBean({"name": "ncclkernel"}).is_nccl_name())
        self.assertFalse(TraceEventBean({"name": "add"}).is_nccl_name())

    def test_is_kernel_except_nccl(self):
        self.assertTrue(TraceEventBean({"cat": "Kernel", "name": "add"}).is_kernel_except_nccl())
        self.assertFalse(TraceEventBean({"cat": "Kernel", "name": "ncclkernel"}).is_kernel_except_nccl())

    def test_is_memory_event(self):
        # Without a "Device Id" in args the "[memory]" event must be rejected.
        self.assertTrue(TraceEventBean({"name": "[memory]", "args": {"Device Id": 1}}).is_memory_event())
        self.assertFalse(TraceEventBean({"name": "[memory]"}).is_memory_event())

    def test_is_compute_event(self):
        for task_type in ('AI_CORE', 'MIX_AIC', 'MIX_AIV', 'AI_CPU', 'AI_VECTOR_CORE', 'FFTS_PLUS'):
            self.assertTrue(TraceEventBean({"name": "add", "args": {"Task Type": task_type}}).is_compute_event())
        self.assertFalse(TraceEventBean({"name": "[memory]"}).is_compute_event())

    def test_is_sdma_event(self):
        for task_type in ('SDMA_SQE', 'PCIE_DMA_SQE'):
            self.assertTrue(TraceEventBean({"name": "add", "args": {"Task Type": task_type}}).is_sdma_event())
        self.assertFalse(TraceEventBean({"name": "[memory]"}).is_sdma_event())

    def test_is_event_wait(self):
        self.assertTrue(TraceEventBean({"name": "add", "args": {"Task Type": 'EVENT_WAIT_SQE'}}).is_event_wait())
        self.assertFalse(TraceEventBean({"name": "[memory]"}).is_event_wait())

    def test_is_backward(self):
        # This method was previously named ``is_backward`` (never collected by
        # unittest) and asserted on ``is_event_wait()``; it now exercises the
        # predicate its name and inputs describe.
        # NOTE(review): assumes TraceEventBean exposes ``is_backward()`` - confirm.
        self.assertTrue(TraceEventBean({"name": "add_bwd"}).is_backward())
        self.assertTrue(TraceEventBean({"name": "add_backward"}).is_backward())
        self.assertFalse(TraceEventBean({"name": "[memory]"}).is_backward())
class TestCommunicationBean(unittest.TestCase):
    """Checks the comparison rows that CommunicationBean builds for one op."""

    def test_rows_when_valid_data(self):
        """Valid base and comparison data give one op row followed by one row per task name."""
        base_data = {"comm_list": [0.5, 7], "comm_task": {"Notify Wait": [1, 2, 3]}}
        comparison_data = {"comm_list": [1, 3, 5], "comm_task": {"Notify Wait": [1, 2, 3], "Memcpy": [5]}}

        op_row = [None, 'allreduce', None, 2, 7.5, 3.75, 7, 0.5, 'allreduce', None, 3, 9, 3.0, 5, 1, 1.5, 1.2]
        notify_wait_row = [None, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, None, None]
        # "Memcpy" exists only in the comparison data, so the base half of the row is blank.
        memcpy_row = [None, None, None, None, 0, None, None, None, '|', 'Memcpy', 1, 5, 5.0, 5, 5, None, None]
        expected_rows = [op_row, notify_wait_row, memcpy_row]

        bean = CommunicationBean("allreduce", base_data, comparison_data)
        self.assertEqual(bean.rows, expected_rows)
class MockMemory:
    """Bare stand-in for a memory record: stores the size and duration it is given."""

    def __init__(self, size, duration):
        self.size, self.duration = size, duration
class TestOperatorCompareBean(unittest.TestCase):
    """Checks the row OperatorCompareBean builds for valid and missing operator nodes."""

    name = 'aten::add'

    def _row_for(self, base_node, comparison_node):
        """Build the bean with the kernel lookup patched to one 8-unit kernel and return its row."""
        with patch("utils.tree_builder.TreeBuilder.get_total_kernels", return_value=[MockKernel(8)]):
            return OperatorCompareBean(1, base_node, comparison_node).row

    def test_row_when_valid_data(self):
        """Both sides present: identical kernel data on each side."""
        expected = [2, self.name, None, None, 'add', 8, self.name, None, None, 'add', 8, 0, 1.0]
        actual = self._row_for(MockNode(self.name), MockNode(self.name))
        self.assertEqual(actual, expected)

    def test_row_when_invalid_base_data(self):
        """Base node missing: base columns are blank and the last column is infinite."""
        expected = [2, None, None, None, "", 0, self.name, None, None, 'add', 8, 8, float("inf")]
        actual = self._row_for(None, MockNode(self.name))
        self.assertEqual(actual, expected)

    def test_row_when_invalid_comparison_data(self):
        """Comparison node missing: comparison columns are blank and the last column is zero."""
        expected = [2, self.name, None, None, 'add', 8, None, None, None, '', 0, -8, 0]
        actual = self._row_for(MockNode(self.name), None)
        self.assertEqual(actual, expected)
class TestOperatorStatisticBean(unittest.TestCase):
    """Checks the statistics row OperatorStatisticBean builds from base and comparison lists."""

    name = "matmul"

    def _row_for(self, base_data, comparison_data):
        """Build the bean with the kernel lookup patched to two 2000-unit kernels and return its row."""
        kernels = [MockKernel(2000), MockKernel(2000)]
        with patch("utils.tree_builder.TreeBuilder.get_total_kernels", return_value=kernels):
            return OperatorStatisticBean(self.name, base_data, comparison_data).row

    def test_row_when_valid_data(self):
        """Two base entries against one comparison entry."""
        expected = [None, self.name, 8.0, 2, 4.0, 1, -4.0, 0.5]
        self.assertEqual(self._row_for([1, 1], [1]), expected)

    def test_row_when_invalid_base_data(self):
        """Empty base list: base columns are zero and the last column is infinite."""
        expected = [None, self.name, 0, 0, 4.0, 1, 4.0, float("inf")]
        self.assertEqual(self._row_for([], [1]), expected)

    def test_row_when_invalid_comparison_data(self):
        """Empty comparison list: comparison columns and the last column are zero."""
        expected = [None, self.name, 8.0, 2, 0, 0, -8.0, 0]
        self.assertEqual(self._row_for([1, 1], []), expected)
class TestProfilingInfo(unittest.TestCase):
    """Unit tests for the calculate/update/set helpers of ProfilingInfo."""

    def test_calculate_other_time(self):
        """other_time is what remains of compute time and is 0 once vec_time grows to 7."""
        profiling = ProfilingInfo("NPU")
        profiling.compute_time = 10
        profiling.cube_time = 1
        profiling.fa_time_fwd = 2
        profiling.fa_time_bwd = 2
        profiling.vec_time = 3
        profiling.calculate_other_time()
        self.assertEqual(profiling.other_time, 2)
        profiling.vec_time = 7
        profiling.calculate_other_time()
        self.assertEqual(profiling.other_time, 0)

    def test_calculate_vec_time(self):
        """vec_time is derived from compute, cube and flash-attention times."""
        profiling = ProfilingInfo("NPU")
        profiling.compute_time = 10
        profiling.cube_time = 1
        profiling.fa_time_fwd = 2
        profiling.fa_time_bwd = 2
        profiling.calculate_vec_time()
        self.assertEqual(profiling.vec_time, 5)

    def test_calculate_schedule_time(self):
        """scheduling_time is derived from e2e, compute and non-overlapped communication."""
        profiling = ProfilingInfo("NPU")
        profiling.e2e_time = 10
        profiling.compute_time = 5
        profiling.communication_not_overlapped = 3
        profiling.calculate_schedule_time()
        self.assertEqual(profiling.scheduling_time, 2)

    def test_update_fa_fwd_info(self):
        """Two updates accumulate both the forward FA time and count."""
        profiling = ProfilingInfo("NPU")
        for _ in range(2):
            profiling.update_fa_fwd_info(5)
        self.assertEqual(profiling.fa_time_fwd, 10)
        self.assertEqual(profiling.fa_num_fwd, 2)

    def test_update_fa_bwd_info(self):
        """Two updates accumulate both the backward FA time and count."""
        profiling = ProfilingInfo("NPU")
        for _ in range(2):
            profiling.update_fa_bwd_info(5)
        self.assertEqual(profiling.fa_time_bwd, 10)
        self.assertEqual(profiling.fa_num_bwd, 2)

    def test_update_sdma_info(self):
        """The count grows by 1 for a one-argument call and by 5 when 5 is passed second."""
        profiling = ProfilingInfo("NPU")
        profiling.update_sdma_info(5)
        self.assertEqual(profiling.sdma_time, 5)
        self.assertEqual(profiling.sdma_num, 1)
        profiling.update_sdma_info(5, 5)
        self.assertEqual(profiling.sdma_time, 10)
        self.assertEqual(profiling.sdma_num, 6)

    def test_update_cube_info(self):
        """Two updates accumulate both the cube time and count."""
        profiling = ProfilingInfo("NPU")
        for _ in range(2):
            profiling.update_cube_info(5)
        self.assertEqual(profiling.cube_time, 10)
        self.assertEqual(profiling.cube_num, 2)

    def test_update_vec_info(self):
        """Two updates accumulate both the vector time and count."""
        profiling = ProfilingInfo("NPU")
        for _ in range(2):
            profiling.update_vec_info(5)
        self.assertEqual(profiling.vec_time, 10)
        self.assertEqual(profiling.vec_num, 2)

    def test_set_compute_time(self):
        """set_compute_time overwrites a previously accumulated value."""
        profiling = ProfilingInfo("NPU")
        profiling.update_compute_time(1)
        profiling.set_compute_time(5)
        self.assertEqual(profiling.compute_time, 5)

    def test_update_compute_time(self):
        """update_compute_time accumulates across calls."""
        profiling = ProfilingInfo("NPU")
        for _ in range(2):
            profiling.update_compute_time(5)
        self.assertEqual(profiling.compute_time, 10)

    def test_set_e2e_time(self):
        """set_e2e_time stores the given value."""
        profiling = ProfilingInfo("NPU")
        profiling.set_e2e_time(5)
        self.assertEqual(profiling.e2e_time, 5)

    def test_set_comm_not_overlap(self):
        """set_comm_not_overlap overwrites a previously accumulated value."""
        profiling = ProfilingInfo("NPU")
        profiling.update_comm_not_overlap(10)
        profiling.set_comm_not_overlap(5)
        self.assertEqual(profiling.communication_not_overlapped, 5)

    def test_update_comm_not_overlap(self):
        """update_comm_not_overlap accumulates across calls."""
        profiling = ProfilingInfo("NPU")
        for _ in range(2):
            profiling.update_comm_not_overlap(5)
        self.assertEqual(profiling.communication_not_overlapped, 10)

    def test_set_memory_used(self):
        """set_memory_used stores the given value."""
        profiling = ProfilingInfo("NPU")
        profiling.set_memory_used(10)
        self.assertEqual(profiling.memory_used, 10)

    def test_is_not_minimal_profiling(self):
        """Only an "NPU" info with minimal_profiling disabled reports True."""
        gpu_profiling = ProfilingInfo("GPU")
        gpu_profiling.minimal_profiling = False
        self.assertFalse(gpu_profiling.is_not_minimal_profiling())

        npu_profiling = ProfilingInfo("NPU")
        npu_profiling.minimal_profiling = True
        self.assertFalse(npu_profiling.is_not_minimal_profiling())
        npu_profiling.minimal_profiling = False
        self.assertTrue(npu_profiling.is_not_minimal_profiling())
class MockEvent:
    """Lightweight trace-event double with a fixed name ("wait") and duration (7)."""

    def __init__(self, pid, tid, ts, ph="M"):
        self.pid, self.tid, self.ts, self.ph = pid, tid, ts, ph
        self.id = 1
        self.event = None

    @property
    def name(self):
        """Constant event name."""
        return "wait"

    @property
    def dur(self):
        """Constant event duration."""
        return 7

    @property
    def start_time(self):
        """Start time, which is simply the ``ts`` given at construction."""
        return self.ts

    def is_flow_start(self):
        """Return True when the phase is "s"."""
        return "s" == self.ph

    def is_flow_end(self):
        """Return True when the phase is "f"."""
        return "f" == self.ph

    def is_nccl_name(self):
        """Always False: the mock never represents an NCCL kernel."""
        return False
2}] + task_events = [{"ph": "X", "name": "notify_wait", "pid": 7, "tid": 1, "ts": 2, "dur": 1}, + {"ph": "X", "name": "notify_wait", "pid": 7, "tid": 1, "ts": 5, "dur": 1}] + + def test_picking_torch_op_event(self): + event = MockEvent(1, 2, 3) + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"): + parser = ProfilingParser() + parser.init({}, {}) + self.assertTrue(parser._picking_torch_op_event(event)) + + def test_picking_kernel_event(self): + event = MockEvent(1, 2, 3) + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"): + parser = ProfilingParser() + parser.init({}, {}) + self.assertTrue(parser._picking_kernel_event(event)) + + def test_picking_flow_event(self): + events = [MockEvent(1, 2, 3, "s"), MockEvent(1, 2, 3, "f")] + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"): + parser = ProfilingParser() + parser.init({}, {}) + for event in events: + self.assertTrue(parser._picking_flow_event(event)) + + def test_update_kernel_dict_when_valid_input(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"): + parser = ProfilingParser() + parser.init(self.flow_dict, self.all_kernels) + parser._update_kernel_dict() + self.assertEqual(len(parser._result_data.kernel_dict.get(12)), 2) + + def test_update_kernel_dict_when_without_kernels_return_null(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"): + parser = ProfilingParser() + parser.init(self.flow_dict, {}) + parser._update_kernel_dict() + self.assertEqual(len(parser._result_data.kernel_dict), 0) + + def test_update_kernel_dict_when_without_flow_return_null(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"): + parser = ProfilingParser() + parser.init({}, self.all_kernels) + parser._update_kernel_dict() + self.assertEqual(len(parser._result_data.kernel_dict), 0) + + def test_check_result_data(self): + with 
class TestProfilingResult(unittest.TestCase):
    """Checks that each ProfilingResult.update_* method records the data it is given."""

    def setUp(self):
        # Every test starts from a fresh "GPU" result container.
        self.result = ProfilingResult("GPU")

    def test_update_torch_op_data_when_valid_input(self):
        """One torch-op event ends up in torch_op_data."""
        self.result.update_torch_op_data(MockEvent(1, 2, 3))
        self.assertEqual(len(self.result.torch_op_data), 1)

    def test_update_kernel_dict_when_valid_input(self):
        """One kernel is stored under the key it was registered with."""
        self.result.update_kernel_dict(2, MockEvent(1, 2, 3))
        stored_kernels = self.result.kernel_dict.get(2)
        self.assertEqual(len(stored_kernels), 1)

    def test_update_memory_list_when_valid_input(self):
        """One memory record ends up in memory_list."""
        self.result.update_memory_list({})
        self.assertEqual(len(self.result.memory_list), 1)

    def test_update_communication_dict_when_valid_input(self):
        """The duration is stored in the op's comm_list."""
        self.result.update_communication_dict("reduce", 9)
        comm_list = self.result.communication_dict.get("reduce", {}).get("comm_list")
        self.assertEqual(sum(comm_list), 9)

    def test_update_comm_task_data_when_valid_input(self):
        """The task duration is stored under the task event's name ("wait")."""
        self.result.update_comm_task_data("reduce", MockEvent(1, 1, 1))
        comm_task = self.result.communication_dict.get("reduce", {}).get("comm_task", {})
        self.assertEqual(sum(comm_task.get("wait")), 7)
"Total Allocated": 1024}}] + trace_events = [ + {"ph": "X", "name": "test1", "pid": 1, "tid": 1, "ts": 100, "dur": 1, "cat": "kernel"}, + {"ph": "X", "name": "test2", "pid": 1, "tid": 1, "ts": 97, "dur": 1, "args": {"stream": 3}}, + {"ph": "X", "name": "1htod1", "pid": 1, "tid": 1, "ts": 0, "dur": 1, "cat": "kernel", "args": {"stream": 3}}, + {"ph": "X", "name": "1dtod1", "pid": 1, "tid": 1, "ts": 1, "dur": 1, "cat": "kernel", "args": {"stream": 3}}, + {"ph": "X", "name": "1dtoh1", "pid": 1, "tid": 1, "ts": 2, "dur": 1, "cat": "kernel", "args": {"stream": 3}}, + {"ph": "X", "name": "1memset (device)1", "pid": 1, "tid": 1, "ts": 3, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "ncclkernel1", "pid": 1, "tid": 1, "ts": 4, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "ncclkernel2", "pid": 1, "tid": 1, "ts": 5, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "gemm", "pid": 1, "tid": 1, "ts": 6, "dur": 1, "cat": "kernel", "args": {"stream": 3}}, + {"ph": "X", "name": "fmha_kernel_bwd", "pid": 1, "tid": 1, "ts": 7, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "fmha_kernel_fwd", "pid": 1, "tid": 1, "ts": 8, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "flash_kernel_bwd", "pid": 1, "tid": 1, "ts": 9, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "flash_kernel_fwd", "pid": 1, "tid": 1, "ts": 10, "dur": 1, "cat": "kernel", + "args": {"stream": 3}}, + {"ph": "X", "name": "other", "pid": 1, "tid": 1, "ts": 11, "dur": 1, "cat": "kernel", "args": {"stream": 3}}, + ] + memory_event = {"ph": "i", "name": "[memory]", "pid": 1, "tid": 1, "ts": 0, + "args": {"Addr": 3, "Bytes": 512, "Total Allocated": 1024, 'Device Id': 1}} + nccl_event = {"ph": "X", "name": "nccl_reduce", "pid": 1, "tid": 1, "ts": 4, "dur": 1, "cat": "kernel", + "args": {"stream": 3}} + cube_event = {"ph": "X", "name": "gemm", "pid": 1, "tid": 
1, "ts": 6, "dur": 1, "cat": "kernel", + "args": {"stream": 3}} + other_event = {"ph": "X", "name": "other", "pid": 1, "tid": 1, "ts": 6, "dur": 1} + + def test_update_memory_list_when_valid_input(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): + res = GPUProfilingParser({}, {}) + res._enable_memory_compare = True + res._memory_events = [TraceEventBean(event) for event in self.memory_events] + res._result_data = ProfilingResult("GPU") + res._update_memory_list() + self.assertEqual(len(res._result_data.memory_list), 3) + self.assertEqual(res._result_data.memory_list[0].memory_details, ", (1, 2), [duration: 1.0], [size: 0.5]\n") + + def test_calculate_performance_time_when_valid_input(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): + res = GPUProfilingParser({}, {}) + res._trace_events = [TraceEventBean(event) for event in self.trace_events] + res._result_data = ProfilingResult("GPU") + res._compute_stream_id = 3 + res._marks = defaultdict(int) + res._calculate_performance_time() + self.assertEqual(res._result_data.overall_metrics.e2e_time, 98) + self.assertEqual(res._result_data.overall_metrics.sdma_time, 4) + self.assertEqual(res._result_data.overall_metrics.sdma_num, 4) + self.assertEqual(res._result_data.overall_metrics.cube_time, 1) + self.assertEqual(res._result_data.overall_metrics.cube_num, 1) + self.assertEqual(res._result_data.overall_metrics.fa_time_fwd, 2) + self.assertEqual(res._result_data.overall_metrics.fa_num_fwd, 2) + self.assertEqual(res._result_data.overall_metrics.fa_time_bwd, 2) + self.assertEqual(res._result_data.overall_metrics.fa_num_bwd, 2) + self.assertEqual(res._result_data.overall_metrics.vec_time, 2) + 
self.assertEqual(res._result_data.overall_metrics.vec_num, 2) # cun yi + self.assertEqual(res._result_data.overall_metrics.communication_not_overlapped, 2) + self.assertEqual(res._result_data.overall_metrics.compute_time, 7) + + def test_picking_memory_event_when_valid_input(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): + res = GPUProfilingParser({}, {}) + res._memory_events = [] + result = res._picking_memory_event(TraceEventBean(self.memory_event)) + self.assertTrue(result) + result = res._picking_memory_event(TraceEventBean(self.nccl_event)) + self.assertFalse(result) + + def test_is_torch_op_event_when_valid_input(self): + event_list = [{"cat": "cpu_op"}, {"cat": "user_annotation"}, {"cat": "cuda_runtime"}, {"cat": "operator"}] + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): + res = GPUProfilingParser({}, {}) + for event in event_list: + result = res._is_torch_op_event(TraceEventBean(event)) + self.assertTrue(result) + result = res._is_torch_op_event(TraceEventBean({"cat": "python_function"})) + self.assertFalse(result) + + def test_is_kernel_event_when_valid_input(self): + event_list1 = [{"cat": "kernel", "name": "matmul"}, {"cat": "kernel", "name": "nccl_reduce"}] + event_list2 = [{"cat": "async", "name": "nccl_reduce"}, {"cat": "cpu_op", "name": "aten::to"}] + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): + res = GPUProfilingParser({}, {}) + for event in event_list1: + result = res._is_kernel_event(TraceEventBean(event)) + self.assertTrue(result) + for event in event_list2: + result = res._is_kernel_event(TraceEventBean(event)) + 
self.assertFalse(result) + + def test_is_flow_event_when_valid_input(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): + res = GPUProfilingParser({}, {}) + res._flow_cat = ("async_gpu",) + result = res._is_flow_event(TraceEventBean({"cat": "async_gpu"})) + self.assertTrue(result) diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_npu_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_npu_profiling_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..feaf3289adfb50a674c8aaf6634622f2a27d9167 --- /dev/null +++ b/profiler/test/ut/compare_tools/profiling_parser/test_npu_profiling_parser.py @@ -0,0 +1,125 @@ +import unittest +from unittest.mock import patch +from decimal import Decimal + +from compare_bean.origin_data_bean.operator_memory_bean import OperatorMemoryBean +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean +from profiling_parser.base_profiling_parser import ProfilingResult +from profiling_parser.npu_profiling_parser import NPUProfilingParser + + +class TestNPUProfilingParser(unittest.TestCase): + comm_events = [{"ph": "X", "name": "hccl_allreduce", "pid": 7, "tid": 3, "ts": 1, "dur": 2}] + task_events = [{"ph": "X", "name": "notify_wait", "pid": 7, "tid": 1, "ts": 2, "dur": 1}, + {"ph": "X", "name": "notify_wait", "pid": 7, "tid": 1, "ts": 5, "dur": 1}] + dequeue_events = [{"ph": "X", "name": "test1", "pid": 1, "tid": 1, "ts": 1, "dur": 5, "cat": "dequeue"}] + enqueue_events = [{"ph": "X", "name": "test1", "pid": 1, "tid": 1, "ts": 1, "dur": 5, "cat": "enqueue"}] + overlap_events = [{"ph": "X", "name": "computing", "pid": 9, "tid": 3, "ts": 1, "dur": 2}] + meta_events = [{"ph": "M", "name": "process_name", "pid": 7, "tid": 3, "args": {"name": "HCCL"}}, + {"ph": "M", "name": "process_name", "pid": 9, "tid": 3, "args": {"name": 
"Overlap Analysis"}}, + {"ph": "M", "name": "process_name", "pid": 5, "tid": 3, "args": {"name": "Ascend Hardware"}}, + {"ph": "M", "name": "thread_name", "pid": 7, "tid": 3, "args": {"name": "Communication"}}] + + def test_update_memory_list_when_invalid_path(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + res._operator_memory_path = "" + res._update_memory_list() + + def test_update_memory_list_when_valid_data(self): + memory_data = [ + OperatorMemoryBean({"Name": "aten::add", "Size(KB)": 512, "Allocation Time(us)": 1, "Release Time(us)": 3}), + OperatorMemoryBean({"Name": "aten::add", "Size(KB)": 512, "Allocation Time(us)": 0, "Release Time(us)": 3}), + OperatorMemoryBean({"Name": "cann::add", "Size(KB)": 512, "Allocation Time(us)": 2, "Release Time(us)": 4}), + OperatorMemoryBean( + {"Name": "aten::add", "Size(KB)": 512, "Allocation Time(us)": 7, "Release Time(us)": 10})] + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None), \ + patch("utils.file_reader.FileReader.read_csv_file", return_value=memory_data): + res = NPUProfilingParser({}, {}) + res._operator_memory_path = "" + res._enqueue_dict = {} + res._dequeue_data = [TraceEventBean(event) for event in self.dequeue_events] + res._result_data = ProfilingResult("NPU") + res._update_memory_list() + self.assertEqual(len(res._result_data.memory_list), 3) + self.assertEqual(res._result_data.memory_list[0].duration, 2) + + def test_picking_hccl_event(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + res._hccl_pid = 7 + 
res._hccl_op_tid_list = [3, 4] + res._comm_list = [] + res._comm_task_list = [] + res._result_data = ProfilingResult("NPU") + for event in self.comm_events + self.task_events + self.dequeue_events: + res._picking_hccl_event(TraceEventBean(event)) + self.assertEqual(len(res._comm_task_list), 2) + self.assertEqual(len(res._comm_list), 1) + + def test_picking_task_queue_data(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + res._enqueue_dict = {} + res._dequeue_data = [] + for event in self.enqueue_events + self.dequeue_events: + result = res._picking_task_queue_data(TraceEventBean(event)) + self.assertTrue(result) + for event in self.task_events: + result = res._picking_task_queue_data(TraceEventBean(event)) + self.assertFalse(result) + self.assertEqual(len(res._enqueue_dict), 1) + self.assertEqual(len(res._dequeue_data), 1) + + def test_picking_overlap_analysis_data(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + res._overlap_analysis = [] + res._overlap_pid = 9 + for event in self.overlap_events: + result = res._picking_overlap_analysis_data(TraceEventBean(event)) + self.assertTrue(result) + for event in self.meta_events + self.dequeue_events: + result = res._picking_overlap_analysis_data(TraceEventBean(event)) + self.assertFalse(result) + + def test_is_kernel_event(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + res._kernel_pid = 5 + self.assertTrue(res._is_kernel_event(TraceEventBean({"pid": 5, "ph": "X"}))) + 
self.assertFalse(res._is_kernel_event(TraceEventBean({"pid": 5, "ph": "M"}))) + self.assertFalse(res._is_kernel_event(TraceEventBean({"pid": 1, "ph": "x"}))) + + def test_is_flow_event(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + self.assertTrue(res._is_flow_event(TraceEventBean({"cat": "async_npu"}))) + self.assertFalse(res._is_flow_event(TraceEventBean({"cat": "async"}))) + + def test_is_torch_op_event(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + self.assertTrue(res._is_torch_op_event(TraceEventBean({"cat": "cpu_op"}))) + self.assertFalse(res._is_torch_op_event(TraceEventBean({"cat": "async"}))) + + def test_filter_meta_id(self): + with patch("profiling_parser.base_profiling_parser.BaseProfilingParser.__init__"), \ + patch("profiling_parser.npu_profiling_parser.NPUProfilingParser.__init__", return_value=None): + res = NPUProfilingParser({}, {}) + res._trace_events = [TraceEventBean(event) for event in self.meta_events] + res._hccl_op_tid_list = [] + res._enable_communication_compare = True + res._filter_meta_id() + self.assertEqual(res._hccl_pid, 7) + self.assertEqual(res._kernel_pid, 5) + self.assertEqual(res._overlap_pid, 9) + self.assertEqual(res._hccl_op_tid_list, [3]) diff --git a/profiler/test/ut/compare_tools/utils/test_file_reader.py b/profiler/test/ut/compare_tools/utils/test_file_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..797ab7407eb1c3546e2c6cb42cf5ad5ffa4b8dca --- /dev/null +++ b/profiler/test/ut/compare_tools/utils/test_file_reader.py @@ -0,0 +1,20 @@ +import unittest +from unittest.mock import patch + +from utils.file_reader import FileReader +from 
utils.constant import Constant + + +class TestFileReader(unittest.TestCase): + + def test_read_trace_file(self): + json_data = FileReader.read_trace_file("resource/event_list.json") + self.assertEqual(len(json_data), 2) + + def test_read_csv_file(self): + csv = FileReader.read_csv_file("resource/test.csv") + self.assertEqual(len(csv), 8) + + def test_check_json_type(self): + t = FileReader.check_json_type("resource/event_list.json") + self.assertEqual(t, Constant.NPU) diff --git a/profiler/test/ut/compare_tools/utils/test_name_function.py b/profiler/test/ut/compare_tools/utils/test_name_function.py new file mode 100644 index 0000000000000000000000000000000000000000..27913035da28497774f1a6d0a14fea60b5394de1 --- /dev/null +++ b/profiler/test/ut/compare_tools/utils/test_name_function.py @@ -0,0 +1,42 @@ +import unittest + +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean +from utils.name_function import NameFunction +from utils.torch_op_node import TorchOpNode + + +class Args: + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + +args = {"op_name_map": {}, "use_input_shape": True} +args = Args(**args) +func = NameFunction(args) + + +class TestNameFunction(unittest.TestCase): + node = None + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + cls.node = TorchOpNode(event=TraceEventBean( + {"pid": 0, "tid": 0, "args": {"Input Dims": [[1, 1], [1, 1]], "name": 0}, "ts": 0, "dur": 1, "ph": "M", + "name": "process_name"})) + + def test_get_name(self): + self.assertEqual(NameFunction.get_name(self.node), "process_name") + + def test_get_full_name(self): + self.assertEqual(NameFunction.get_full_name(self.node), "process_name1,1;\r\n1,1") + + def test_get_name_function(self): + self.assertEqual(func.get_name_func(), func.get_full_map_name) + + def test_get_map_name(self): + self.assertEqual(func.get_map_name(self.node), "process_name") + + def test_get_full_map_name(self): + 
self.assertEqual(func.get_full_map_name(self.node), "process_name1,1;\r\n1,1") diff --git a/profiler/test/ut/compare_tools/utils/test_tree_builder.py b/profiler/test/ut/compare_tools/utils/test_tree_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..cb5cae2f9a2a0ec74c7a6543eb4c046e238c69d5 --- /dev/null +++ b/profiler/test/ut/compare_tools/utils/test_tree_builder.py @@ -0,0 +1,30 @@ +import unittest +import json + +from compare_bean.origin_data_bean.compare_event import MemoryEvent +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class TestUtils(unittest.TestCase): + + def test_build_tree(self): + flow_kernel_dict = {0: [0, 1], 1: [0, 1]} + memory_allocated_list = [ + MemoryEvent({"ts": 0, "Allocation Time(us)": 1, "Release Time(us)": 3, "Name": "test", "Size(KB)": 1})] + event_list = [TraceEventBean({"pid": 0, "tid": 0, "args": {"Input Dims": [[1, 1], [1, 1]], "name": 0}, + "ts": 0, "dur": 1, "ph": "M", "name": "process_name"}), + TraceEventBean({"pid": 1, "tid": 1, "args": {"Input Dims": [[1, 1], [1, 1]], "name": 1}, + "ts": 3, "dur": 1, "ph": "M", "name": "process_name"})] + for event in event_list: + event.is_torch_op = True + tree = TreeBuilder.build_tree(event_list, flow_kernel_dict, memory_allocated_list) + child_nodes = tree.child_nodes + self.assertEqual(len(tree._child_nodes), 2) + self.assertEqual(child_nodes[0].start_time, 0) + self.assertEqual(child_nodes[0].end_time, 1) + self.assertEqual(child_nodes[0].kernel_num, 2) + self.assertEqual(child_nodes[1].kernel_num, 0) + self.assertEqual(len(TreeBuilder.get_total_kernels(tree)), 2) + self.assertEqual(TreeBuilder.get_total_memory(tree)[0].size, 1) diff --git a/profiler/test/ut/compare_tools/view/__init__.py b/profiler/test/ut/compare_tools/view/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/ut/compare_tools/view/test_excel_view.py b/profiler/test/ut/compare_tools/view/test_excel_view.py new file mode 100644 index 0000000000000000000000000000000000000000..dfea18e3c412f3a345231c401fcfbee6d3289481 --- /dev/null +++ b/profiler/test/ut/compare_tools/view/test_excel_view.py @@ -0,0 +1,18 @@ +import os +import unittest +from unittest.mock import patch + +from view.excel_view import ExcelView + + +class TestExcelView(unittest.TestCase): + file_path = "./test.xlsx" + + def tearDown(self) -> None: + if not os.path.exists(self.file_path): + raise RuntimeError("ut failed.") + os.remove(self.file_path) + + def test_generate_view(self): + with patch("view.work_sheet_creator.WorkSheetCreator.create_sheet"): + ExcelView({"table1": {}, "table2": {}}, self.file_path, {}).generate_view() diff --git a/profiler/test/ut/compare_tools/view/test_screen_view.py b/profiler/test/ut/compare_tools/view/test_screen_view.py new file mode 100644 index 0000000000000000000000000000000000000000..6828b20f0100d0e8363309588550aedc791bae83 --- /dev/null +++ b/profiler/test/ut/compare_tools/view/test_screen_view.py @@ -0,0 +1,9 @@ +import unittest + +from view.screen_view import ScreenView + + +class TestScreenView(unittest.TestCase): + def test_generate_view(self): + data = {"table": {"headers": ["index", "value"], "rows": [[1, 1], [2, 2]]}} + ScreenView(data).generate_view() diff --git a/profiler/test/ut/compare_tools/view/test_worker_sheet_creator.py b/profiler/test/ut/compare_tools/view/test_worker_sheet_creator.py new file mode 100644 index 0000000000000000000000000000000000000000..ba59e8d1c0279f98845c244d5b194b0bfc38a6d0 --- /dev/null +++ b/profiler/test/ut/compare_tools/view/test_worker_sheet_creator.py @@ -0,0 +1,45 @@ +import os +import unittest + +import pandas as pd +from xlsxwriter import Workbook + +from utils.excel_config import ExcelConfig +from 
view.work_sheet_creator import WorkSheetCreator + + +class TestWorkerSheetCreator(unittest.TestCase): + file_path = "./test.xlsx" + table_name = "OperatorCompareStatistic" + + def tearDown(self) -> None: + if not os.path.exists(self.file_path): + raise RuntimeError("ut failed.") + os.remove(self.file_path) + + def test_create_sheet_when_valid_data(self): + class Args: + def __init__(self, base, comparison): + self.base_profiling_path = base + self.comparison_profiling_path = comparison + + data = {"headers": ExcelConfig.HEADERS.get(self.table_name), + "overhead": ExcelConfig.OVERHEAD.get(self.table_name), + "rows": [[1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, float("inf")], [1, 2, 3, 4, 5, 6, 7, 0.45], + [1, 2, 3, 4, 5, 6, 7, 0]]} + creator = WorkSheetCreator(Workbook(self.file_path), self.table_name, data, Args("base", "comparison")) + creator.create_sheet() + creator._work_book.close() + data = pd.read_excel(self.file_path) + self.assertEqual(data.shape[0], 6) + self.assertEqual(data.shape[1], 8) + + def test_create_sheet_when_invalid_data(self): + data = {"headers": ExcelConfig.HEADERS.get(self.table_name), + "overhead": ExcelConfig.OVERHEAD.get(self.table_name), + "rows": []} + creator = WorkSheetCreator(Workbook(self.file_path), self.table_name, data, {}) + creator.create_sheet() + creator._work_book.close() + data = pd.read_excel(self.file_path) + self.assertEqual(data.shape[0], 0)