From f005f10309f31ab4331dc8a9e7499416171c4e74 Mon Sep 17 00:00:00 2001 From: sunboquan Date: Thu, 10 Aug 2023 09:58:35 +0800 Subject: [PATCH] modify huawei company name in license --- profiler/cluster_analyse/__init__.py | 2 +- profiler/cluster_analyse/analysis/__init__.py | 14 ++ .../analysis/analysis_facade.py | 11 +- profiler/cluster_analyse/cluster_analysis.py | 2 +- .../cluster_data_preprocess/__init__.py | 2 +- .../data_preprocessor.py | 2 +- .../pytorch_data_preprocessor.py | 2 +- .../cluster_op_summary_analysis.py | 2 +- .../cluster_analyse/common_func/__init__.py | 14 ++ .../cluster_analyse/common_func/constant.py | 28 ++++ .../common_func/file_manager.py | 140 ++++++++++++++++++ .../communication_group/__init__.py | 14 ++ .../communication_group_generator.py | 22 ++- profiler/compare_tools/__init__.py | 2 +- profiler/compare_tools/torch_op_compare.py | 2 +- .../distribute_modify_hostname.bash | 2 +- profiler/performance_analyse/__init__.py | 2 +- profiler/performance_analyse/gpu_parser.py | 2 +- profiler/performance_analyse/npu_parser.py | 2 +- profiler/performance_analyse/parser_helper.py | 2 +- .../performance_analyse/profiling_parse.py | 2 +- 21 files changed, 252 insertions(+), 19 deletions(-) create mode 100644 profiler/cluster_analyse/analysis/__init__.py create mode 100644 profiler/cluster_analyse/common_func/__init__.py create mode 100644 profiler/cluster_analyse/common_func/constant.py create mode 100644 profiler/cluster_analyse/common_func/file_manager.py create mode 100644 profiler/cluster_analyse/communication_group/__init__.py diff --git a/profiler/cluster_analyse/__init__.py b/profiler/cluster_analyse/__init__.py index 1af7850112e..8400fd5ecd1 100644 --- a/profiler/cluster_analyse/__init__.py +++ b/profiler/cluster_analyse/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/analysis/__init__.py b/profiler/cluster_analyse/analysis/__init__.py new file mode 100644 index 00000000000..8400fd5ecd1 --- /dev/null +++ b/profiler/cluster_analyse/analysis/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/cluster_analyse/analysis/analysis_facade.py b/profiler/cluster_analyse/analysis/analysis_facade.py index 98526b94ab1..e6966d4402b 100644 --- a/profiler/cluster_analyse/analysis/analysis_facade.py +++ b/profiler/cluster_analyse/analysis/analysis_facade.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor +from communication_group.communication_group_generator import CommunicationGroupGenerator + + class AnalysisFacade: analysis_module = {} @@ -21,4 +25,9 @@ class AnalysisFacade: self.data_map = data_map self.communication_group = communication_group + def cluster_analyze(self): + data_map = PytorchDataPreprocessor(self.collection_path).get_data_map() + if not data_map: + print("Can not get rank info or profiling data.") + communication_group = CommunicationGroupGenerator(self.collection_path, data_map).generate() diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index d6ac6a57169..d1daea002bf 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_data_preprocess/__init__.py b/profiler/cluster_analyse/cluster_data_preprocess/__init__.py index 1af7850112e..8400fd5ecd1 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/__init__.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py index c2e4bd4789f..ebc9647c208 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. 
+# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py index 2a3492697b5..8dc47bd8220 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py index a95ac0a0180..68232b24e67 100644 --- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py +++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/common_func/__init__.py b/profiler/cluster_analyse/common_func/__init__.py new file mode 100644 index 00000000000..8400fd5ecd1 --- /dev/null +++ b/profiler/cluster_analyse/common_func/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/cluster_analyse/common_func/constant.py b/profiler/cluster_analyse/common_func/constant.py new file mode 100644 index 00000000000..e6cc07eb6e6 --- /dev/null +++ b/profiler/cluster_analyse/common_func/constant.py @@ -0,0 +1,28 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Constant(object): + # dir name + FRAMEWORK_DIR = "FRAMEWORK" + OUTPUT_DIR = "ASCEND_PROFILER_OUTPUT" + COMM_JSON = "communication.json" + STEP_TIME_CSV = "step_time.csv" + + # file authority + FILE_AUTHORITY = 0o640 + DIR_AUTHORITY = 0o750 + MAX_JSON_SIZE = 1024 * 1024 * 1024 * 10 + MAX_CSV_SIZE = 1024 * 1024 * 1024 * 5 diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py new file mode 100644 index 00000000000..6f0c9f99e9d --- /dev/null +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -0,0 +1,140 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import csv +import json +import shutil +from common_func.constant import Constant + + +class FileManager: + + @classmethod + def check_file_or_directory_path(cls, path, isdir=False): + """ + Function Description: + check whether the path is valid + Parameter: + path: the path to check + isdir: the path is dir or file + Exception Description: + when invalid data throw exception + """ + if isdir: + if not os.path.exists(path): + raise RuntimeError('The path {} is not exist.'.format(path)) + + if not os.path.isdir(path): + raise RuntimeError('The path {} is not a directory.'.format(path)) + + if not os.access(path, os.W_OK): + raise RuntimeError('The path {} does not have permission to write. ' + 'Please check the path permission'.format(path)) + else: + if not os.path.isfile(path): + raise RuntimeError('{} is an invalid file or non-exist.'.format(path)) + + if not os.access(path, os.R_OK): + raise RuntimeError( + 'The path {} does not have permission to read. 
Please check the path permission'.format(path)) + + @classmethod + def read_csv_file(cls, file_path: str, class_bean: any) -> list: + cls.check_file_or_directory_path(file_path) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_CSV_SIZE: + print(f"The file size exceeds the preset value {Constant.MAX_CSV_SIZE / 1024 / 1024}MB, " + f"please check the file: {file_path}") + return [] + result_data = [] + try: + with open(file_path, newline="") as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + result_data.append(class_bean(row)) + except Exception: + raise RuntimeError(f"Failed to read the file: {file_path}") + return result_data + + @classmethod + def read_json_file(cls, file_path: str) -> dict: + cls.check_file_or_directory_path(file_path) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return {} + if file_size > Constant.MAX_JSON_SIZE: + print(f"The file size exceeds the preset value {Constant.MAX_JSON_SIZE / 1024 / 1024}MB, " + f"please check the file: {file_path}") + return {} + try: + with open(file_path, "r") as json_file: + result_data = json.load(json_file) + except Exception: + raise RuntimeError(f"Failed to read the file: {file_path}") + return result_data + + @classmethod + def create_csv_file(cls, profiler_path: str, data: list, file_name: str, headers: list = None) -> None: + if not data: + return + file_path = os.path.join(profiler_path, Constant.OUTPUT_DIR, file_name) + try: + with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT, Constant.FILE_AUTHORITY), "w", + newline="") as file: + writer = csv.writer(file) + if headers: + writer.writerow(headers) + writer.writerows(data) + except Exception: + raise RuntimeError(f"Can't create file: {file_path}") + + @classmethod + def create_json_file(cls, profiler_path: str, data: list, file_name: str) -> None: + if not data: + return + file_path = os.path.join(profiler_path, Constant.OUTPUT_DIR, file_name) + 
cls.create_json_file_by_path(file_path, data) + + @classmethod + def create_json_file_by_path(cls, output_path: str, data: list) -> None: + dir_name = os.path.dirname(output_path) + if not os.path.exists(dir_name): + try: + os.makedirs(dir_name, mode=Constant.DIR_AUTHORITY) + except Exception: + raise RuntimeError(f"Can't create directory: {dir_name}") + try: + with os.fdopen(os.open(output_path, os.O_WRONLY | os.O_CREAT, Constant.FILE_AUTHORITY), "w") as file: + json.dump(data, file) + except Exception: + raise RuntimeError(f"Can't create file: {output_path}") + + @classmethod + def remove_and_make_output_dir(cls, profiler_path) -> None: + output_path = os.path.join(profiler_path, Constant.OUTPUT_DIR) + if os.path.isdir(output_path): + try: + shutil.rmtree(output_path) + os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) + except Exception: + raise RuntimeError(f"Can't delete files in the directory: {output_path}") + return + try: + os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) + except Exception: + raise RuntimeError(f"Can't create directory: {output_path}") diff --git a/profiler/cluster_analyse/communication_group/__init__.py b/profiler/cluster_analyse/communication_group/__init__.py new file mode 100644 index 00000000000..8400fd5ecd1 --- /dev/null +++ b/profiler/cluster_analyse/communication_group/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/profiler/cluster_analyse/communication_group/communication_group_generator.py b/profiler/cluster_analyse/communication_group/communication_group_generator.py index b287f07eda2..e95175b0b38 100644 --- a/profiler/cluster_analyse/communication_group/communication_group_generator.py +++ b/profiler/cluster_analyse/communication_group/communication_group_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,20 +13,34 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +from common_func.constant import Constant +from common_func.file_manager import FileManager + class CommunicationGroupGenerator: def __init__(self, collection_path: str, data_map: dict): self.collection_path = collection_path self.data_map = data_map self.communication_group = {} + self.rank_comm_dir_dict = {} def generate(self): - pass + self.load_communication_json() + + def load_communication_json(self): + for rank_id, profiling_dir_path in self.data_map.items(): + comm_dir = profiling_dir_path.get(Constant.COMM_JSON) + if comm_dir: + self.rank_comm_dir_dict[rank_id] = FileManager.read_json_file(comm_dir) + if not self.rank_comm_dir_dict.get(rank_id): + print(f"rank {rank_id} does not have a valid communication.json") - def read_communication_json(self): - pass def generate_collective_communication_group(self): pass def generate_p2p_communication_group(self): pass + + def get_all_collective_ops_name(self): + pass diff --git a/profiler/compare_tools/__init__.py b/profiler/compare_tools/__init__.py index 1af7850112e..8400fd5ecd1 100644 --- a/profiler/compare_tools/__init__.py +++ b/profiler/compare_tools/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/compare_tools/torch_op_compare.py b/profiler/compare_tools/torch_op_compare.py index 1c4551aa4b2..fbbcba95e38 100644 --- a/profiler/compare_tools/torch_op_compare.py +++ b/profiler/compare_tools/torch_op_compare.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/distribute_tools/distribute_modify_hostname.bash b/profiler/distribute_tools/distribute_modify_hostname.bash index d47b035499e..ac72f2fa1b7 100644 --- a/profiler/distribute_tools/distribute_modify_hostname.bash +++ b/profiler/distribute_tools/distribute_modify_hostname.bash @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/__init__.py b/profiler/performance_analyse/__init__.py index 1af7850112e..8400fd5ecd1 100644 --- a/profiler/performance_analyse/__init__.py +++ b/profiler/performance_analyse/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/gpu_parser.py b/profiler/performance_analyse/gpu_parser.py index e95eb138012..95391dc0ba9 100644 --- a/profiler/performance_analyse/gpu_parser.py +++ b/profiler/performance_analyse/gpu_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/npu_parser.py b/profiler/performance_analyse/npu_parser.py index e2c8dbc0d28..333d8a2682b 100644 --- a/profiler/performance_analyse/npu_parser.py +++ b/profiler/performance_analyse/npu_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/parser_helper.py b/profiler/performance_analyse/parser_helper.py index 9cfab6910de..958a3146bb5 100644 --- a/profiler/performance_analyse/parser_helper.py +++ b/profiler/performance_analyse/parser_helper.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/profiling_parse.py b/profiler/performance_analyse/profiling_parse.py index 0e81f2d2199..6c69af674f5 100644 --- a/profiler/performance_analyse/profiling_parse.py +++ b/profiler/performance_analyse/profiling_parse.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); -- Gitee