diff --git a/profiler/cluster_analyse/__init__.py b/profiler/cluster_analyse/__init__.py index 1af7850112ed8824e5f9b73b9dbab54215f9c9cd..8400fd5ecd1246eaee795cebfccfacc80a94f08c 100644 --- a/profiler/cluster_analyse/__init__.py +++ b/profiler/cluster_analyse/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/analysis/__init__.py b/profiler/cluster_analyse/analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8400fd5ecd1246eaee795cebfccfacc80a94f08c --- /dev/null +++ b/profiler/cluster_analyse/analysis/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/cluster_analyse/analysis/analysis_facade.py b/profiler/cluster_analyse/analysis/analysis_facade.py index 98526b94ab16d794f3a765ded9cf6187c9881493..e6966d4402b9b3e15022fa18a7e142213804a698 100644 --- a/profiler/cluster_analyse/analysis/analysis_facade.py +++ b/profiler/cluster_analyse/analysis/analysis_facade.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor +from communication_group.communication_group_generator import CommunicationGroupGenerator + + class AnalysisFacade: analysis_module = {} @@ -21,4 +25,9 @@ class AnalysisFacade: self.data_map = data_map self.communication_group = communication_group + def cluster_analyze(self): + data_map = PytorchDataPreprocessor(self.collection_path).get_data_map() + if not data_map: + print("Can not get rank info or profiling data.") + communication_group = CommunicationGroupGenerator(self.collection_path, data_map).generate() diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index d6ac6a57169c9059974939cdbc19c8a6e951891e..d1daea002bf5f0504e98d3b9e9b7143a2a03db49 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_data_preprocess/__init__.py b/profiler/cluster_analyse/cluster_data_preprocess/__init__.py index 1af7850112ed8824e5f9b73b9dbab54215f9c9cd..8400fd5ecd1246eaee795cebfccfacc80a94f08c 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/__init__.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py index c2e4bd4789fc10a11a19e2e4667f48c231824540..ebc9647c208b05f51698563b8dabb7d13c28c7ec 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/data_preprocessor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py index 2a3492697b52eb72159f82c95d799f05b83d786c..8dc47bd822034e77bdec6f7cdf45036a1d7c5446 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py index a95ac0a01807d9a163f8992d7df8b2f294fca8fb..68232b24e670b6a29f313e307036760570be5e94 100644 --- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py +++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_op_summary_analysis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/cluster_analyse/common_func/__init__.py b/profiler/cluster_analyse/common_func/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8400fd5ecd1246eaee795cebfccfacc80a94f08c --- /dev/null +++ b/profiler/cluster_analyse/common_func/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/cluster_analyse/common_func/constant.py b/profiler/cluster_analyse/common_func/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..e6cc07eb6e6a9fff831068cd32ce5d59901d5212 --- /dev/null +++ b/profiler/cluster_analyse/common_func/constant.py @@ -0,0 +1,28 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import csv
import json
import os
import shutil
from typing import Any, Callable


# --- common_func/constant.py ---
class Constant(object):
    """Shared directory/file names, permission modes and size limits."""

    # dir name
    FRAMEWORK_DIR = "FRAMEWORK"
    OUTPUT_DIR = "ASCEND_PROFILER_OUTPUT"
    COMM_JSON = "communication.json"
    STEP_TIME_CSV = "step_time.csv"

    # file authority
    FILE_AUTHORITY = 0o640
    DIR_AUTHORITY = 0o750
    MAX_JSON_SIZE = 1024 * 1024 * 1024 * 10
    MAX_CSV_SIZE = 1024 * 1024 * 1024 * 5


# --- common_func/file_manager.py ---
# NOTE: when FileManager lives in its own module it needs
# `from common_func.constant import Constant`.
class FileManager:
    """Helpers for validating paths and reading/writing CSV and JSON files.

    Readers refuse files larger than the limits in ``Constant`` and return an
    empty container for empty or oversized files. Writers create files with
    ``Constant.FILE_AUTHORITY`` and directories with ``Constant.DIR_AUTHORITY``.
    All I/O failures are surfaced as ``RuntimeError``.
    """

    @classmethod
    def check_file_or_directory_path(cls, path, isdir=False):
        """Check that ``path`` is usable.

        Args:
            path: the path to check.
            isdir: when True, ``path`` must be an existing, writable directory;
                otherwise it must be an existing, readable regular file.

        Raises:
            RuntimeError: if the path fails any of the checks.
        """
        if isdir:
            if not os.path.exists(path):
                raise RuntimeError('The path {} is not exist.'.format(path))

            if not os.path.isdir(path):
                raise RuntimeError('The path {} is not a directory.'.format(path))

            if not os.access(path, os.W_OK):
                raise RuntimeError('The path {} does not have permission to write. '
                                   'Please check the path permission'.format(path))
            return

        if not os.path.isfile(path):
            raise RuntimeError('{} is an invalid file or non-exist.'.format(path))

        if not os.access(path, os.R_OK):
            raise RuntimeError(
                'The path {} does not have permission to read. Please check the path permission'.format(path))

    @classmethod
    def read_csv_file(cls, file_path: str, class_bean: Callable[[dict], Any]) -> list:
        """Read a CSV file with a header row and convert each row with ``class_bean``.

        Args:
            file_path: path of the CSV file.
            class_bean: callable invoked with each row (a dict keyed by header).

        Returns:
            The list of converted rows; ``[]`` if the file is empty or larger
            than ``Constant.MAX_CSV_SIZE``.

        Raises:
            RuntimeError: if the path is invalid or the file cannot be read/parsed.
        """
        cls.check_file_or_directory_path(file_path)
        file_size = os.path.getsize(file_path)
        if file_size <= 0:
            return []
        if file_size > Constant.MAX_CSV_SIZE:
            print(f"The file size exceeds the preset value {Constant.MAX_CSV_SIZE / 1024 / 1024}MB, "
                  f"please check the file: {file_path}")
            return []
        try:
            with open(file_path, newline="") as csv_file:
                return [class_bean(row) for row in csv.DictReader(csv_file)]
        except Exception as err:
            raise RuntimeError(f"Failed to read the file: {file_path}") from err

    @classmethod
    def read_json_file(cls, file_path: str) -> dict:
        """Load a JSON file.

        Returns:
            The decoded content; ``{}`` if the file is empty or larger than
            ``Constant.MAX_JSON_SIZE``.

        Raises:
            RuntimeError: if the path is invalid or the file cannot be read/parsed.
        """
        cls.check_file_or_directory_path(file_path)
        file_size = os.path.getsize(file_path)
        if file_size <= 0:
            return {}
        if file_size > Constant.MAX_JSON_SIZE:
            # Report the limit that was actually checked (JSON, not CSV).
            print(f"The file size exceeds the preset value {Constant.MAX_JSON_SIZE / 1024 / 1024}MB, "
                  f"please check the file: {file_path}")
            return {}
        try:
            with open(file_path, "r") as json_file:
                return json.load(json_file)
        except Exception as err:
            raise RuntimeError(f"Failed to read the file: {file_path}") from err

    @classmethod
    def create_csv_file(cls, profiler_path: str, data: list, file_name: str, headers: list = None) -> None:
        """Write ``data`` rows (optionally preceded by ``headers``) to
        ``<profiler_path>/<Constant.OUTPUT_DIR>/<file_name>``.

        Does nothing when ``data`` is empty. The output directory is created
        if it does not exist yet.

        Raises:
            RuntimeError: if the directory or the file cannot be created/written.
        """
        if not data:
            return
        file_path = os.path.join(profiler_path, Constant.OUTPUT_DIR, file_name)
        cls._ensure_dir(os.path.dirname(file_path))
        try:
            with cls._open_for_write(file_path, newline="") as file:
                writer = csv.writer(file)
                if headers:
                    writer.writerow(headers)
                writer.writerows(data)
        except Exception as err:
            raise RuntimeError(f"Can't create file: {file_path}") from err

    @classmethod
    def create_json_file(cls, profiler_path: str, data: list, file_name: str) -> None:
        """Dump ``data`` as JSON to ``<profiler_path>/<Constant.OUTPUT_DIR>/<file_name>``.

        Does nothing when ``data`` is empty.

        Raises:
            RuntimeError: if the directory or the file cannot be created/written.
        """
        if not data:
            return
        file_path = os.path.join(profiler_path, Constant.OUTPUT_DIR, file_name)
        cls.create_json_file_by_path(file_path, data)

    @classmethod
    def create_json_file_by_path(cls, output_path: str, data: list) -> None:
        """Dump ``data`` as JSON to ``output_path``, creating parent directories as needed.

        Raises:
            RuntimeError: if the directory or the file cannot be created/written.
        """
        cls._ensure_dir(os.path.dirname(output_path))
        try:
            with cls._open_for_write(output_path) as file:
                json.dump(data, file)
        except Exception as err:
            raise RuntimeError(f"Can't create file: {output_path}") from err

    @classmethod
    def remove_and_make_output_dir(cls, profiler_path) -> None:
        """Recreate ``<profiler_path>/<Constant.OUTPUT_DIR>`` as an empty directory.

        Raises:
            RuntimeError: if the old directory cannot be removed or the new one
                cannot be created.
        """
        output_path = os.path.join(profiler_path, Constant.OUTPUT_DIR)
        if os.path.isdir(output_path):
            try:
                shutil.rmtree(output_path)
            except Exception as err:
                raise RuntimeError(f"Can't delete files in the directory: {output_path}") from err
        try:
            os.makedirs(output_path, mode=Constant.DIR_AUTHORITY)
        except Exception as err:
            raise RuntimeError(f"Can't create directory: {output_path}") from err

    @classmethod
    def _ensure_dir(cls, dir_name: str) -> None:
        """Create ``dir_name`` (and parents) if missing; an empty name means the current directory."""
        if not dir_name:
            return
        try:
            os.makedirs(dir_name, mode=Constant.DIR_AUTHORITY, exist_ok=True)
        except Exception as err:
            raise RuntimeError(f"Can't create directory: {dir_name}") from err

    @classmethod
    def _open_for_write(cls, file_path: str, **open_kwargs):
        """Open ``file_path`` for text writing with ``Constant.FILE_AUTHORITY`` permissions.

        O_TRUNC is required: wrapping an existing descriptor in mode "w" does
        not truncate, so without it a shorter rewrite would keep the tail of
        the previous content.
        """
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        return os.fdopen(os.open(file_path, flags, Constant.FILE_AUTHORITY), "w", **open_kwargs)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/cluster_analyse/communication_group/communication_group_generator.py b/profiler/cluster_analyse/communication_group/communication_group_generator.py index b287f07eda2840c905fde8357597f4f60d638391..e95175b0b380c15ab8ce449b080c5611db7e7264 100644 --- a/profiler/cluster_analyse/communication_group/communication_group_generator.py +++ b/profiler/cluster_analyse/communication_group/communication_group_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,20 +13,34 @@ # See the License for the specific language governing permissions and # limitations under the License.
import os

from common_func.constant import Constant
from common_func.file_manager import FileManager


class CommunicationGroupGenerator:
    """Loads each rank's communication.json as the input for communication-group generation.

    The group-generation steps themselves are still placeholders.
    """

    def __init__(self, collection_path: str, data_map: dict):
        """
        Args:
            collection_path: root path of the collected profiling data.
            data_map: per-rank profiling information, keyed by rank id.
        """
        self.collection_path = collection_path
        self.data_map = data_map
        self.communication_group = {}
        # rank id -> parsed content of that rank's communication.json
        self.rank_comm_dir_dict = {}

    def generate(self):
        """Run the generation pipeline; currently this only loads the per-rank JSON files."""
        self.load_communication_json()

    def load_communication_json(self):
        """Read every rank's communication.json into ``self.rank_comm_dir_dict``.

        A rank whose file is not listed, empty or oversized is reported on
        stdout. ``FileManager.read_json_file`` raises ``RuntimeError`` when a
        listed path is invalid or unreadable.
        """
        # Iterate key/value pairs; iterating the dict itself yields only the rank ids.
        for rank_id, profiling_dir_path in self.data_map.items():
            # NOTE(review): assumes each data_map value is a mapping from file name to
            # file path -- confirm against PytorchDataPreprocessor.get_data_map().
            comm_dir = profiling_dir_path.get(Constant.COMM_JSON)
            if comm_dir:
                self.rank_comm_dir_dict[rank_id] = FileManager.read_json_file(comm_dir)
            if not self.rank_comm_dir_dict.get(rank_id):
                print(f"rank {rank_id} does not have a valid communication.json")

    def generate_collective_communication_group(self):
        """Placeholder: not implemented yet."""
        pass

    def generate_p2p_communication_group(self):
        """Placeholder: not implemented yet."""
        pass

    def get_all_collective_ops_name(self):
        """Placeholder: not implemented yet."""
        pass
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/distribute_tools/distribute_modify_hostname.bash b/profiler/distribute_tools/distribute_modify_hostname.bash index d47b035499e678ae969f0e5a0be12802fa571155..ac72f2fa1b77e65c7d2883f949b955b870a977cc 100644 --- a/profiler/distribute_tools/distribute_modify_hostname.bash +++ b/profiler/distribute_tools/distribute_modify_hostname.bash @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/__init__.py b/profiler/performance_analyse/__init__.py index 1af7850112ed8824e5f9b73b9dbab54215f9c9cd..8400fd5ecd1246eaee795cebfccfacc80a94f08c 100644 --- a/profiler/performance_analyse/__init__.py +++ b/profiler/performance_analyse/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/gpu_parser.py b/profiler/performance_analyse/gpu_parser.py index e95eb1380128cd4dc0a26b4b1ab5077425667a22..95391dc0ba9dd77020976a8843b1f343368820af 100644 --- a/profiler/performance_analyse/gpu_parser.py +++ b/profiler/performance_analyse/gpu_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/npu_parser.py b/profiler/performance_analyse/npu_parser.py index e2c8dbc0d286c9e28cc3927d821e449792a91670..333d8a2682b05846e294e1112af92a91acfb6a5c 100644 --- a/profiler/performance_analyse/npu_parser.py +++ b/profiler/performance_analyse/npu_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. 
+# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/parser_helper.py b/profiler/performance_analyse/parser_helper.py index 9cfab6910de020c7b2e6464fdb7b521b9e9c75e0..958a3146bb58898cdb76003f5f59476a45c1593f 100644 --- a/profiler/performance_analyse/parser_helper.py +++ b/profiler/performance_analyse/parser_helper.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/performance_analyse/profiling_parse.py b/profiler/performance_analyse/profiling_parse.py index 0e81f2d2199be54b135374847b4f3fe99cbe200c..6c69af674f5d2035d1dc667c0b0a5c3c83a4dc97 100644 --- a/profiler/performance_analyse/profiling_parse.py +++ b/profiler/performance_analyse/profiling_parse.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies. +# Copyright (c) 2023, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License");