diff --git a/profiler/affinity_cpu_bind/bind_core.py b/profiler/affinity_cpu_bind/bind_core.py index 23d72c8b10975f1b3526d7b4a2a515702b32e701..e7799b2f34218941bc956652bb5683336b5c1f6d 100644 --- a/profiler/affinity_cpu_bind/bind_core.py +++ b/profiler/affinity_cpu_bind/bind_core.py @@ -1,3 +1,18 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import subprocess import argparse import os @@ -48,7 +63,7 @@ class BindCoreManager(): self.args_parse() if not bind_core_manager.get_npu_info(): - print('[ERROR] Failed to get current npus info') + logging.error('Failed to get current npus info') exit() if not bind_core_manager.get_running_pid_on_npu(): exit() @@ -56,7 +71,6 @@ class BindCoreManager(): bind_core_manager.run_bind_core() def get_running_pid_on_npu(self) -> bool: - no_running_pids_on_npu_msg = '[INFO] Now there is no running process on all NPUs, stop bind cores' logging.info('Begin to find running process on all NPUs') # get running process on NPUs for _ in range(self.find_running_pid_times): @@ -102,7 +116,7 @@ class BindCoreManager(): logging.info('Succeed to find running process %s on NPU %d', pids, npu_id) if_running_process = True if not if_running_process: - print(no_running_pids_on_npu_msg) + logging.info('Now there is no running process on all NPUs, stop bind cores') return if_running_process def get_npu_info(self) -> bool: @@ -129,14 +143,17 @@ class BindCoreManager(): p = subprocess.run(set_affinity_cpu_cmd.split(), shell=False, capture_output=True) logging.info(p.stdout.decode('utf-8')) except subprocess.CalledProcessError: - print('[ERROR] Failed to bind process {} on NPU {} with cpu cores list {}'.format(pid, npu, affinity_cpu)) + logging.error('Failed to bind process {} on NPU {} with cpu cores list {}'.format(pid, npu, + affinity_cpu)) logging.info('Succeed to bind process %s on NPU %d with cpu cores list %s', pid, npu, affinity_cpu) def args_parse(self): parser = argparse.ArgumentParser(description='This is a affinity cpu core bind script.') - parser.add_argument('-t', '--time', type=int, metavar='', help='Wait time before bind cores that you want to set. The unit is \'s\'.') - parser.add_argument('-app', '--application', metavar='', nargs='+', help='Training or inference command that you want to run.') + parser.add_argument('-t', '--time', type=int, metavar='', + help='Wait time before bind cores that you want to set. The unit is \'s\'.') + parser.add_argument('-app', '--application', metavar='', nargs='+', + help='Training or inference command that you want to run.') args = parser.parse_args() if args.application: application_cmd = ' '.join(args.application) @@ -148,7 +165,7 @@ class BindCoreManager(): args.time = 0 msg = f"Invalid parameter. The value of --time is not within the range " \ f"[0, {BindCoreManager.MAX_WAIT_TIME_BEFORE_BIND_CORE}]. --time has been set to 0 to continue." - print(f'[WARNING] {msg}') + logging.warning(f'{msg}') time.sleep(args.time) def _init_log_file(self): @@ -175,7 +192,8 @@ class BindCoreManager(): get_npu_info_cmd = 'npu-smi info -l' get_npu_info_process = subprocess.run(get_npu_info_cmd.split(), shell=False, capture_output=True) get_npu_id_cmd = 'grep ID' - get_npu_id_process = subprocess.run(get_npu_id_cmd.split(), shell=False, input=get_npu_info_process.stdout, capture_output=True) + get_npu_id_process = subprocess.run(get_npu_id_cmd.split(), shell=False, input=get_npu_info_process.stdout, + capture_output=True) res = get_npu_id_process.stdout.decode('utf-8').split() for i in res: if i.isdigit(): @@ -189,7 +207,8 @@ class BindCoreManager(): p = subprocess.run(get_npu_topo_cmd.split(), shell=False, capture_output=True) res = p.stdout.decode('utf-8').split() if not res: - print('[ERROR] Failed to run get npu affinity info, please check if driver version support cmd npu-smi info -t topo') + logging.error('Failed to run get npu affinity info, ' + 'please check if driver version support cmd npu-smi info -t topo') return False index = 0 @@ -205,10 +224,12 @@ class BindCoreManager(): cpus[1] = str(int(cpus[1]) + cpu_num_for_each_npu) affinity_cpus.append(cpus[0] + '-' + cpus[1]) if index < len(self.npu_id_list): - self.npu_affinity_cpu_dict[self.npu_id_list[index]] = ','.join(affinity_cpu for affinity_cpu in affinity_cpus) + self.npu_affinity_cpu_dict[self.npu_id_list[index]] = ','.join( + affinity_cpu for affinity_cpu in affinity_cpus) index += 1 else: - print('[ERROR] Get affinity_cpu_list for {} npus, more than real npu num: {}'.format(index + 1, len(self.npu_id_list))) + logging.error('Get affinity_cpu_list for {} npus, ' + 'more than real npu num: {}'.format(index + 1, len(self.npu_id_list))) return False for k in self.npu_affinity_cpu_dict.keys(): @@ -217,12 +238,12 @@ class BindCoreManager(): if __name__ == '__main__': - print('[INFO] Begin to run bind-cores script...') + logging.info('Begin to run bind-cores script...') bind_core_manager = BindCoreManager() try: bind_core_manager.run() except Exception as exception: - print(f'[ERROR] {exception}') + logging.error(f'{exception}') - print('[INFO] End to run bind-cores script, the log is saved in {}'.format(bind_core_manager.log_file)) + logging.info('End to run bind-cores script, the log is saved in {}'.format(bind_core_manager.log_file)) diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 0241a27aa84b765999baa577c03d59d5221fb61b..5b2e85e166d9d4eca1fa2975cc6289c03ceac69a 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -1,7 +1,22 @@ -import click +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import logging +import click sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), "compare_tools")) sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), "cluster_analyse")) diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index 1551ee65185561275cb90894e75eed68f576e5f2..e954bd1502c37933bd213c4dafcc15a4d1fd9b51 100644 --- a/profiler/cli/compare_cli.py +++ b/profiler/cli/compare_cli.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import ast -import click import os import sys +import click sys.path.append(os.path.dirname(os.path.dirname(__file__))) diff --git a/profiler/cli/complete_cli.py b/profiler/cli/complete_cli.py index 9c4efd0af90daa84b7ae5c3a0b2462dc52873da5..78aea6cf892d1a0dc790841fe9d33eba45400d91 100644 --- a/profiler/cli/complete_cli.py +++ b/profiler/cli/complete_cli.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import click from profiler.advisor.utils.tools import CONTEXT_SETTINGS diff --git a/profiler/cluster_analyse/analysis/base_analysis.py b/profiler/cluster_analyse/analysis/base_analysis.py index 398098117a95795e54882533898092d6e237ad7a..a20a5e6da53a8f146536507fd9e09a171c61ece3 100644 --- a/profiler/cluster_analyse/analysis/base_analysis.py +++ b/profiler/cluster_analyse/analysis/base_analysis.py @@ -22,6 +22,7 @@ from cluster_utils.data_transfer_adapter import DataTransferAdapter logger = logging.getLogger() + class BaseAnalysis: MAX_RANKS = 1000 diff --git a/profiler/cluster_analyse/analysis/comm_matrix_analysis.py b/profiler/cluster_analyse/analysis/comm_matrix_analysis.py index 3df349ee86be4a7ff9eaccac0edb7973ae91ef3d..522d9db9f2ef42d441b86c3f18262d4b71b61271 100644 --- a/profiler/cluster_analyse/analysis/comm_matrix_analysis.py +++ b/profiler/cluster_analyse/analysis/comm_matrix_analysis.py @@ -18,10 +18,11 @@ import logging from collections import defaultdict from analysis.base_analysis import BaseAnalysis -from profiler.prof_common.constant import Constant from common_func.db_manager import DBManager from common_func.utils import increase_shared_value +from profiler.prof_common.constant import Constant + logger = logging.getLogger("cluster") diff --git a/profiler/cluster_analyse/analysis/communication_analysis.py b/profiler/cluster_analyse/analysis/communication_analysis.py index 0cac569511f1acdc21a9821b19dd38f1dd53df03..8dece433901a6a0b661eb5d25e30bb87908dd334 100644 --- a/profiler/cluster_analyse/analysis/communication_analysis.py +++ b/profiler/cluster_analyse/analysis/communication_analysis.py @@ -245,14 +245,14 @@ class CommunicationAnalysisOptimized(BaseAnalysis): return total_bw_info, total_transit_size, total_transit_time for _, step_dict in self._aggregate_bandwidth.items(): - for step_id, rank_dict in step_dict.items(): - for rank_id, communication_op_info in rank_dict.items(): - for transport_type, bandwidth_info in communication_op_info.items(): + for _, rank_dict in step_dict.items(): + for _, communication_op_info in rank_dict.items(): + for _, bandwidth_info in communication_op_info.items(): total_transit_size = 0.0 total_transit_time = 0.0 total_info = [] op_group_set = set() - for package_size, package_info in bandwidth_info.items(): + for _, package_info in bandwidth_info.items(): total_bandwidth_info, total_transit_size, total_transit_time = process_package_info( package_info, total_transit_size, total_transit_time, op_group_set ) diff --git a/profiler/cluster_analyse/analysis/host_info_analysis.py b/profiler/cluster_analyse/analysis/host_info_analysis.py index 4d1c8889bd0a12464e4b9eb4a4a26d0a52160343..3a6cfb8a5c9c8b4981c41bc9c0ae0f9fb04c412c 100644 --- a/profiler/cluster_analyse/analysis/host_info_analysis.py +++ b/profiler/cluster_analyse/analysis/host_info_analysis.py @@ -17,10 +17,11 @@ import os import logging from analysis.base_analysis import BaseAnalysis -from profiler.prof_common.constant import Constant from common_func.db_manager import DBManager from common_func.utils import increase_shared_value +from profiler.prof_common.constant import Constant + logger = logging.getLogger("cluster") @@ -101,4 +102,4 @@ class HostInfoAnalysis(BaseAnalysis): self.all_rank_host_info[host_uid] = host_name self.all_rank_device_info.extend(rank_device_info) if print_empty_host_info: - print(print_empty_host_info) + logger.warning(print_empty_host_info) diff --git a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py index db0b9b5b0ca1792552342896686f4d3b95b7232c..82cae80b262b719a9ab35405d8d53996cf396c14 100644 --- a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py +++ b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py @@ -18,10 +18,11 @@ import logging from common_func.db_manager import DBManager from common_func.utils import increase_shared_value +from cluster_utils.parallel_strategy_calculator import ParallelStrategyCalculator +from prof_bean.step_trace_time_bean import StepTraceTimeBean + from profiler.prof_common.constant import Constant from profiler.prof_common.file_manager import FileManager -from prof_bean.step_trace_time_bean import StepTraceTimeBean -from cluster_utils.parallel_strategy_calculator import ParallelStrategyCalculator logger = logging.getLogger("cluster") @@ -77,18 +78,19 @@ class StepTraceTimeAnalysis: if len(parallelism_map) > len(self.step_time_dict): missing_rank_ids = [ - rank_id for rank_id in range(len(parallelism_map)) + rank_id + for rank_id in range(len(parallelism_map)) if rank_id not in self.step_time_dict ] logger.warning("Step trace data length should equal to real rank numbers, but get step data length =" "%s, real rank numbers = %s, maybe lost some rank ids = %s, please check your profiling " - "data.",str(len(self.step_time_dict)),str(len(parallelism_map)),str(missing_rank_ids)) + "data.", (len(self.step_time_dict)), str(len(parallelism_map)), str(missing_rank_ids)) if len(parallelism_map) < len(self.step_time_dict): logger.error("Step trace data length should equal to real rank numbers, but get step data length = %s," " real rank numbers = %s, maybe parallel params in profiler_metadata.json is error, " "please check your metadata data.", - str(len(self.step_time_dict)),str(len(parallelism_map))) + str(len(self.step_time_dict)), str(len(parallelism_map))) self.distributed_args = None return @@ -146,8 +148,8 @@ class StepTraceTimeAnalysis: self.step_time_dict[rank_id] = data DBManager.destroy_db_connect(conn, cursor) if not self.step_time_dict.get(rank_id): - logger.warning("Rank %s does not have a valid step_trace_time data in %s file." - ,str(rank_id),str(self.data_type)) + logger.warning("Rank %s does not have a valid step_trace_time data in %s file.", + str(rank_id), str(self.data_type)) def analyze_step_time(self): for rank_id, data_bean_list in self.step_time_dict.items(): diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index 6ab68fe173987ee4424ebf6667aa02c41df53195..42d5be474a483b81e8a4db3cf90b0d05f8cb039a 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -19,10 +19,10 @@ import logging import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +from analysis.analysis_facade import AnalysisFacade from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor from cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor from communication_group.communication_group_generator import CommunicationGroupGenerator -from analysis.analysis_facade import AnalysisFacade from profiler.prof_common.additional_args_manager import AdditionalArgsManager from profiler.prof_common.constant import Constant from profiler.prof_common.file_manager import FileManager diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py index 3af835e6db211b80b13b7b4d93f80e2d67aa7fe5..64c1d27b0f3e788f94f0e6c5e73ae9f85833e66f 100644 --- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py +++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py @@ -51,7 +51,8 @@ class FormDataProcessor: node_id = re.search(r'node(\d+)', dir_path).group(1) return int(node_id) - def get_files_with_prefix_recursive(self, csv_path, match_str): + @staticmethod + def get_files_with_prefix_recursive(csv_path, match_str): matched_ir_files = list(Path(csv_path).rglob(match_str)) if not matched_ir_files: msg = f"Didn't find any file in folder {csv_path} that matches {match_str}" @@ -186,7 +187,7 @@ class OpSummaryAnalyzerBase: def get_columns_to_view(self): return self.columns_to_view - def calculateViewData(self, summary_data): + def calculate_view_data(self, summary_data): # 存储所有合并后的数据 calculate_dict = {self.columns_to_view[i]: self.calculate_fun for i in range(len(self.columns_to_view))} view_data = summary_data.groupby(self.attrs_to_group).agg(calculate_dict).reset_index() @@ -198,7 +199,7 @@ class TimeToCsvAnalyzer(OpSummaryAnalyzerBase): super().__init__(chip_type, "TimeToCsvAnalyzer", dir_path) def generate_deliverable(self, summary_data, rank_num): - view_data = self.calculateViewData(summary_data) + view_data = self.calculate_view_data(summary_data) # 规范化列名 view_data.columns = [''.join(col) if col[1] == "" else '_'.join(col) for col in view_data.columns] try: @@ -221,7 +222,7 @@ class StatisticalInfoToHtmlAnalyzer(OpSummaryAnalyzerBase): # top_n 如果不符合要求,报警告 def generate_deliverable(self, summary_data, rank_num): - view_data = self.calculateViewData(summary_data) + view_data = self.calculate_view_data(summary_data) # 规范化列名 op_name/ --> op_name time/var 这种不变 view_data.columns = [''.join(col) if col[1] == "" else col for col in view_data.columns] diff --git a/profiler/cluster_analyse/cluster_utils/data_transfer_adapter.py b/profiler/cluster_analyse/cluster_utils/data_transfer_adapter.py index c2aed8250dec646597dec59cbfd13a45f17d5ca4..2de1ca9593d2d7697b25813946f10e91864cd0f0 100644 --- a/profiler/cluster_analyse/cluster_utils/data_transfer_adapter.py +++ b/profiler/cluster_analyse/cluster_utils/data_transfer_adapter.py @@ -1,7 +1,20 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import copy - -from profiler.prof_common.constant import Constant from common_func.table_constant import TableConstant +from profiler.prof_common.constant import Constant class DataTransferAdapter(object): @@ -85,8 +98,8 @@ class DataTransferAdapter(object): res_dict[TableConstant.GROUP_NAME] = op_name.split("@")[1] if "@" in op_name else "" return res_dict - for rank_set, step_dict in res_data.items(): - for step, op_dict in step_dict.items(): + for _, step_dict in res_data.items(): + for _, op_dict in step_dict.items(): for op_name, op_data in op_dict.items(): split_comm_time() return res_comm_data, res_bd_data @@ -137,7 +150,7 @@ class DataTransferAdapter(object): self.set_value_by_key(matrix_data, link_data, key_dict) result.append(matrix_data) - for rank_set, step_dict in res_data.items(): + for _, step_dict in res_data.items(): for step, op_dict in step_dict.items(): split_matrix_data() return result diff --git a/profiler/cluster_analyse/common_func/db_manager.py b/profiler/cluster_analyse/common_func/db_manager.py index be790e9298b4546902ab99a8f0a811e784363c69..e2e62500cd72b42d7dab18dbefb0368c19e2d83b 100644 --- a/profiler/cluster_analyse/common_func/db_manager.py +++ b/profiler/cluster_analyse/common_func/db_manager.py @@ -223,4 +223,4 @@ class CustomizedDictFactory: for data in data_result: data_dict = dict(zip(description_set, data)) res.append(data_dict) - return res + return res \ No newline at end of file diff --git a/profiler/cluster_analyse/common_func/empty_class.py b/profiler/cluster_analyse/common_func/empty_class.py index df100d156fa064cca4514260db0b2e843e217d09..3bc5751ffb843ba39b197a8e9d389efa4101a813 100644 --- a/profiler/cluster_analyse/common_func/empty_class.py +++ b/profiler/cluster_analyse/common_func/empty_class.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class EmptyClass: def __init__(self: any, info: str = "") -> None: diff --git a/profiler/cluster_analyse/common_func/table_constant.py b/profiler/cluster_analyse/common_func/table_constant.py index de6d47e97e5683493905de5353a9978195e87b70..8a13558360f7abd160dc23377ee5509eecb7d992 100644 --- a/profiler/cluster_analyse/common_func/table_constant.py +++ b/profiler/cluster_analyse/common_func/table_constant.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class TableConstant: RANK_SET = "rank_set" diff --git a/profiler/cluster_analyse/common_func/tables_config.py b/profiler/cluster_analyse/common_func/tables_config.py index d8f49916accfb41a93a1966bf60dc0fdcc5423ef..e9868737dd3dcf83327d315cb0fde8cf0f5a6261 100644 --- a/profiler/cluster_analyse/common_func/tables_config.py +++ b/profiler/cluster_analyse/common_func/tables_config.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class TablesConfig: DATA = { "ClusterCommAnalyzerTimeMap": [ diff --git a/profiler/cluster_analyse/communication_group/base_communication_group.py b/profiler/cluster_analyse/communication_group/base_communication_group.py index 2c44d0ae0f33bb42ef937d82ea2d39e72d1d11cd..59c6b9c3e9aebf981ac22046967fa85fe1a6e43e 100644 --- a/profiler/cluster_analyse/communication_group/base_communication_group.py +++ b/profiler/cluster_analyse/communication_group/base_communication_group.py @@ -20,8 +20,8 @@ from copy import deepcopy from multiprocessing import Pool import logging -from profiler.prof_common.constant import Constant from cluster_utils.data_transfer_adapter import DataTransferAdapter +from profiler.prof_common.constant import Constant logger = logging.getLogger() @@ -80,7 +80,7 @@ class BaseCommunicationGroup: def generate_p2p_communication_group(self): stage_group = {} - for group_name, rank_set in self.collective_group_dict.items(): + for _, rank_set in self.collective_group_dict.items(): if not self.whether_valid_comm_group(rank_set): continue unioned_set = set() diff --git a/profiler/cluster_analyse/communication_group/communication_db_group.py b/profiler/cluster_analyse/communication_group/communication_db_group.py index eef617c9ef509320ddd26bf55b10bd757e325659..8a4d24b97b94ac242b6808af8f82fd5c5faf75e1 100644 --- a/profiler/cluster_analyse/communication_group/communication_db_group.py +++ b/profiler/cluster_analyse/communication_group/communication_db_group.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -""" -# Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,15 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" import os import logging from common_func.db_manager import DBManager -from profiler.prof_common.constant import Constant from communication_group.base_communication_group import BaseCommunicationGroup - +from profiler.prof_common.constant import Constant logger = logging.getLogger() diff --git a/profiler/cluster_analyse/communication_group/communication_group_generator.py b/profiler/cluster_analyse/communication_group/communication_group_generator.py index 324137207ca47b3e88ea64b083086f12e5058c50..51ddb6a2ea377fa6fc2e41a83305b1739906d120 100644 --- a/profiler/cluster_analyse/communication_group/communication_group_generator.py +++ b/profiler/cluster_analyse/communication_group/communication_group_generator.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from profiler.prof_common.constant import Constant from communication_group.communication_db_group import CommunicationDBGroup from communication_group.communication_db_group import CommunicationDBGroupOptimized from communication_group.communication_json_group import CommunicationJsonGroup +from profiler.prof_common.constant import Constant SIMPLIFIED = "SIMPLIFIED" diff --git a/profiler/cluster_analyse/communication_group/communication_json_group.py b/profiler/cluster_analyse/communication_group/communication_json_group.py index d9c1c0b4019f3d11c2c7bff6b91b48fb1b201c4f..1cd770f757978368cc97a3f6d2a769a0ced09166 100644 --- a/profiler/cluster_analyse/communication_group/communication_json_group.py +++ b/profiler/cluster_analyse/communication_group/communication_json_group.py @@ -15,8 +15,8 @@ import os -from profiler.prof_common.file_manager import FileManager from communication_group.base_communication_group import BaseCommunicationGroup +from profiler.prof_common.file_manager import FileManager class CommunicationJsonGroup(BaseCommunicationGroup): diff --git a/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py b/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py index 3cf4bc4fa5b1a8c8ccf93c010fe5fe8e9843e513..226b1968ee64943ee8548be90a4d8dd28730462f 100644 --- a/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py +++ b/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py @@ -18,6 +18,7 @@ import logging logger = logging.getLogger() + class StepTraceTimeBean: STEP = "Step" COMPLEMENT_HEADER = ["Step", "Type", "Index"] diff --git a/profiler/example/mstx_torch_plugin/__init__.py b/profiler/example/mstx_torch_plugin/__init__.py index 0ebc8ae1ab65b27515fef716208f36f4a75e55c9..b405266d6cd49309f57aa380c9f09ed2cc4f0b2c 100644 --- a/profiler/example/mstx_torch_plugin/__init__.py +++ b/profiler/example/mstx_torch_plugin/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import logging from .mstx_torch_plugin import apply_mstx_patch diff --git a/profiler/example/mstx_torch_plugin/mstx_torch_plugin.py b/profiler/example/mstx_torch_plugin/mstx_torch_plugin.py index 6f731a40792792a08a61319dbdc2a7705a08149b..cd8145fc2c90ad1b035e46a0a117dd63f8b7f535 100644 --- a/profiler/example/mstx_torch_plugin/mstx_torch_plugin.py +++ b/profiler/example/mstx_torch_plugin/mstx_torch_plugin.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import functools import torch @@ -115,7 +129,7 @@ def _step_hook(self, *args, **kwargs): if id(self) != mstx_state.last_optimizer_id: return stream = torch.npu.current_stream() - mstx_state.step_id+=1 + mstx_state.step_id += 1 if mstx_state.step_range_id is not None: torch.npu.mstx.range_end(mstx_state.step_range_id) mstx_state.step_range_id = torch.npu.mstx.range_start(f"step {mstx_state.step_id}", stream) diff --git a/profiler/example/setup.py b/profiler/example/setup.py index faf7f497df681b83bb14cb096a641d87a6456ca7..3b150ad16a7d51610c47c5191df3c27de87a24a8 100644 --- a/profiler/example/setup.py +++ b/profiler/example/setup.py @@ -1,5 +1,19 @@ #!/usr/bin/python # -*- coding: utf-8 -*- +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os from setuptools import setup, find_packages diff --git a/profiler/prof_common/__init__.py b/profiler/prof_common/__init__.py index 9ee8a1b9b09df600acf059d555a419c5658c8e60..577e0e8e18a7db576aca0b4b1908918a9cbfdbd0 100644 --- a/profiler/prof_common/__init__.py +++ b/profiler/prof_common/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) \ No newline at end of file diff --git a/profiler/prof_common/file_manager.py b/profiler/prof_common/file_manager.py index 467926dd9444335d4a5a7b879cc3bb3773a00fba..b0dfd81b7d7c62300bc99f916ead7e2bd788d76a 100644 --- a/profiler/prof_common/file_manager.py +++ b/profiler/prof_common/file_manager.py @@ -39,7 +39,7 @@ class FileManager: f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") if check_msg.lower() != "y": logger.warning("The user choose not to read the file: %s", file_path) - return [] + return {} try: with open(file_path, "r") as json_file: result_data = json.loads(json_file.read()) @@ -59,7 +59,7 @@ class FileManager: check_msg = input( f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") if check_msg.lower() != "y": - print(f"[WARNING] The user choose not to read the file: {file_path}") + logger.warning(f"The user choose not to read the file: {file_path}") return [] result_data = [] try: diff --git a/profiler/prof_common/utils.py b/profiler/prof_common/utils.py index 9cef03d33bcc3bf667cfdaed8c7bdb324b5dccb7..664a64d32e2741dde2fc8402ca1af8ad3e8bab43 100644 --- a/profiler/prof_common/utils.py +++ b/profiler/prof_common/utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import configparser import logging import os diff --git a/profiler/setup.py b/profiler/setup.py index 7e5516ef2595b5bf4e57cff7065122ccab189d86..124c5ed5b2566fdc311a40c9295cd0d8202576b5 100644 --- a/profiler/setup.py +++ b/profiler/setup.py @@ -1,5 +1,21 @@ #!/usr/bin/python # -*- coding: utf-8 -*- + +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import sys