diff --git a/profiler/__init__.py b/profiler/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index 58f1e4bc6ad779e1ced82ecf84e31b0bf17031aa..3491808524f7d3a4322176b0e6071c4463c5741a 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -14,20 +14,24 @@ # limitations under the License. import argparse + from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor from communication_group.communication_group_generator import CommunicationGroupGenerator from common_func.constant import Constant from common_func.file_manager import FileManager +from common_func.path_manager import PathManager from analysis.analysis_facade import AnalysisFacade class Interface: def __init__(self, args: argparse.Namespace): - self.collection_path = args.collection_path + self.collection_path = PathManager.get_realpath(args.collection_path) self.data_map = {} self.communication_group = {} def run(self): + PathManager.check_input_directory_path(self.collection_path) + PathManager.check_path_owner_consistent(self.collection_path) FileManager.create_output_dir(self.collection_path) data_map = PytorchDataPreprocessor(self.collection_path).get_data_map() if not data_map: diff --git a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py index c10bd5142590dbcfbb171ca4f10c35f9096c772d..8df922ea19ba9f185a59544d7000dbd07161a8cb 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py @@ -26,7 +26,6 @@ class PytorchDataPreprocessor: self.path = os.path.realpath(path) def get_data_map(self) -> dict: - 
FileManager.check_file_or_directory_path(self.path, isdir=True) ascend_pt_dirs = [] for root, dirs, files in os.walk(self.path): for dir_name in dirs: diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index 8f20f35fbf88043f75e66bdaecd47a7a4948d6aa..063332acc0942bfe60314cb6e646bb89ff3e00fe 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -16,59 +16,25 @@ import os import csv import json -import shutil + from common_func.constant import Constant +from common_func.path_manager import PathManager class FileManager: - @classmethod - def check_file_or_directory_path(cls, path, isdir=False): - """ - Function Description: - check whether the path is valid - Parameter: - path: the path to check - isdir: the path is dir or file - Exception Description: - when invalid data throw exception - """ - if not os.path.exists(path): - raise RuntimeError('{} is not exist.'.format(path)) - - if not os.access(path, os.R_OK): - raise RuntimeError( - 'The path {} does not have permission to read. Please check the path permission'.format(path)) - - if len(path) > Constant.MAX_PATH_LENGTH: - msg = f"The length of file path exceeded the maximum value {Constant.MAX_PATH_LENGTH}: {path}" - raise RuntimeError(msg) - - if os.path.islink(path): - msg = f"Invalid profiling path is soft link: {path}" - raise RuntimeError(msg) - - if isdir: - if not os.path.isdir(path): - raise RuntimeError('The path {} is not a directory.'.format(path)) - - if not os.access(path, os.W_OK): - raise RuntimeError('The path {} does not have permission to write. 
' - 'Please check the path permission'.format(path)) - else: - if not os.path.isfile(path): - raise RuntimeError('{} is an invalid file or non-exist.'.format(path)) - @classmethod def read_csv_file(cls, file_path: str, class_bean: any) -> list: - cls.check_file_or_directory_path(file_path) + PathManager.check_path_readable(file_path) file_size = os.path.getsize(file_path) if file_size <= 0: return [] if file_size > Constant.MAX_CSV_SIZE: - print(f"The file size exceeds the preset value {Constant.MAX_CSV_SIZE / 1024 / 1024}MB, " - f"please check the file: {file_path}") - return [] + check_msg = input( + f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + if check_msg.lower() != "y": + print(f"[WARNING] The user choose not to read the file: {file_path}") + return [] result_data = [] try: with open(file_path, newline="") as csv_file: @@ -81,14 +47,16 @@ class FileManager: @classmethod def read_json_file(cls, file_path: str) -> dict: - cls.check_file_or_directory_path(file_path) + PathManager.check_path_readable(file_path) file_size = os.path.getsize(file_path) if file_size <= 0: return {} if file_size > Constant.MAX_JSON_SIZE: - print(f"The file size exceeds the preset value {Constant.MAX_JSON_SIZE / 1024 / 1024}MB, " - f"please check the file: {file_path}") - return {} + check_msg = input( + f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? 
[y/n]") + if check_msg.lower() != "y": + print(f"[WARNING] The user choose not to read the file: {file_path}") + return [] try: with open(file_path, "r") as json_file: result_data = json.load(json_file) @@ -102,10 +70,10 @@ class FileManager: return output_path = os.path.join(profiler_path, Constant.CLUSTER_ANALYSIS_OUTPUT) output_file = os.path.join(output_path, file_name) - cls.check_file_or_directory_path(output_path, isdir=True) + PathManager.create_file_safety(output_file) + PathManager.check_path_writeable(output_file) try: - with os.fdopen(os.open(output_file, os.O_WRONLY | os.O_CREAT, Constant.FILE_AUTHORITY), "w", - newline="") as file: + with open(output_file, "w", newline="") as file: writer = csv.writer(file) if headers: writer.writerow(headers) @@ -119,9 +87,10 @@ class FileManager: return output_path = os.path.join(profiler_path, Constant.CLUSTER_ANALYSIS_OUTPUT) output_file = os.path.join(output_path, file_name) - cls.check_file_or_directory_path(output_path, isdir=True) + PathManager.create_file_safety(output_file) + PathManager.check_path_writeable(output_file) try: - with os.fdopen(os.open(output_file, os.O_WRONLY | os.O_CREAT, Constant.FILE_AUTHORITY), "w") as file: + with open(output_file, "w") as file: json.dump(data, file) except Exception: raise RuntimeError(f"Can't create the file: {output_file}") @@ -129,15 +98,5 @@ class FileManager: @classmethod def create_output_dir(cls, collection_path: str) -> None: output_path = os.path.join(collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT) - if os.path.isdir(output_path): - try: - cls.check_file_or_directory_path(output_path, isdir=True) - shutil.rmtree(output_path) - os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) - except Exception: - raise RuntimeError(f"Can't delete the directory: {output_path}") - return - try: - os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) - except Exception: - raise RuntimeError(f"Can't create the directory: {output_path}") + 
PathManager.remove_path_safety(output_path) + PathManager.make_dir_safety(output_path) diff --git a/profiler/cluster_analyse/common_func/path_manager.py b/profiler/cluster_analyse/common_func/path_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..3a332d43a0fe16f0d9ef76cffa010cf95d316b7d --- /dev/null +++ b/profiler/cluster_analyse/common_func/path_manager.py @@ -0,0 +1,183 @@ +# Copyright (c) 2023 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import re
import shutil
import platform


class PathManager:
    """
    Function Description:
        collection of class-level helpers that validate user supplied paths and
        create or remove files and directories after basic safety checks
        (soft link rejection, length limits, ownership and permission checks)
    """
    # Longest path string accepted by input_path_common_check, in characters.
    MAX_PATH_LENGTH = 4096
    # Longest single path component accepted by input_path_common_check.
    MAX_FILE_NAME_LENGTH = 255
    # Mode handed to os.open() when create_file_safety creates a file.
    DATA_FILE_AUTHORITY = 0o640
    # Mode handed to os.makedirs() when make_dir_safety creates a directory.
    DATA_DIR_AUTHORITY = 0o750
    # Lower-cased platform.system() value used to detect Windows.
    WINDOWS = "windows"

    @classmethod
    def check_input_directory_path(cls, path: str) -> None:
        """
        Function Description:
            check whether the directory path is valid; some businesses can accept a path
            that does not exist, so the function does not verify whether the path exists
        Parameter:
            path: the path to check, whether the incoming path is absolute or relative depends on the business
        Exception Description:
            raise RuntimeError when the common checks fail or the path is an existing file
        """
        cls.input_path_common_check(path)

        if os.path.isfile(path):
            # f-string is required here, otherwise the literal "{path}" is shown to the user.
            msg = f"Invalid input path which is a file path: {path}"
            raise RuntimeError(msg)

    @classmethod
    def check_input_file_path(cls, path: str) -> None:
        """
        Function Description:
            check whether the file path is valid; some businesses can accept a path
            that does not exist, so the function does not verify whether the path exists
        Parameter:
            path: the file path to check, whether the incoming path is absolute or relative depends on the business
        Exception Description:
            raise RuntimeError when the common checks fail or the path is an existing directory
        """
        cls.input_path_common_check(path)

        if os.path.isdir(path):
            # f-string is required here, otherwise the literal "{path}" is shown to the user.
            msg = f"Invalid input path which is a directory path: {path}"
            raise RuntimeError(msg)

    @classmethod
    def input_path_common_check(cls, path: str) -> None:
        """
        Function Description:
            checks shared by file and directory inputs: total length, soft link,
            allowed characters and the length of every path component
        Parameter:
            path: the path to check
        Exception Description:
            raise RuntimeError when any of the checks fails
        """
        if len(path) > cls.MAX_PATH_LENGTH:
            raise RuntimeError("Length of input path exceeds the limit.")

        if os.path.islink(path):
            msg = f"Invalid input path which is a soft link: {path}"
            raise RuntimeError(msg)

        # The Windows whitelist additionally allows ':' and '\' and the
        # \u4e00-\u9fa5 character range.
        if platform.system().lower() == cls.WINDOWS:
            pattern = r'(\.|:|\\|/|_|-|\s|[~0-9a-zA-Z\u4e00-\u9fa5])+'
        else:
            pattern = r'(\.|/|_|-|\s|[~0-9a-zA-Z])+'
        if not re.fullmatch(pattern, path):
            msg = f"Invalid input path: {path}"
            raise RuntimeError(msg)

        # Split on both separators at once; a dedicated loop variable keeps the
        # "path" parameter intact instead of shadowing it.
        for name in re.split(r"[/\\]", path):
            if len(name) > cls.MAX_FILE_NAME_LENGTH:
                raise RuntimeError("Length of input path exceeds the limit.")

    @classmethod
    def check_path_owner_consistent(cls, path: str) -> None:
        """
        Function Description:
            check whether the path belongs to the process owner; when it does not,
            the user is asked on standard input whether to continue
        Parameter:
            path: the path to check
        Exception Description:
            raise RuntimeError when the path does not exist or the user does not answer "y"
        """
        if not os.path.exists(path):
            msg = f"The path does not exist: {path}"
            raise RuntimeError(msg)
        # The uid comparison below is skipped on Windows.
        if platform.system().lower() == cls.WINDOWS:
            return
        if os.stat(path).st_uid != os.getuid():
            check_msg = input("The path does not belong to you, do you want to continue? [y/n]")
            if check_msg.lower() != "y":
                raise RuntimeError("The user choose not to continue.")

    @classmethod
    def check_path_writeable(cls, path: str) -> None:
        """
        Function Description:
            check whether the path is writable
        Parameter:
            path: the path to check
        Exception Description:
            raise RuntimeError when the owner check fails, the path is a soft link
            or the process has no write permission
        """
        cls.check_path_owner_consistent(path)
        if os.path.islink(path):
            msg = f"Invalid path which is a soft link: {path}"
            raise RuntimeError(msg)
        if not os.access(path, os.W_OK):
            msg = f"The path permission check failed: {path}"
            raise RuntimeError(msg)

    @classmethod
    def check_path_readable(cls, path: str) -> None:
        """
        Function Description:
            check whether the path is readable
        Parameter:
            path: the path to check
        Exception Description:
            raise RuntimeError when the owner check fails, the path is a soft link
            or the process has no read permission
        """
        cls.check_path_owner_consistent(path)
        if os.path.islink(path):
            msg = f"Invalid path which is a soft link: {path}"
            raise RuntimeError(msg)
        if not os.access(path, os.R_OK):
            msg = f"The path permission check failed: {path}"
            raise RuntimeError(msg)

    @classmethod
    def remove_path_safety(cls, path: str) -> None:
        """
        Function Description:
            remove the directory tree at path with shutil.rmtree; a path that does
            not exist is ignored
        Parameter:
            path: the directory to remove
        Exception Description:
            raise RuntimeError when the path is a soft link or the removal fails
        """
        msg = f"Failed to remove path: {path}"
        if os.path.islink(path):
            raise RuntimeError(msg)
        if os.path.exists(path):
            try:
                shutil.rmtree(path)
            except Exception as err:
                raise RuntimeError(msg) from err

    @classmethod
    def make_dir_safety(cls, path: str) -> None:
        """
        Function Description:
            create the directory (and missing parents) with DATA_DIR_AUTHORITY;
            nothing is done when the path already exists
        Parameter:
            path: the directory to create
        Exception Description:
            raise RuntimeError when the path is a soft link or the creation fails
        """
        msg = f"Failed to make directory: {path}"
        if os.path.islink(path):
            raise RuntimeError(msg)
        if os.path.exists(path):
            return
        try:
            os.makedirs(path, mode=cls.DATA_DIR_AUTHORITY)
        except Exception as err:
            raise RuntimeError(msg) from err

    @classmethod
    def create_file_safety(cls, path: str) -> None:
        """
        Function Description:
            create an empty file with DATA_FILE_AUTHORITY; nothing is done when the
            path already exists
        Parameter:
            path: the file to create
        Exception Description:
            raise RuntimeError when the path is a soft link or the creation fails
        """
        msg = f"Failed to create file: {path}"
        if os.path.islink(path):
            raise RuntimeError(msg)
        if os.path.exists(path):
            return
        try:
            # Only the creation matters here, so the descriptor is closed immediately.
            os.close(os.open(path, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY))
        except Exception as err:
            raise RuntimeError(msg) from err

    @classmethod
    def get_realpath(cls, path: str) -> str:
        """
        Function Description:
            return os.path.realpath(path) after rejecting a path that is itself a soft link
        Parameter:
            path: the path to resolve
        Exception Description:
            raise RuntimeError when the path is a soft link
        """
        if os.path.islink(path):
            msg = f"Invalid input path which is a soft link: {path}"
            raise RuntimeError(msg)
        return os.path.realpath(path)
88c57b2f9e437a681243c9c33a7772cf6a4a6c23..48bcdd76997b4b25ad80dc4dcc7b438b0b7a8c56 100644 --- a/profiler/compare_tools/utils/args_manager.py +++ b/profiler/compare_tools/utils/args_manager.py @@ -1,5 +1,6 @@ import os.path +from path_manager import PathManager from utils.constant import Constant from utils.file_reader import FileReader from utils.profiling_parser import GPUProfilingParser, NPUProfilingParser @@ -45,38 +46,14 @@ class ArgsManager: @classmethod def check_profiling_path(cls, file_path: str): - if len(file_path) > Constant.MAX_PATH_LENGTH: - msg = f"The length of file path exceeded the maximum value {Constant.MAX_PATH_LENGTH}: {file_path}" - raise RuntimeError(msg) - if not os.path.exists(file_path): - msg = f"Invalid profiling path: {file_path}" - raise RuntimeError(msg) - if os.path.islink(file_path): - msg = f"Invalid profiling path is soft link: {file_path}" - raise RuntimeError(msg) - if not os.access(file_path, os.R_OK): - msg = f"The file path has no read permission: {file_path}" - raise RuntimeError(msg) + PathManager.input_path_common_check(file_path) + PathManager.check_path_owner_consistent(file_path) @classmethod def check_output_path(cls, output_path: str): - if len(output_path) > Constant.MAX_PATH_LENGTH: - msg = f"Invalid param, the length of output_path exceeded the maximum value {Constant.MAX_PATH_LENGTH}" - raise RuntimeError(msg) - if os.path.islink(output_path): - raise RuntimeError("Invalid param, the output_path is soft link") - if not os.path.exists(output_path): - try: - os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) - except Exception: - msg = f"Can't create directory: {output_path}" - raise RuntimeError(msg) - if not os.path.isdir(output_path): - msg = f"Invalid output_path: {output_path}" - raise RuntimeError(msg) - if not os.access(output_path, os.W_OK): - msg = f"The output path has no write permission: {output_path}" - raise RuntimeError(msg) + PathManager.check_input_directory_path(output_path) + 
PathManager.make_dir_safety(output_path) + PathManager.check_path_writeable(output_path) def parse_profiling_path(self, file_path: str): self.check_profiling_path(file_path) @@ -118,17 +95,22 @@ class ArgsManager: self._args.enable_operator_compare = True self._args.enable_memory_compare = True self._args.enable_communication_compare = True - base_profiling_dict = self.parse_profiling_path(self._args.base_profiling_path) - comparison_profiling_dict = self.parse_profiling_path(self._args.comparison_profiling_path) + + base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) + self.check_profiling_path(base_profiling_path) + base_profiling_dict = self.parse_profiling_path(base_profiling_path) + comparison_profiling_path = PathManager.get_realpath(self._args.comparison_profiling_path) + self.check_profiling_path(comparison_profiling_path) + comparison_profiling_dict = self.parse_profiling_path(comparison_profiling_path) if self._args.output_path: - self.check_output_path(self._args.output_path) + self.check_output_path(PathManager.get_realpath(self._args.output_path)) Constant.BASE_PROFILING = Constant.BASE_PROFILING + self._args.base_profiling_path self._base_profiling_type = base_profiling_dict.get(Constant.PROFILING_TYPE) self._base_profiling = self.PARSER_DICT.get(self._base_profiling_type)(self._args, base_profiling_dict) - if self._args.base_profiling_path == self._args.comparison_profiling_path: + if base_profiling_path == comparison_profiling_path: Constant.COMPARISON_PROFILING = "Same To Base Profiling" self._comparison_profiling_type = self._base_profiling_type self._comparison_profiling = self._base_profiling diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py index 4658e0e7766cc69c575ae3f8a14a64220debb9c5..37853f41f4231217f8d8ece3b1dbc10fd7934b4e 100644 --- a/profiler/compare_tools/utils/file_reader.py +++ b/profiler/compare_tools/utils/file_reader.py @@ -2,6 +2,7 @@ import csv import 
json import os +from path_manager import PathManager from utils.constant import Constant @@ -9,6 +10,7 @@ class FileReader: @classmethod def read_trace_file(cls, file_path: str) -> any: + PathManager.check_path_readable(file_path) if not os.path.isfile(file_path): msg = f"File not exists: {file_path}" raise RuntimeError(msg) @@ -16,9 +18,11 @@ class FileReader: if file_size <= 0: return [] if file_size > Constant.MAX_FILE_SIZE: - print(f"[WARNING] The file size exceeds the preset value {Constant.MAX_FILE_SIZE / 1024 / 1024}MB, " - f"please check the file: {file_path}") - return [] + check_msg = input( + f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + if check_msg.lower() != "y": + print(f"[WARNING] The user choose not to read the file: {file_path}") + return [] try: with open(file_path, "rt") as file: json_data = json.loads(file.read()) @@ -29,15 +33,18 @@ class FileReader: @classmethod def read_csv_file(cls, file_path: str) -> any: + PathManager.check_path_readable(file_path) if not os.path.isfile(file_path): return [] file_size = os.path.getsize(file_path) if file_size <= 0: return [] if file_size > Constant.MAX_FILE_SIZE: - print(f"[WARNING] The file size exceeds the preset value {Constant.MAX_FILE_SIZE / 1024 / 1024}MB, " - f"please check the file: {file_path}") - return [] + check_msg = input( + f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + if check_msg.lower() != "y": + print(f"[WARNING] The user choose not to read the file: {file_path}") + return [] result_data = [] try: with open(file_path, newline="") as csv_file: