From 61110a58dddb566108262f6a53e94eac98c60893 Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Thu, 19 Oct 2023 09:53:25 +0800
Subject: [PATCH] input_check_for_security

---
Review notes (text in this section is not part of the commit message):
 * Adds PathManager (cluster_analyse/common_func/path_manager.py) and routes
   the path checks in cluster_analysis.py, args_manager.py and file_reader.py
   through it.
 * path_manager.py: the "is a file path" / "is a directory path" messages are
   f-strings, so the offending path is interpolated instead of the literal
   text "{path}".
 * path_manager.py: the check_path_readable docstring says "readable".
 * path_manager.py: the per-component length loop in input_path_common_check
   uses its own variable instead of rebinding the `path` parameter.
 * These three edits keep every line count unchanged, so the hunk headers and
   the diffstat below still hold; the blob id on the path_manager.py index
   line predates them.

 profiler/__init__.py                          |   0
 profiler/cluster_analyse/cluster_analysis.py  |   5 +
 .../common_func/path_manager.py               | 183 ++++++++++++++++++
 profiler/compare_tools/performance_compare.py |   4 +-
 profiler/compare_tools/utils/args_manager.py  |  48 ++---
 profiler/compare_tools/utils/constant.py      |   2 +-
 profiler/compare_tools/utils/file_reader.py   |   3 +
 7 files changed, 210 insertions(+), 35 deletions(-)
 create mode 100644 profiler/__init__.py
 create mode 100644 profiler/cluster_analyse/common_func/path_manager.py

diff --git a/profiler/__init__.py b/profiler/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py
index 58f1e4bc6ad..53e88ff1476 100644
--- a/profiler/cluster_analyse/cluster_analysis.py
+++ b/profiler/cluster_analyse/cluster_analysis.py
@@ -14,10 +14,12 @@
 # limitations under the License.
 
 import argparse
+
 from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor
 from communication_group.communication_group_generator import CommunicationGroupGenerator
 from common_func.constant import Constant
 from common_func.file_manager import FileManager
+from common_func.path_manager import PathManager
 from analysis.analysis_facade import AnalysisFacade
 
 
@@ -56,4 +58,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="cluster analysis module")
     parser.add_argument('-d', '--collection_path', type=str, required=True, help="profiling data path")
     args = parser.parse_args()
+    collection_path = PathManager.get_realpath(args.collection_path)
+    PathManager.check_input_directory_path(collection_path)
+    PathManager.check_path_owner_consistent(collection_path)
     Interface(args).run()
diff --git a/profiler/cluster_analyse/common_func/path_manager.py b/profiler/cluster_analyse/common_func/path_manager.py
new file mode 100644
index 00000000000..7f371bdbb30
--- /dev/null
+++ b/profiler/cluster_analyse/common_func/path_manager.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2023 Huawei Technologies Co., Ltd
+# All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import shutil
+import platform
+
+
+class PathManager:
+    MAX_PATH_LENGTH = 4096
+    MAX_FILE_NAME_LENGTH = 255
+    DATA_FILE_AUTHORITY = 0o640
+    DATA_DIR_AUTHORITY = 0o750
+    WINDOWS = "windows"
+
+    @classmethod
+    def check_input_directory_path(cls, path: str):
+        """
+        Function Description:
+            check whether the path is valid, some businesses can accept a path that does not exist,
+            so the function do not verify whether the path exists
+        Parameter:
+            path: the path to check, whether the incoming path is absolute or relative depends on the business
+        Exception Description:
+            when invalid data throw exception
+        """
+        cls.input_path_common_check(path)
+
+        if os.path.isfile(path):
+            msg = f"Invalid input path is a file path: {path}"
+            raise RuntimeError(msg)
+
+    @classmethod
+    def check_input_file_path(cls, path: str):
+        """
+        Function Description:
+            check whether the file path is valid, some businesses can accept a path that does not exist,
+            so the function do not verify whether the path exists
+        Parameter:
+            path: the file path to check, whether the incoming path is absolute or relative depends on the business
+        Exception Description:
+            when invalid data throw exception
+        """
+        cls.input_path_common_check(path)
+
+        if os.path.isdir(path):
+            msg = f"Invalid input path is a directory path: {path}"
+            raise RuntimeError(msg)
+
+    @classmethod
+    def input_path_common_check(cls, path: str):
+        if len(path) > cls.MAX_PATH_LENGTH:
+            raise RuntimeError("Length of input path exceeds the limit.")
+
+        if os.path.islink(path):
+            msg = f"Invalid input path is a soft chain: {path}"
+            raise RuntimeError(msg)
+
+        if platform.system().lower() == cls.WINDOWS:
+            pattern = r'(\.|:|\\|/|_|-|\s|[~0-9a-zA-Z\u4e00-\u9fa5])+'
+        else:
+            pattern = r'(\.|/|_|-|\s|[~0-9a-zA-Z])+'
+        if not re.fullmatch(pattern, path):
+            msg = f"Invalid input path: {path}"
+            raise RuntimeError(msg)
+
+        path_split_list = path.split("/")
+        for segment in path_split_list:
+            path_list = segment.split("\\")
+            for name in path_list:
+                if len(name) > cls.MAX_FILE_NAME_LENGTH:
+                    raise RuntimeError("Length of input path exceeds the limit.")
+
+    @classmethod
+    def check_path_owner_consistent(cls, path: str):
+        """
+        Function Description:
+            check whether the path belong to process owner
+        Parameter:
+            path: the path to check
+        Exception Description:
+            when invalid path, prompt the user
+        """
+        if not os.path.exists(path):
+            msg = f"The path does not exist: {path}"
+            raise RuntimeError(msg)
+        if platform.system().lower() == cls.WINDOWS:
+            return
+        if os.stat(path).st_uid != os.getuid():
+            check_msg = input("The path does not belong to you, do you want to continue? [y/n]")
+            if check_msg.lower() != "y":
+                raise RuntimeError("The user chose not to continue.")
+
+    @classmethod
+    def check_path_writeable(cls, path):
+        """
+        Function Description:
+            check whether the path is writable
+        Parameter:
+            path: the path to check
+        Exception Description:
+            when invalid data throw exception
+        """
+        cls.check_path_owner_consistent(path)
+        if os.path.islink(path):
+            msg = f"Invalid path is a soft chain: {path}"
+            raise RuntimeError(msg)
+        if not os.access(path, os.W_OK):
+            msg = f"The path permission check failed: {path}"
+            raise RuntimeError(msg)
+
+    @classmethod
+    def check_path_readable(cls, path):
+        """
+        Function Description:
+            check whether the path is readable
+        Parameter:
+            path: the path to check
+        Exception Description:
+            when invalid data throw exception
+        """
+        cls.check_path_owner_consistent(path)
+        if os.path.islink(path):
+            msg = f"Invalid path is a soft chain: {path}"
+            raise RuntimeError(msg)
+        if not os.access(path, os.R_OK):
+            msg = f"The path permission check failed: {path}"
+            raise RuntimeError(msg)
+
+    @classmethod
+    def remove_path_safety(cls, path: str):
+        msg = f"Failed to remove path: {path}"
+        if os.path.islink(path):
+            raise RuntimeError(msg)
+        if os.path.exists(path):
+            try:
+                shutil.rmtree(path)
+            except Exception as err:
+                raise RuntimeError(msg) from err
+
+    @classmethod
+    def make_dir_safety(cls, path: str):
+        msg = f"Failed to make directory: {path}"
+        if os.path.islink(path):
+            raise RuntimeError(msg)
+        if os.path.exists(path):
+            return
+        try:
+            os.makedirs(path, mode=cls.DATA_DIR_AUTHORITY)
+        except Exception as err:
+            raise RuntimeError(msg) from err
+
+    @classmethod
+    def create_file_safety(cls, path: str):
+        msg = f"Failed to create file: {path}"
+        if os.path.islink(path):
+            raise RuntimeError(msg)
+        if os.path.exists(path):
+            return
+        try:
+            os.close(os.open(path, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY))
+        except Exception as err:
+            raise RuntimeError(msg) from err
+
+    @classmethod
+    def get_realpath(cls, path: str) -> str:
+        if os.path.islink(path):
+            msg = f"Invalid input path is a soft chain: {path}"
+            raise RuntimeError(msg)
+        return os.path.realpath(path)
diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py
index cf42641be10..1cb2b1913c0 100644
--- a/profiler/compare_tools/performance_compare.py
+++ b/profiler/compare_tools/performance_compare.py
@@ -5,6 +5,8 @@ import os.path
 import sys
 import time
 
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse", "common_func"))
 from generation.comparison_generator import ComparisonGenerator
 from utils.args_manager import ArgsManager
 from profiling_analysis.profiling_parse import prof_main
@@ -17,7 +19,7 @@ def performance_compare(args):
 
 
 def main():
-    sys.path.append(os.path.dirname(__file__))
+    sys.path.append(os.path.dirname(os.path.abspath(__file__)))
     parser = argparse.ArgumentParser(description="Compare trace of GPU and NPU")
     parser.add_argument("base_profiling_path", type=str, default='', help="基准性能数据的文件路径")
     parser.add_argument("comparison_profiling_path", type=str, default='', help="比较性能数据的文件路径")
diff --git a/profiler/compare_tools/utils/args_manager.py b/profiler/compare_tools/utils/args_manager.py
index 88c57b2f9e4..48bcdd76997 100644
--- a/profiler/compare_tools/utils/args_manager.py
+++ b/profiler/compare_tools/utils/args_manager.py
@@ -1,5 +1,6 @@
 import os.path
 
+from path_manager import PathManager
 from utils.constant import Constant
 from utils.file_reader import FileReader
 from utils.profiling_parser import GPUProfilingParser, NPUProfilingParser
@@ -45,38 +46,14 @@ class ArgsManager:
 
     @classmethod
     def check_profiling_path(cls, file_path: str):
-        if len(file_path) > Constant.MAX_PATH_LENGTH:
-            msg = f"The length of file path exceeded the maximum value {Constant.MAX_PATH_LENGTH}: {file_path}"
-            raise RuntimeError(msg)
-        if not os.path.exists(file_path):
-            msg = f"Invalid profiling path: {file_path}"
-            raise RuntimeError(msg)
-        if os.path.islink(file_path):
-            msg = f"Invalid profiling path is soft link: {file_path}"
-            raise RuntimeError(msg)
-        if not os.access(file_path, os.R_OK):
-            msg = f"The file path has no read permission: {file_path}"
-            raise RuntimeError(msg)
+        PathManager.input_path_common_check(file_path)
+        PathManager.check_path_owner_consistent(file_path)
 
     @classmethod
     def check_output_path(cls, output_path: str):
-        if len(output_path) > Constant.MAX_PATH_LENGTH:
-            msg = f"Invalid param, the length of output_path exceeded the maximum value {Constant.MAX_PATH_LENGTH}"
-            raise RuntimeError(msg)
-        if os.path.islink(output_path):
-            raise RuntimeError("Invalid param, the output_path is soft link")
-        if not os.path.exists(output_path):
-            try:
-                os.makedirs(output_path, mode=Constant.DIR_AUTHORITY)
-            except Exception:
-                msg = f"Can't create directory: {output_path}"
-                raise RuntimeError(msg)
-        if not os.path.isdir(output_path):
-            msg = f"Invalid output_path: {output_path}"
-            raise RuntimeError(msg)
-        if not os.access(output_path, os.W_OK):
-            msg = f"The output path has no write permission: {output_path}"
-            raise RuntimeError(msg)
+        PathManager.check_input_directory_path(output_path)
+        PathManager.make_dir_safety(output_path)
+        PathManager.check_path_writeable(output_path)
 
     def parse_profiling_path(self, file_path: str):
         self.check_profiling_path(file_path)
@@ -118,17 +95,22 @@ class ArgsManager:
             self._args.enable_operator_compare = True
             self._args.enable_memory_compare = True
             self._args.enable_communication_compare = True
-        base_profiling_dict = self.parse_profiling_path(self._args.base_profiling_path)
-        comparison_profiling_dict = self.parse_profiling_path(self._args.comparison_profiling_path)
+
+        base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path)
+        self.check_profiling_path(base_profiling_path)
+        base_profiling_dict = self.parse_profiling_path(base_profiling_path)
+        comparison_profiling_path = PathManager.get_realpath(self._args.comparison_profiling_path)
+        self.check_profiling_path(comparison_profiling_path)
+        comparison_profiling_dict = self.parse_profiling_path(comparison_profiling_path)
 
         if self._args.output_path:
-            self.check_output_path(self._args.output_path)
+            self.check_output_path(PathManager.get_realpath(self._args.output_path))
 
         Constant.BASE_PROFILING = Constant.BASE_PROFILING + self._args.base_profiling_path
         self._base_profiling_type = base_profiling_dict.get(Constant.PROFILING_TYPE)
         self._base_profiling = self.PARSER_DICT.get(self._base_profiling_type)(self._args, base_profiling_dict)
 
-        if self._args.base_profiling_path == self._args.comparison_profiling_path:
+        if base_profiling_path == comparison_profiling_path:
             Constant.COMPARISON_PROFILING = "Same To Base Profiling"
             self._comparison_profiling_type = self._base_profiling_type
             self._comparison_profiling = self._base_profiling
diff --git a/profiler/compare_tools/utils/constant.py b/profiler/compare_tools/utils/constant.py
index 360c2ab44ae..c702a851e3c 100644
--- a/profiler/compare_tools/utils/constant.py
+++ b/profiler/compare_tools/utils/constant.py
@@ -8,7 +8,7 @@ class Constant(object):
     LIMIT_KERNEL = 3
     MAX_PATH_LENGTH = 4096
     MAX_FLOW_CAT_LEN = 20
-    MAX_FILE_SIZE = 1024 * 1024 * 1024 * 5
+    MAX_FILE_SIZE = 1024 * 1024 * 1024 * 10
     BYTE_TO_KB = 1024
     YELLOW_COLOR = "FFFF00"
     GREEN_COLOR = "0000FF00"
diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py
index 4658e0e7766..31eb02dd827 100644
--- a/profiler/compare_tools/utils/file_reader.py
+++ b/profiler/compare_tools/utils/file_reader.py
@@ -2,6 +2,7 @@ import csv
 import json
 import os
 
+from path_manager import PathManager
 from utils.constant import Constant
 
 
@@ -9,6 +10,7 @@ class FileReader:
 
     @classmethod
     def read_trace_file(cls, file_path: str) -> any:
+        PathManager.check_path_readable(file_path)
         if not os.path.isfile(file_path):
             msg = f"File not exists: {file_path}"
             raise RuntimeError(msg)
@@ -29,6 +31,7 @@ class FileReader:
 
     @classmethod
     def read_csv_file(cls, file_path: str) -> any:
+        PathManager.check_path_readable(file_path)
         if not os.path.isfile(file_path):
             return []
         file_size = os.path.getsize(file_path)
-- 
Gitee