From ce429e3a47734ed8501f14d251c5279c88fef292 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Wed, 19 Feb 2025 11:54:30 +0800 Subject: [PATCH 01/15] =?UTF-8?q?=E3=80=90=E9=9C=80=E6=B1=82=E3=80=91?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F=E5=92=8C?= =?UTF-8?q?=E4=B8=89=E6=96=B9=E5=BA=93=E6=A3=80=E6=B5=8B=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E9=A2=84=E7=95=99=E7=9B=AE=E6=A0=87=E7=8E=AF=E5=A2=83?= =?UTF-8?q?=E5=8F=98=E9=87=8F=E5=92=8C=E4=B8=89=E6=96=B9=E5=BA=93=E7=9B=AE?= =?UTF-8?q?=E6=A0=87=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 47 +++++++++++++++- .../env_check/python_library_check.py | 56 ++++++++++++++++++- .../target_config/target_env_variables.yaml | 48 ++++++++++++++++ .../target_config/target_requirements.txt | 52 +++++++++++++++++ 4 files changed, 199 insertions(+), 4 deletions(-) create mode 100644 profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml create mode 100644 profiler/msprof_analyze/precheck/env_check/target_config/target_requirements.txt diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 549eb9ddf0..8a7670bce0 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -12,7 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from msprof_analyze.precheck.env_check.environment_check import SoftwareCheck +import os +import yaml + +from environment_check import SoftwareCheck class EnvironmentVariableCheck(SoftwareCheck): @@ -22,4 +25,44 @@ class EnvironmentVariableCheck(SoftwareCheck): super().__init__(args) def check(self): - pass + target_variables = self.__get_target_variables() + env_variables = self.__get_env_variables() + missing_or_mismatched = self.__check_env_variables( + target_variables, env_variables + ) + self.__print_result(missing_or_mismatched) + + def __get_target_variables(self): + variables_file_path = "./target_config/target_env_variables.yaml" + with open(variables_file_path, "r") as file: + res = yaml.safe_load(file) + return res + + def __get_env_variables(self): + res = {key: os.getenv(key) for key in os.environ} + return res + + def __check_env_variables(self, target_variables, env_variables): + missing_or_mismatched = [] + + for key, value in target_variables.items(): + if key not in env_variables: + missing_or_mismatched.append(f"{key} (missing, expected '{value}', or check if it's default value.)") + elif env_variables[key] != value: + missing_or_mismatched.append( + f"{key} (value mismatch, expected '{value}', got '{env_variables[key]}.')") + + return missing_or_mismatched + + def __print_result(self, missing_or_mismatched): + if missing_or_mismatched: + print("Missing or different environment variables:") + for item in missing_or_mismatched: + print(item) + else: + print("All required environment variables are present and have the correct values.") + + +if __name__ == "__main__": + checker = EnvironmentVariableCheck() + checker.check() diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index a23b674ac0..2d76ff4485 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -12,7 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from msprof_analyze.precheck.env_check.environment_check import SoftwareCheck +import pkg_resources +import importlib.metadata + +from environment_check import SoftwareCheck class PythonLibraryCheck(SoftwareCheck): @@ -22,4 +25,53 @@ class PythonLibraryCheck(SoftwareCheck): super().__init__(args) def check(self): - pass + target_libraries = self.__get_target_libraries() + env_libraries = self.__get_env_libraries() + missing_or_different = self.__check_env_libraries(target_libraries, env_libraries) + self.__print_result(missing_or_different) + + def __get_target_libraries(self): + requirements_file_path = './target_config/target_requirements.txt' + target_libs = {} + with open(requirements_file_path, 'r') as file: + for line in file: + line = line.strip() + if line and not line.startswith('#'): + try: + lib = pkg_resources.Requirement.parse(line) + target_libs[lib.project_name] = lib.specifier + except ValueError: + print(f"Skipping invalid line: {line}") + return target_libs + + def __get_env_libraries(self): + env_libs = {} + + for package in importlib.metadata.distributions(): + env_libs[package.metadata['Name']] = package.version + return env_libs + + + def __check_env_libraries(self, target_libraries, env_libraries): + missing_or_mismatched = [] + + for lib_name, specifier in target_libraries.items(): + if lib_name in env_libraries: + installed_version = env_libraries[lib_name] + if specifier and not specifier.contains(installed_version): + missing_or_mismatched.append(f"{lib_name} (version mismatch, expected {specifier}, got {installed_version}.)") + else: + missing_or_mismatched.append(f"{lib_name} (missing, expected '{specifier}'.)") + return missing_or_mismatched + + def __print_result(self, missing_or_mismatched): + if missing_or_mismatched: + print("Missing or version-mismatched Python libraries:") + for item in missing_or_mismatched: + print(item) + else: + print("All required Python libraries are present and have the correct versions.") + +if __name__ == '__main__': + python_library_check = PythonLibraryCheck() + python_library_check.check() diff --git a/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml b/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml new file mode 100644 index 0000000000..e0906447d1 --- /dev/null +++ b/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml @@ -0,0 +1,48 @@ +# 可根据场景修改或添加环境变量目标值 + +# CANN +TE_PARALLEL_COMPILER: 8 +# IGNORE_INFER_ERROR: +AUTO_USE_UC_MEMORY: 1 +ACLNN_CACHE_LIMIT: 10000 +ENABLE_DYNAMIC_SHAPE_MULTI_STREAM: 0 +# MAX_RUNTIME_CORE_NUMBER: +HCCL_CONNECT_TIMEOUT: 120 +HCCL_EXEC_TIMEOUT: 1836 +HCCL_ALGO: "level0:NA;level1:H-D_R" +HCCL_BUFFSIZE: 200 +HCCL_INTRA_PCIE_ENABLE: 1 +HCCL_INTRA_ROCE_ENABLE: 0 +HCCL_RDMA_TC: 132 +HCCL_RDMA_SL: 4 +HCCL_RDMA_TIMEOUT: 20 +HCCL_RDMA_RETRY_CNT: 7 +HCCL_RDMA_PCIE_DIRECT_POST_NOSTRICT: "TRUE" +HCCL_RDMA_QPS_PER_CONNECTION: 1 +HCCL_MULTI_QP_THRESHOLD: 512 +HCCL_OP_EXPANSION_MODE: "HOST" +HCCL_DETERMINISTIC: "false" + +# torch_npu +INF_NAN_MODE_ENABLE: 1 +COMBINED_ENABLE: 0 +ASCEND_LAUNCH_BLOCKING: 0 +# ACL_OP_COMPILER_CACHE_DIR: +ACL_OP_COMPILER_CACHE_MODE: "enable" +# PYTORCH_NPU_ALLOC_CONF: +HCCL_ASYNC_ERROR_HANDLING: 1 +HCCL_DESYNC_DEBUG: 0 +HCCL_EVENT_TIMEOUT: 0 +P2P_HCCL_BUFFSIZE: 20 +PYTORCH_NO_NPU_MEMORY_CACHING: +OOM_SNAPSHOT_ENABLE: 0 +# OOM_SNAPSHOT_PATH: +# RANK_TABLE_FILE: +# TORCH_NPU_DISABLED_WARNING: +TASK_QUEUE_ENABLE: 1 +# ACL_DEVICE_SYNC_TIMEOUT: +MULTI_STREAM_MEMORY_REUSE: 1 +NPU_ASD_ENABLE: 0 +NPU_ASD_UPPER_THRESH: "1000000,10000" +NPU_ASD_SIGMA_THRESH: "100000,5000" +INF_NAN_MODE_FORCE_DISABLE: 0 \ No newline at end of file diff --git a/profiler/msprof_analyze/precheck/env_check/target_config/target_requirements.txt b/profiler/msprof_analyze/precheck/env_check/target_config/target_requirements.txt new file mode 100644 index 0000000000..fa8b6a6af3 --- /dev/null +++ b/profiler/msprof_analyze/precheck/env_check/target_config/target_requirements.txt @@ -0,0 +1,52 @@ +# 可根据场景修改或添加三方库目标值 + +# basic +torch==2.1.0 +torch-npu==2.1.0.post8 +torchvision==0.16.0 +apex==0.1+ascend + +# mindspeed-llm +numpy>=1.19.2,<2.0.0 +transformers==4.43.2 +transformers_stream_generator +sympy +decorator +scipy +sentencepiece +einops +datasets>=2.16.0 +pybind11 +accelerate +six +protobuf +peft==0.7.1 +tiktoken +ray==2.10.0 +tensordict==0.1.2 +hydra-core==1.3.2 +codetiming +bitsandbytes-npu-beta==0.45.2 + +# mindspeed +pybind11 +ninja +wheel +numpy +six +regex +decorator +attrs +psutil +pyyaml +protobuf +einops +scipy +sentencepiece +pytest +tokenizers<=0.20.3 +transformers>=4.43.2 +gpytorch +pandas +scikit-learn +SQLAlchemy \ No newline at end of file -- Gitee From e057b702fb0ac8bfa16b6c342ab1692ccb56b7f8 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Tue, 25 Feb 2025 18:01:45 +0800 Subject: [PATCH 02/15] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/env_check/constant.py | 22 ++++++ .../env_check/environment_variable_check.py | 39 +++++++--- .../env_check/python_library_check.py | 30 ++++++-- .../precheck/env_check/utils.py | 76 +++++++++++++++++++ 4 files changed, 147 insertions(+), 20 deletions(-) create mode 100644 profiler/msprof_analyze/precheck/env_check/constant.py create mode 100644 profiler/msprof_analyze/precheck/env_check/utils.py diff --git a/profiler/msprof_analyze/precheck/env_check/constant.py b/profiler/msprof_analyze/precheck/env_check/constant.py new file mode 100644 index 0000000000..08c0367a2d --- /dev/null +++ b/profiler/msprof_analyze/precheck/env_check/constant.py @@ -0,0 +1,22 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class Constant(object): + # Input file size limit 64 KB + MAX_READ_FILE_BYTES = 64 * 1024 + + # Target config file path + TARGET_VARIABLE_FILE_PATH = "./target_config/target_env_variables.yaml" + TARGET_LIBRARY_FILE_PATH = './target_config/target_requirements.txt' diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 8a7670bce0..b201fd5670 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -14,8 +14,11 @@ # limitations under the License. import os import yaml +import logging from environment_check import SoftwareCheck +from constant import Constant +from utils import SafeOpen, create_file class EnvironmentVariableCheck(SoftwareCheck): @@ -23,20 +26,31 @@ class EnvironmentVariableCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) + self.output_path = args.get("output", "./") + self.rank = args.get("rank", 0) + log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.rank}.log") + create_file(log_file_path) + file_handler = logging.FileHandler(log_file_path) + self.logger = logging.getLogger() + self.logger.addHandler(file_handler) + def check(self): target_variables = self.__get_target_variables() env_variables = self.__get_env_variables() - missing_or_mismatched = self.__check_env_variables( - target_variables, env_variables - ) + missing_or_mismatched = self.__check_env_variables(target_variables, env_variables) self.__print_result(missing_or_mismatched) def __get_target_variables(self): - variables_file_path = "./target_config/target_env_variables.yaml" - with open(variables_file_path, "r") as file: - res = yaml.safe_load(file) - return res + variables_file_path = os.path.abspath(Constant.TARGET_VARIABLE_FILE_PATH) + with SafeOpen(variables_file_path, "r") as file: + if not file: + return {} + try: + res = yaml.safe_load(file) + except yaml.YAMLError as e: + self.logger.error(f"Error when loading target variables: {e}") + return res if res is not None else {} def __get_env_variables(self): res = {key: os.getenv(key) for key in os.environ} @@ -47,20 +61,21 @@ class EnvironmentVariableCheck(SoftwareCheck): for key, value in target_variables.items(): if key not in env_variables: - missing_or_mismatched.append(f"{key} (missing, expected '{value}', or check if it's default value.)") + missing_or_mismatched.append(f"Environment variables '{key}' missing, expected '{value}'," + f"or check if it's default value.") elif env_variables[key] != value: missing_or_mismatched.append( - f"{key} (value mismatch, expected '{value}', got '{env_variables[key]}.')") + f"Environment variables '{key}' value mismatch, expected value: '{value}', got value: '{env_variables[key]}'.") return missing_or_mismatched def __print_result(self, missing_or_mismatched): if missing_or_mismatched: - print("Missing or different environment variables:") + self.logger.warning("Missing or different environment variables:") for item in missing_or_mismatched: - print(item) + self.logger.warning(item) else: - print("All required environment variables are present and have the correct values.") + self.logger.info("All required environment variables are present and have the correct values.") if __name__ == "__main__": diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index 2d76ff4485..e71909992e 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -12,10 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging +import os import pkg_resources import importlib.metadata from environment_check import SoftwareCheck +from constant import Constant +from utils import SafeOpen, create_file class PythonLibraryCheck(SoftwareCheck): @@ -23,6 +27,14 @@ class PythonLibraryCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) + self.output_path = args.get("output", "./") + self.rank = args.get("rank", 0) + log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.rank}.log") + + create_file(log_file_path) + file_handler = logging.FileHandler(log_file_path) + self.logger = logging.getLogger() + self.logger.addHandler(file_handler) def check(self): target_libraries = self.__get_target_libraries() @@ -31,9 +43,11 @@ class PythonLibraryCheck(SoftwareCheck): self.__print_result(missing_or_different) def __get_target_libraries(self): - requirements_file_path = './target_config/target_requirements.txt' + requirements_file_path = os.path.abspath(Constant.TARGET_LIBRARY_FILE_PATH) target_libs = {} - with open(requirements_file_path, 'r') as file: + with SafeOpen(requirements_file_path, 'r') as file: + if not file: + return {} for line in file: line = line.strip() if line and not line.startswith('#'): @@ -41,7 +55,7 @@ class PythonLibraryCheck(SoftwareCheck): lib = pkg_resources.Requirement.parse(line) target_libs[lib.project_name] = lib.specifier except ValueError: - print(f"Skipping invalid line: {line}") + self.logger.warning(f"Skipping invalid line: {line}") return target_libs def __get_env_libraries(self): @@ -59,18 +73,18 @@ class PythonLibraryCheck(SoftwareCheck): if lib_name in env_libraries: installed_version = env_libraries[lib_name] if specifier and not specifier.contains(installed_version): - missing_or_mismatched.append(f"{lib_name} (version mismatch, expected {specifier}, got {installed_version}.)") + missing_or_mismatched.append(f"Third-party libraries '{lib_name}' version mismatch, expected '{specifier}', got '{installed_version}'.") else: - missing_or_mismatched.append(f"{lib_name} (missing, expected '{specifier}'.)") + missing_or_mismatched.append(f"Third-party libraries '{lib_name}' missing, expected version: '{specifier}'.") return missing_or_mismatched def __print_result(self, missing_or_mismatched): if missing_or_mismatched: - print("Missing or version-mismatched Python libraries:") + self.logger.warning("Missing or version-mismatched Python libraries:") for item in missing_or_mismatched: - print(item) + self.logger.warning(item) else: - print("All required Python libraries are present and have the correct versions.") + self.logger.info("All required Python libraries are present and have the correct versions.") if __name__ == '__main__': python_library_check = PythonLibraryCheck() diff --git a/profiler/msprof_analyze/precheck/env_check/utils.py b/profiler/msprof_analyze/precheck/env_check/utils.py new file mode 100644 index 0000000000..5407da7a8b --- /dev/null +++ b/profiler/msprof_analyze/precheck/env_check/utils.py @@ -0,0 +1,76 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import logging + +from constant import Constant + +logger = logging.getLogger() + + +def check_path_valid(path: str, max_size: int = Constant.MAX_READ_FILE_BYTES) -> bool: + """ + 检查给定的文件路径是否有效。 + """ + if not path: + logger.error("The path is empty. Please enter a valid path.") + return False + if not os.path.isfile(path): + logger.error(f"The path \"{path}\" is not a file. Please check the path.") + return False + if os.path.islink(path): + logger.error(f"The path \"{path}\" is link. Please check the path.") + return False + if os.path.getsize(path) > max_size: + logger.error(f"The path \"{path}\" is too large to read. Please check the path.") + return False + if not os.access(path, os.R_OK): + logger.error(f"The path \"{path}\" does not have permission to read. ") + return False + return True + +def create_file(file_path): + """ + 根据提供的路径创建一个文件。如果目录不存在,则先创建目录。 + """ + directory = os.path.dirname(file_path) + if not os.path.exists(directory): + os.makedirs(directory) + try: + with open(file_path, 'w') as file: + pass + except Exception as e: + logger.error(f"Failed to create file: {e}") + +class SafeOpen: + """ + open的安全上下文管理器 + """ + def __init__(self: any, file_path: str, mode: str = "r", max_size: int = Constant.MAX_READ_FILE_BYTES) -> None: + self.file_path = file_path + self.mode = mode + self.max_size = max_size + self.file_reader = None + + def __enter__(self: any) -> any: + if not check_path_valid(self.file_path, max_size=self.max_size): + return None + self.file_reader = open(self.file_path, self.mode, encoding='utf-8') + return self.file_reader + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file_reader: + self.file_reader.close() + -- Gitee From 728c42d985a9deb4deda0a2e41f834e19bc51837 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Wed, 26 Feb 2025 14:28:30 +0800 Subject: [PATCH 03/15] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=9A=E6=96=B0=E5=A2=9E=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E7=B1=BB=EF=BC=8C=E4=BF=AE=E6=94=B9=E5=8F=98=E9=87=8F=E6=A0=A1?= =?UTF-8?q?=E9=AA=8C=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/env_check/constant.py | 5 ++ .../env_check/environment_variable_check.py | 30 ++++----- .../precheck/env_check/{utils.py => file.py} | 54 +++++++++++---- .../env_check/python_library_check.py | 4 +- .../target_config/target_env_variables.yaml | 67 ++++++++++--------- 5 files changed, 96 insertions(+), 64 deletions(-) rename profiler/msprof_analyze/precheck/env_check/{utils.py => file.py} (53%) diff --git a/profiler/msprof_analyze/precheck/env_check/constant.py b/profiler/msprof_analyze/precheck/env_check/constant.py index 08c0367a2d..0d89d18455 100644 --- a/profiler/msprof_analyze/precheck/env_check/constant.py +++ b/profiler/msprof_analyze/precheck/env_check/constant.py @@ -16,6 +16,11 @@ class Constant(object): # Input file size limit 64 KB MAX_READ_FILE_BYTES = 64 * 1024 + MAX_WRITE_FILE_BYTES = 64 * 1024 + + # Athority of directory and file + DIR_AUTHORITY = 0o750 + File_AUTHORITY = 0o640 # Target config file path TARGET_VARIABLE_FILE_PATH = "./target_config/target_env_variables.yaml" diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index b201fd5670..41ad49c4d5 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -18,7 +18,7 @@ import logging from environment_check import SoftwareCheck from constant import Constant -from utils import SafeOpen, create_file +from file import FileReader, create_file class EnvironmentVariableCheck(SoftwareCheck): @@ -38,12 +38,12 @@ class EnvironmentVariableCheck(SoftwareCheck): def check(self): target_variables = self.__get_target_variables() env_variables = self.__get_env_variables() - missing_or_mismatched = self.__check_env_variables(target_variables, env_variables) - self.__print_result(missing_or_mismatched) + mismatched = self.__check_env_variables(target_variables, env_variables) + self.__print_result(mismatched) def __get_target_variables(self): variables_file_path = os.path.abspath(Constant.TARGET_VARIABLE_FILE_PATH) - with SafeOpen(variables_file_path, "r") as file: + with FileReader(variables_file_path) as file: if not file: return {} try: @@ -57,22 +57,20 @@ class EnvironmentVariableCheck(SoftwareCheck): return res def __check_env_variables(self, target_variables, env_variables): - missing_or_mismatched = [] + mismatched = [] - for key, value in target_variables.items(): - if key not in env_variables: - missing_or_mismatched.append(f"Environment variables '{key}' missing, expected '{value}'," - f"or check if it's default value.") - elif env_variables[key] != value: - missing_or_mismatched.append( - f"Environment variables '{key}' value mismatch, expected value: '{value}', got value: '{env_variables[key]}'.") + for target_key, target_value in target_variables.items(): + env_value = env_variables.get(target_key, "") + if env_value != target_value: + mismatched.append(f"Environment variables '{target_key}' value mismatch, " + f"expected value: '{target_value}', got value: '{env_value}'.") - return missing_or_mismatched + return mismatched - def __print_result(self, missing_or_mismatched): - if missing_or_mismatched: + def __print_result(self, mismatched): + if mismatched: self.logger.warning("Missing or different environment variables:") - for item in missing_or_mismatched: + for item in mismatched: self.logger.warning(item) else: self.logger.info("All required environment variables are present and have the correct values.") diff --git a/profiler/msprof_analyze/precheck/env_check/utils.py b/profiler/msprof_analyze/precheck/env_check/file.py similarity index 53% rename from profiler/msprof_analyze/precheck/env_check/utils.py rename to profiler/msprof_analyze/precheck/env_check/file.py index 5407da7a8b..6a601904e2 100644 --- a/profiler/msprof_analyze/precheck/env_check/utils.py +++ b/profiler/msprof_analyze/precheck/env_check/file.py @@ -36,41 +36,67 @@ def check_path_valid(path: str, max_size: int = Constant.MAX_READ_FILE_BYTES) -> if os.path.getsize(path) > max_size: logger.error(f"The path \"{path}\" is too large to read. Please check the path.") return False - if not os.access(path, os.R_OK): - logger.error(f"The path \"{path}\" does not have permission to read. ") - return False return True def create_file(file_path): """ - 根据提供的路径创建一个文件。如果目录不存在,则先创建目录。 + 根据提供的路径创建一个文件,覆盖原有文件。如果目录不存在,则先创建目录。 """ directory = os.path.dirname(file_path) if not os.path.exists(directory): - os.makedirs(directory) + os.makedirs(directory, mode=Constant.DIR_AUTHORITY) try: with open(file_path, 'w') as file: - pass + os.chmod(file_path, Constant.File_AUTHORITY) except Exception as e: logger.error(f"Failed to create file: {e}") -class SafeOpen: +class File: """ - open的安全上下文管理器 + open的安全文件操作类,使用with语句进行上下文管理 """ - def __init__(self: any, file_path: str, mode: str = "r", max_size: int = Constant.MAX_READ_FILE_BYTES) -> None: + def __init__(self, file_path: str, mode: str = "r", max_size: int = Constant.MAX_READ_FILE_BYTES): self.file_path = file_path self.mode = mode self.max_size = max_size - self.file_reader = None + self.file_handler = None def __enter__(self: any) -> any: if not check_path_valid(self.file_path, max_size=self.max_size): return None - self.file_reader = open(self.file_path, self.mode, encoding='utf-8') - return self.file_reader + self.file_handler = open(self.file_path, self.mode, encoding='utf-8') + return self.file_handler def __exit__(self, exc_type, exc_val, exc_tb): - if self.file_reader: - self.file_reader.close() + if self.file_handler: + self.file_handler.close() + +class FileReader(File): + """ + 读取文件内容的类,使用with语句进行上下文管理 + """ + def __init__(self, file_path: str, max_size: int = Constant.MAX_READ_FILE_BYTES): + super().__init__(file_path, mode="r", max_size=max_size) + + def __enter__(self): + if not os.access(self.file_path, os.R_OK): + logger.error(f"The path \"{self.file_path}\" does not have permission to read. ") + return None + self.file_handler = super().__enter__() + return self.file_handler + +class FileWriter(File): + """ + 写入文件内容的类,使用with语句进行上下文管理 + """ + def __init__(self, file_path: str, max_size: int = Constant.MAX_WRITE_FILE_BYTES): + super().__init__(file_path, mode="w", max_size=max_size) + + def __enter__(self): + if not os.access(self.file_path, os.W_OK): + logger.error(f"The path \"{self.file_path}\" does not have permission to write. ") + return None + self.file_handler = super().__enter__() + return self.file_handler + diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index e71909992e..c68979535d 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -19,7 +19,7 @@ import importlib.metadata from environment_check import SoftwareCheck from constant import Constant -from utils import SafeOpen, create_file +from file import FileReader, create_file class PythonLibraryCheck(SoftwareCheck): @@ -45,7 +45,7 @@ class PythonLibraryCheck(SoftwareCheck): def __get_target_libraries(self): requirements_file_path = os.path.abspath(Constant.TARGET_LIBRARY_FILE_PATH) target_libs = {} - with SafeOpen(requirements_file_path, 'r') as file: + with FileReader(requirements_file_path) as file: if not file: return {} for line in file: diff --git a/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml b/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml index e0906447d1..92e26ce7e5 100644 --- a/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml +++ b/profiler/msprof_analyze/precheck/env_check/target_config/target_env_variables.yaml @@ -1,48 +1,51 @@ # 可根据场景修改或添加环境变量目标值 # CANN -TE_PARALLEL_COMPILER: 8 +TE_PARALLEL_COMPILER: "" # IGNORE_INFER_ERROR: -AUTO_USE_UC_MEMORY: 1 -ACLNN_CACHE_LIMIT: 10000 -ENABLE_DYNAMIC_SHAPE_MULTI_STREAM: 0 -# MAX_RUNTIME_CORE_NUMBER: -HCCL_CONNECT_TIMEOUT: 120 -HCCL_EXEC_TIMEOUT: 1836 -HCCL_ALGO: "level0:NA;level1:H-D_R" -HCCL_BUFFSIZE: 200 -HCCL_INTRA_PCIE_ENABLE: 1 -HCCL_INTRA_ROCE_ENABLE: 0 -HCCL_RDMA_TC: 132 -HCCL_RDMA_SL: 4 -HCCL_RDMA_TIMEOUT: 20 -HCCL_RDMA_RETRY_CNT: 7 -HCCL_RDMA_PCIE_DIRECT_POST_NOSTRICT: "TRUE" -HCCL_RDMA_QPS_PER_CONNECTION: 1 -HCCL_MULTI_QP_THRESHOLD: 512 -HCCL_OP_EXPANSION_MODE: "HOST" +AUTO_USE_UC_MEMORY: "" +ACLNN_CACHE_LIMIT: "" +ENABLE_DYNAMIC_SHAPE_MULTI_STREAM: "" +MAX_RUNTIME_CORE_NUMBER: "" +HCCL_CONNECT_TIMEOUT: 1200 +HCCL_EXEC_TIMEOUT: 1200 +HCCL_ALGO: "" +HCCL_BUFFSIZE: "" +HCCL_INTRA_PCIE_ENABLE: "" +HCCL_INTRA_ROCE_ENABLE: "" +HCCL_RDMA_TC: "" +HCCL_RDMA_SL: "" +HCCL_RDMA_TIMEOUT: "" +HCCL_RDMA_RETRY_CNT: "" +HCCL_RDMA_PCIE_DIRECT_POST_NOSTRICT: "" +HCCL_RDMA_QPS_PER_CONNECTION: "" +HCCL_MULTI_QP_THRESHOLD: "" +HCCL_OP_EXPANSION_MODE: "" HCCL_DETERMINISTIC: "false" # torch_npu -INF_NAN_MODE_ENABLE: 1 -COMBINED_ENABLE: 0 +INF_NAN_MODE_ENABLE: "" +COMBINED_ENABLE: 1 ASCEND_LAUNCH_BLOCKING: 0 # ACL_OP_COMPILER_CACHE_DIR: ACL_OP_COMPILER_CACHE_MODE: "enable" -# PYTORCH_NPU_ALLOC_CONF: -HCCL_ASYNC_ERROR_HANDLING: 1 -HCCL_DESYNC_DEBUG: 0 -HCCL_EVENT_TIMEOUT: 0 -P2P_HCCL_BUFFSIZE: 20 -PYTORCH_NO_NPU_MEMORY_CACHING: -OOM_SNAPSHOT_ENABLE: 0 +PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" +HCCL_ASYNC_ERROR_HANDLING: "" +HCCL_DESYNC_DEBUG: "" +HCCL_EVENT_TIMEOUT: "" +P2P_HCCL_BUFFSIZE: "" +PYTORCH_NO_NPU_MEMORY_CACHING: "" +OOM_SNAPSHOT_ENABLE: "" # OOM_SNAPSHOT_PATH: # RANK_TABLE_FILE: # TORCH_NPU_DISABLED_WARNING: -TASK_QUEUE_ENABLE: 1 +TASK_QUEUE_ENABLE: 2 # ACL_DEVICE_SYNC_TIMEOUT: MULTI_STREAM_MEMORY_REUSE: 1 NPU_ASD_ENABLE: 0 -NPU_ASD_UPPER_THRESH: "1000000,10000" -NPU_ASD_SIGMA_THRESH: "100000,5000" -INF_NAN_MODE_FORCE_DISABLE: 0 \ No newline at end of file +NPU_ASD_UPPER_THRESH: "" +NPU_ASD_SIGMA_THRESH: "" +INF_NAN_MODE_FORCE_DISABLE: "" + +# Other +CUDA_DEVICE_MAX_CONNECTIONS: 1 \ No newline at end of file -- Gitee From 5d12ba241fa944de32ed35b2b543e40fb97d4f3a Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Wed, 26 Feb 2025 16:31:08 +0800 Subject: [PATCH 04/15] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=9A=E4=BC=98=E5=8C=96=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/env_check/constant.py | 3 + .../env_check/environment_variable_check.py | 4 +- .../msprof_analyze/precheck/env_check/file.py | 121 ++++++++++-------- .../env_check/python_library_check.py | 4 +- 4 files changed, 75 insertions(+), 57 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/constant.py b/profiler/msprof_analyze/precheck/env_check/constant.py index 0d89d18455..28d630e3ba 100644 --- a/profiler/msprof_analyze/precheck/env_check/constant.py +++ b/profiler/msprof_analyze/precheck/env_check/constant.py @@ -14,6 +14,9 @@ # limitations under the License. class Constant(object): + # Max path size + MAX_PATH_SIZE = 255 + # Input file size limit 64 KB MAX_READ_FILE_BYTES = 64 * 1024 MAX_WRITE_FILE_BYTES = 64 * 1024 diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 41ad49c4d5..9f11070456 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -18,7 +18,7 @@ import logging from environment_check import SoftwareCheck from constant import Constant -from file import FileReader, create_file +from file import File, FileReader class EnvironmentVariableCheck(SoftwareCheck): @@ -30,7 +30,7 @@ class EnvironmentVariableCheck(SoftwareCheck): self.rank = args.get("rank", 0) log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.rank}.log") - create_file(log_file_path) + File.create_file(log_file_path) file_handler = logging.FileHandler(log_file_path) self.logger = logging.getLogger() self.logger.addHandler(file_handler) diff --git a/profiler/msprof_analyze/precheck/env_check/file.py b/profiler/msprof_analyze/precheck/env_check/file.py index 6a601904e2..74e8ff1537 100644 --- a/profiler/msprof_analyze/precheck/env_check/file.py +++ b/profiler/msprof_analyze/precheck/env_check/file.py @@ -20,37 +20,6 @@ from constant import Constant logger = logging.getLogger() -def check_path_valid(path: str, max_size: int = Constant.MAX_READ_FILE_BYTES) -> bool: - """ - 检查给定的文件路径是否有效。 - """ - if not path: - logger.error("The path is empty. Please enter a valid path.") - return False - if not os.path.isfile(path): - logger.error(f"The path \"{path}\" is not a file. Please check the path.") - return False - if os.path.islink(path): - logger.error(f"The path \"{path}\" is link. Please check the path.") - return False - if os.path.getsize(path) > max_size: - logger.error(f"The path \"{path}\" is too large to read. Please check the path.") - return False - return True - -def create_file(file_path): - """ - 根据提供的路径创建一个文件,覆盖原有文件。如果目录不存在,则先创建目录。 - """ - directory = os.path.dirname(file_path) - if not os.path.exists(directory): - os.makedirs(directory, mode=Constant.DIR_AUTHORITY) - try: - with open(file_path, 'w') as file: - os.chmod(file_path, Constant.File_AUTHORITY) - except Exception as e: - logger.error(f"Failed to create file: {e}") - class File: """ open的安全文件操作类,使用with语句进行上下文管理 @@ -61,42 +30,88 @@ class File: self.max_size = max_size self.file_handler = None - def __enter__(self: any) -> any: - if not check_path_valid(self.file_path, max_size=self.max_size): - return None - self.file_handler = open(self.file_path, self.mode, encoding='utf-8') - return self.file_handler - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.file_handler: - self.file_handler.close() + @staticmethod + def check_path_valid(path: str, max_size: int = Constant.MAX_READ_FILE_BYTES) -> bool: + """ + 检查给定的文件路径是否有效。 + """ + if not path: + logger.error("The path is empty. Please enter a valid path.") + return False + if len(path) > Constant.MAX_PATH_SIZE: + logger.error(f"The length of file path is large than {Constant.MAX_PATH_SIZE}. Please check the path.") + return False + if os.path.getsize(path) > max_size: + logger.error(f"The path \"{path}\" is too large to read. Please check the path.") + return False + if not os.path.isfile(path): + logger.error(f"The path \"{path}\" is not a file. Please check the path.") + return False + if os.path.islink(path): + logger.error(f"The path \"{path}\" is link. Please check the path.") + return False + return True + + @staticmethod + def create_file(file_path): + """ + 根据提供的路径创建一个文件,覆盖原有文件。如果目录不存在,则先创建目录。 + """ + directory = os.path.dirname(file_path) + if not os.path.exists(directory): + os.makedirs(directory, mode=Constant.DIR_AUTHORITY) + try: + with open(file_path, 'w') as file: + os.chmod(file_path, Constant.File_AUTHORITY) + except Exception as e: + logger.error(f"Failed to create file: {e}") class FileReader(File): """ 读取文件内容的类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, max_size: int = Constant.MAX_READ_FILE_BYTES): - super().__init__(file_path, mode="r", max_size=max_size) + def __init__(self, file_path: str, mode="r", max_size: int = Constant.MAX_READ_FILE_BYTES): + super().__init__(file_path, mode=mode, max_size=max_size) def __enter__(self): - if not os.access(self.file_path, os.R_OK): - logger.error(f"The path \"{self.file_path}\" does not have permission to read. ") + if not self.check_path_valid(self.file_path) or not self.check_read_permission(self.file_path): + logger.error("Cannot access the file: {self.file_path}") return None - self.file_handler = super().__enter__() + self.file_handler = open(self.file_path, self.mode, encoding='utf-8') return self.file_handler + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file_handler: + self.file_handler.close() + + @staticmethod + def check_read_permission(file_path): + if not os.access(file_path, os.R_OK): + logger.error(f"The path \"{file_path}\" does not have permission to read. ") + return False + return True class FileWriter(File): """ 写入文件内容的类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, max_size: int = Constant.MAX_WRITE_FILE_BYTES): - super().__init__(file_path, mode="w", max_size=max_size) + def __init__(self, file_path: str, mode="w", max_size: int = Constant.MAX_WRITE_FILE_BYTES): + super().__init__(file_path, mode=mode, max_size=max_size) def __enter__(self): - if not os.access(self.file_path, os.W_OK): - logger.error(f"The path \"{self.file_path}\" does not have permission to write. ") - return None - self.file_handler = super().__enter__() + if not self.check_path_valid(self.file_path) or not self.check_write_permission(self.file_path): + logger.info("File does not exist, create new file: {self.file_path}") + self.create_file(self.file_path) + self.file_handler = open(self.file_path, self.mode, encoding='utf-8') return self.file_handler - - + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file_handler: + self.file_handler.close() + + @staticmethod + def check_write_permission(file_path): + if not os.access(file_path, os.W_OK): + logger.error(f"The path \"{file_path}\" does not have permission to write. ") + return False + return True diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index c68979535d..a1d7884e8a 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -19,7 +19,7 @@ import importlib.metadata from environment_check import SoftwareCheck from constant import Constant -from file import FileReader, create_file +from file import File, FileReader class PythonLibraryCheck(SoftwareCheck): @@ -31,7 +31,7 @@ class PythonLibraryCheck(SoftwareCheck): self.rank = args.get("rank", 0) log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.rank}.log") - create_file(log_file_path) + File.create_file(log_file_path) file_handler = logging.FileHandler(log_file_path) self.logger = logging.getLogger() self.logger.addHandler(file_handler) -- Gitee From 5a0ced67b79182002fe11116b476063f744a3fd5 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Fri, 28 Feb 2025 12:16:14 +0800 Subject: [PATCH 05/15] =?UTF-8?q?=E9=80=82=E9=85=8D=E4=BF=AE=E6=94=B9args?= =?UTF-8?q?=E4=BC=A0=E5=8F=82=EF=BC=8C=E4=B8=BB=E5=8D=A1=E8=BF=9B=E7=A8=8B?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E6=A3=80=E6=B5=8B=E5=B9=B6=E5=86=99=E5=85=A5?= =?UTF-8?q?=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 26 +++++++++++-------- .../env_check/python_library_check.py | 26 +++++++++++-------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 9f11070456..dc05beba5f 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -26,20 +26,24 @@ class EnvironmentVariableCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) - self.output_path = args.get("output", "./") - self.rank = args.get("rank", 0) - log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.rank}.log") + self.output_path = args.output + self.rank = args.rank + self.node_rank = args.node_rank + self.local_rank = args.local_rank - File.create_file(log_file_path) - file_handler = logging.FileHandler(log_file_path) - self.logger = logging.getLogger() - self.logger.addHandler(file_handler) + if self.local_rank == 0: + log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.log") + File.create_file(log_file_path) + file_handler = logging.FileHandler(log_file_path) + self.logger = logging.getLogger() + self.logger.addHandler(file_handler) def check(self): - target_variables = self.__get_target_variables() - env_variables = self.__get_env_variables() - mismatched = self.__check_env_variables(target_variables, env_variables) - self.__print_result(mismatched) + if self.local_rank == 0: + target_variables = self.__get_target_variables() + env_variables = self.__get_env_variables() + mismatched = self.__check_env_variables(target_variables, env_variables) + self.__print_result(mismatched) def __get_target_variables(self): variables_file_path = os.path.abspath(Constant.TARGET_VARIABLE_FILE_PATH) diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index a1d7884e8a..d51a311257 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -27,20 +27,24 @@ class PythonLibraryCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) - self.output_path = args.get("output", "./") - self.rank = args.get("rank", 0) - log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.rank}.log") + self.output_path = args.output + self.rank = args.rank + self.node_rank = args.node_rank + self.local_rank = args.local_rank - File.create_file(log_file_path) - file_handler = logging.FileHandler(log_file_path) - self.logger = logging.getLogger() - self.logger.addHandler(file_handler) + if self.local_rank == 0: + log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.log") + File.create_file(log_file_path) + file_handler = logging.FileHandler(log_file_path) + self.logger = logging.getLogger() + self.logger.addHandler(file_handler) def check(self): - target_libraries = self.__get_target_libraries() - env_libraries = self.__get_env_libraries() - missing_or_different = self.__check_env_libraries(target_libraries, env_libraries) - self.__print_result(missing_or_different) + if self.local_rank == 0: + target_libraries = self.__get_target_libraries() + env_libraries = self.__get_env_libraries() + missing_or_different = self.__check_env_libraries(target_libraries, env_libraries) + self.__print_result(missing_or_different) def __get_target_libraries(self): requirements_file_path = os.path.abspath(Constant.TARGET_LIBRARY_FILE_PATH) -- Gitee From d23d4a015c10e6678fbe5ec3e14232ec5a2387f1 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Fri, 28 Feb 2025 15:33:50 +0800 Subject: [PATCH 06/15] =?UTF-8?q?file.py=E6=96=87=E4=BB=B6=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=EF=BC=8C=E6=96=B0=E5=A2=9Eopen,close=E5=87=BD?= =?UTF-8?q?=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/env_check/constant.py | 3 +- .../env_check/environment_variable_check.py | 6 +-- .../env_check/python_library_check.py | 6 +-- .../precheck/env_check/{ => utils}/file.py | 50 +++++++++++++++---- 4 files changed, 46 insertions(+), 19 deletions(-) rename profiler/msprof_analyze/precheck/env_check/{ => utils}/file.py (71%) diff --git a/profiler/msprof_analyze/precheck/env_check/constant.py b/profiler/msprof_analyze/precheck/env_check/constant.py index 28d630e3ba..b3eaa1c381 100644 --- a/profiler/msprof_analyze/precheck/env_check/constant.py +++ b/profiler/msprof_analyze/precheck/env_check/constant.py @@ -18,8 +18,7 @@ class Constant(object): MAX_PATH_SIZE = 255 # Input file size limit 64 KB - MAX_READ_FILE_BYTES = 64 * 1024 - MAX_WRITE_FILE_BYTES = 64 * 1024 + MAX_FILE_BYTES = 64 * 1024 # Athority of directory and file DIR_AUTHORITY = 0o750 diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index dc05beba5f..17c47945b8 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -18,7 +18,7 @@ import logging from environment_check import SoftwareCheck from constant import Constant -from file import File, FileReader +from utils.file import File, FileReader class EnvironmentVariableCheck(SoftwareCheck): @@ -43,7 +43,7 @@ class EnvironmentVariableCheck(SoftwareCheck): target_variables = self.__get_target_variables() env_variables = self.__get_env_variables() mismatched = self.__check_env_variables(target_variables, env_variables) - self.__print_result(mismatched) + self.__log_result(mismatched) def __get_target_variables(self): variables_file_path = os.path.abspath(Constant.TARGET_VARIABLE_FILE_PATH) @@ -71,7 +71,7 @@ class EnvironmentVariableCheck(SoftwareCheck): return mismatched - def __print_result(self, mismatched): + def __log_result(self, mismatched): if mismatched: self.logger.warning("Missing or different environment variables:") for item in mismatched: diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index d51a311257..e0f455063e 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -19,7 +19,7 @@ import importlib.metadata from environment_check import SoftwareCheck from constant import Constant -from file import File, FileReader +from utils.file import File, FileReader class PythonLibraryCheck(SoftwareCheck): @@ -44,7 +44,7 @@ class PythonLibraryCheck(SoftwareCheck): target_libraries = self.__get_target_libraries() env_libraries = self.__get_env_libraries() missing_or_different = self.__check_env_libraries(target_libraries, env_libraries) - self.__print_result(missing_or_different) + self.__log_result(missing_or_different) def __get_target_libraries(self): requirements_file_path = os.path.abspath(Constant.TARGET_LIBRARY_FILE_PATH) @@ -82,7 +82,7 @@ class PythonLibraryCheck(SoftwareCheck): missing_or_mismatched.append(f"Third-party libraries '{lib_name}' missing, expected version: '{specifier}'.") return missing_or_mismatched - def __print_result(self, missing_or_mismatched): + def __log_result(self, missing_or_mismatched): if missing_or_mismatched: self.logger.warning("Missing or version-mismatched Python libraries:") for item in missing_or_mismatched: diff --git a/profiler/msprof_analyze/precheck/env_check/file.py b/profiler/msprof_analyze/precheck/env_check/utils/file.py similarity index 71% rename from profiler/msprof_analyze/precheck/env_check/file.py rename to profiler/msprof_analyze/precheck/env_check/utils/file.py index 74e8ff1537..604c739b16 100644 --- a/profiler/msprof_analyze/precheck/env_check/file.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/file.py @@ -20,18 +20,19 @@ from constant import Constant logger = logging.getLogger() -class File: +class File(object): """ open的安全文件操作类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, mode: str = "r", max_size: int = Constant.MAX_READ_FILE_BYTES): + def __init__(self, file_path: str, mode: str = "r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): self.file_path = file_path self.mode = mode self.max_size = max_size + self.encoding = encoding self.file_handler = None @staticmethod - def check_path_valid(path: str, max_size: int = Constant.MAX_READ_FILE_BYTES) -> bool: + def check(path: str, max_size: int = Constant.MAX_FILE_BYTES) -> bool: """ 检查给定的文件路径是否有效。 """ @@ -70,11 +71,11 @@ class FileReader(File): """ 读取文件内容的类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, mode="r", max_size: int = Constant.MAX_READ_FILE_BYTES): - super().__init__(file_path, mode=mode, max_size=max_size) + def __init__(self, file_path: str, mode="r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): + super().__init__(file_path, mode=mode, max_size=max_size, encoding=encoding) def __enter__(self): - if not self.check_path_valid(self.file_path) or not self.check_read_permission(self.file_path): + if not self.check(self.file_path): logger.error("Cannot access the file: {self.file_path}") return None self.file_handler = open(self.file_path, self.mode, encoding='utf-8') @@ -85,21 +86,34 @@ class FileReader(File): self.file_handler.close() @staticmethod - def check_read_permission(file_path): + def check(file_path): + if not File.check(file_path): + return False if not os.access(file_path, os.R_OK): logger.error(f"The path \"{file_path}\" does not have permission to read. ") return False return True + + def open(self, file_path: str, mode="r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): + super().__init__(file_path, mode=mode, max_size=max_size) + if not self.check(file_path): + logger.error("Cannot access the file: {file_path}") + self.file_handler = open(self.file_path, self.mode, encoding=encoding) + return self.file_handler + + def close(self): + if self.file_handler: + self.file_handler.close() class FileWriter(File): """ 写入文件内容的类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, mode="w", max_size: int = Constant.MAX_WRITE_FILE_BYTES): - super().__init__(file_path, mode=mode, max_size=max_size) + def __init__(self, file_path: str, mode="w", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): + super().__init__(file_path, mode=mode, max_size=max_size, encoding=encoding) def __enter__(self): - if not self.check_path_valid(self.file_path) or not self.check_write_permission(self.file_path): + if not self.check(self.file_path): logger.info("File does not exist, create new file: {self.file_path}") self.create_file(self.file_path) self.file_handler = open(self.file_path, self.mode, encoding='utf-8') @@ -110,8 +124,22 @@ class FileWriter(File): self.file_handler.close() @staticmethod - def check_write_permission(file_path): + def check(file_path): + if not File.check(file_path): + return False if not os.access(file_path, os.W_OK): logger.error(f"The path \"{file_path}\" does not have permission to write. ") return False return True + + def open(self, file_path: str, mode="w", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): + super().__init__(file_path, mode=mode, max_size=max_size) + if not self.check(file_path): + logger.info("File does not exist, create new file: {self.file_path}") + self.create_file(self.file_path) + self.file_handler = open(self.file_path, self.mode, encoding=encoding) + return self.file_handler + + def close(self): + if self.file_handler: + self.file_handler.close() -- Gitee From 48cbcb158a60707d3bf6abfa961687bab9764a42 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Fri, 28 Feb 2025 17:20:47 +0800 Subject: [PATCH 07/15] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/env_check/environment_variable_check.py | 2 +- .../precheck/env_check/python_library_check.py | 2 +- .../precheck/env_check/{ => utils}/constant.py | 2 +- profiler/msprof_analyze/precheck/env_check/utils/file.py | 6 ++---- 4 files changed, 5 insertions(+), 7 deletions(-) rename profiler/msprof_analyze/precheck/env_check/{ => utils}/constant.py (94%) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 17c47945b8..f66fe1cd58 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -17,7 +17,7 @@ import yaml import logging from environment_check import SoftwareCheck -from constant import Constant +from utils.constant import Constant from utils.file import File, FileReader diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index e0f455063e..e36799e911 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -18,7 +18,7 @@ import pkg_resources import importlib.metadata from environment_check import SoftwareCheck -from constant import Constant +from utils.constant import Constant from utils.file import File, FileReader diff --git a/profiler/msprof_analyze/precheck/env_check/constant.py b/profiler/msprof_analyze/precheck/env_check/utils/constant.py similarity index 94% rename from profiler/msprof_analyze/precheck/env_check/constant.py rename to profiler/msprof_analyze/precheck/env_check/utils/constant.py index b3eaa1c381..93526e403d 100644 --- a/profiler/msprof_analyze/precheck/env_check/constant.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/constant.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/profiler/msprof_analyze/precheck/env_check/utils/file.py b/profiler/msprof_analyze/precheck/env_check/utils/file.py index 604c739b16..eb22b08a0b 100644 --- a/profiler/msprof_analyze/precheck/env_check/utils/file.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/file.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,7 +15,7 @@ import os import logging -from constant import Constant +from utils.constant import Constant logger = logging.getLogger() @@ -95,7 +95,6 @@ class FileReader(File): return True def open(self, file_path: str, mode="r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - super().__init__(file_path, mode=mode, max_size=max_size) if not self.check(file_path): logger.error("Cannot access the file: {file_path}") self.file_handler = open(self.file_path, self.mode, encoding=encoding) @@ -133,7 +132,6 @@ class FileWriter(File): return True def open(self, file_path: str, mode="w", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - super().__init__(file_path, mode=mode, max_size=max_size) if not self.check(file_path): logger.info("File does not exist, create new file: {self.file_path}") self.create_file(self.file_path) -- Gitee From fe662d154745455c18be340bc91ca3af47dcca58 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Fri, 28 Feb 2025 20:41:10 +0800 Subject: [PATCH 08/15] =?UTF-8?q?file.py=E6=96=87=E4=BB=B6=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 8 +- .../env_check/python_library_check.py | 8 +- .../precheck/env_check/utils/__init__.py | 14 +++ .../precheck/env_check/utils/file.py | 89 ++++--------------- 4 files changed, 41 insertions(+), 78 deletions(-) create mode 100644 profiler/msprof_analyze/precheck/env_check/utils/__init__.py diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index f66fe1cd58..3ded95b49c 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -18,7 +18,7 @@ import logging from environment_check import SoftwareCheck from utils.constant import Constant -from utils.file import File, FileReader +from utils.file import File, FileOpen class EnvironmentVariableCheck(SoftwareCheck): @@ -47,11 +47,11 @@ class EnvironmentVariableCheck(SoftwareCheck): def __get_target_variables(self): variables_file_path = os.path.abspath(Constant.TARGET_VARIABLE_FILE_PATH) - with FileReader(variables_file_path) as file: - if not file: + with FileOpen(variables_file_path) as file: + if not file.file_reader: return {} try: - res = yaml.safe_load(file) + res = yaml.safe_load(file.file_reader) except yaml.YAMLError as e: self.logger.error(f"Error when loading target variables: {e}") return res if res is not None else {} diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index e36799e911..6e6b6e8b5f 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -19,7 +19,7 @@ import importlib.metadata from environment_check import SoftwareCheck from utils.constant import Constant -from utils.file import File, FileReader +from utils.file import File, FileOpen class PythonLibraryCheck(SoftwareCheck): @@ -49,10 +49,10 @@ class PythonLibraryCheck(SoftwareCheck): def __get_target_libraries(self): requirements_file_path = os.path.abspath(Constant.TARGET_LIBRARY_FILE_PATH) target_libs = {} - with FileReader(requirements_file_path) as file: - if not file: + with FileOpen(requirements_file_path) as file: + if not file.file_reader: return {} - for line in file: + for line in file.file_reader: line = line.strip() if line and not line.startswith('#'): try: diff --git a/profiler/msprof_analyze/precheck/env_check/utils/__init__.py b/profiler/msprof_analyze/precheck/env_check/utils/__init__.py new file mode 100644 index 0000000000..b14094e3f9 --- /dev/null +++ b/profiler/msprof_analyze/precheck/env_check/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/profiler/msprof_analyze/precheck/env_check/utils/file.py b/profiler/msprof_analyze/precheck/env_check/utils/file.py index eb22b08a0b..f077af4f60 100644 --- a/profiler/msprof_analyze/precheck/env_check/utils/file.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/file.py @@ -20,17 +20,10 @@ from utils.constant import Constant logger = logging.getLogger() -class File(object): +class File: """ open的安全文件操作类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, mode: str = "r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - self.file_path = file_path - self.mode = mode - self.max_size = max_size - self.encoding = encoding - self.file_handler = None - @staticmethod def check(path: str, max_size: int = Constant.MAX_FILE_BYTES) -> bool: """ @@ -45,9 +38,6 @@ class File(object): if os.path.getsize(path) > max_size: logger.error(f"The path \"{path}\" is too large to read. Please check the path.") return False - if not os.path.isfile(path): - logger.error(f"The path \"{path}\" is not a file. Please check the path.") - return False if os.path.islink(path): logger.error(f"The path \"{path}\" is link. Please check the path.") return False @@ -67,77 +57,36 @@ class File(object): except Exception as e: logger.error(f"Failed to create file: {e}") -class FileReader(File): +class FileOpen: """ 读取文件内容的类,使用with语句进行上下文管理 """ - def __init__(self, file_path: str, mode="r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - super().__init__(file_path, mode=mode, max_size=max_size, encoding=encoding) + def __init__(self, path: str, mode: str = "r", max_size: int = Constant.MAX_FILE_BYTES): + self.path = path + self.mode = mode + self.max_size = max_size + self.file_reader = None def __enter__(self): - if not self.check(self.file_path): - logger.error("Cannot access the file: {self.file_path}") + if not self.check(self.path): + logger.error(f"Cannot access the file: {self.path}") return None - self.file_handler = open(self.file_path, self.mode, encoding='utf-8') - return self.file_handler + self.file_reader = open(self.path, self.mode) + return self def __exit__(self, exc_type, exc_val, exc_tb): - if self.file_handler: - self.file_handler.close() + if self.file_reader: + self.file_reader.close() @staticmethod - def check(file_path): - if not File.check(file_path): + def check(file_path, max_size: int = Constant.MAX_FILE_BYTES): + if not File.check(file_path, max_size): + logger.error(f"FileReader check failed: {file_path}") + return False + if not os.path.isfile(file_path): + logger.error(f"The read path \"{file_path}\" is not a file.") return False if not os.access(file_path, os.R_OK): logger.error(f"The path \"{file_path}\" does not have permission to read. ") return False return True - - def open(self, file_path: str, mode="r", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - if not self.check(file_path): - logger.error("Cannot access the file: {file_path}") - self.file_handler = open(self.file_path, self.mode, encoding=encoding) - return self.file_handler - - def close(self): - if self.file_handler: - self.file_handler.close() - -class FileWriter(File): - """ - 写入文件内容的类,使用with语句进行上下文管理 - """ - def __init__(self, file_path: str, mode="w", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - super().__init__(file_path, mode=mode, max_size=max_size, encoding=encoding) - - def __enter__(self): - if not self.check(self.file_path): - logger.info("File does not exist, create new file: {self.file_path}") - self.create_file(self.file_path) - self.file_handler = open(self.file_path, self.mode, encoding='utf-8') - return self.file_handler - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.file_handler: - self.file_handler.close() - - @staticmethod - def check(file_path): - if not File.check(file_path): - return False - if not os.access(file_path, os.W_OK): - logger.error(f"The path \"{file_path}\" does not have permission to write. ") - return False - return True - - def open(self, file_path: str, mode="w", max_size: int = Constant.MAX_FILE_BYTES, encoding='utf-8'): - if not self.check(file_path): - logger.info("File does not exist, create new file: {self.file_path}") - self.create_file(self.file_path) - self.file_handler = open(self.file_path, self.mode, encoding=encoding) - return self.file_handler - - def close(self): - if self.file_handler: - self.file_handler.close() -- Gitee From b8e2da40ac09c7a70441e941564d0118bef39561 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Tue, 4 Mar 2025 20:21:21 +0800 Subject: [PATCH 09/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=86=99=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E7=B1=BB=EF=BC=8Clog/data=E5=88=86=E5=BC=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 22 ++++--- .../env_check/python_library_check.py | 21 ++++--- .../precheck/env_check/utils/file.py | 58 +++++++++++++++++++ 3 files changed, 84 insertions(+), 17 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 3ded95b49c..bbc49c80c0 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -18,8 +18,7 @@ import logging from environment_check import SoftwareCheck from utils.constant import Constant -from utils.file import File, FileOpen - +from utils.file import File, FileOpen, FdOpen class EnvironmentVariableCheck(SoftwareCheck): CHECK_TYPE = "env_variable" @@ -32,9 +31,12 @@ class EnvironmentVariableCheck(SoftwareCheck): self.local_rank = args.local_rank if self.local_rank == 0: - log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.log") - File.create_file(log_file_path) - file_handler = logging.FileHandler(log_file_path) + self.data_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.txt") + self.log_file_path = os.path.join(self.output_path, f"log/{self.CHECK_TYPE}_{self.node_rank}.log") + File.create_file(self.data_file_path) + File.create_file(self.log_file_path) + + file_handler = logging.FileHandler(self.log_file_path) self.logger = logging.getLogger() self.logger.addHandler(file_handler) @@ -73,11 +75,13 @@ class EnvironmentVariableCheck(SoftwareCheck): def __log_result(self, mismatched): if mismatched: - self.logger.warning("Missing or different environment variables:") - for item in mismatched: - self.logger.warning(item) + with FdOpen(self.data_file_path) as file: + file.write(f"NODE_RANK {self.node_rank} Missing or different environment variables:\n") + for item in mismatched: + file.write(item + "\n") else: - self.logger.info("All required environment variables are present and have the correct values.") + with FdOpen(self.data_file_path) as file: + file.write(f"NODE_RANK {self.node_rank} All required environment variables are present and have the correct values.\n") if __name__ == "__main__": diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index 6e6b6e8b5f..ce6816d32a 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -19,7 +19,7 @@ import importlib.metadata from environment_check import SoftwareCheck from utils.constant import Constant -from utils.file import File, FileOpen +from utils.file import File, FileOpen, FdOpen class PythonLibraryCheck(SoftwareCheck): @@ -33,9 +33,12 @@ class PythonLibraryCheck(SoftwareCheck): self.local_rank = args.local_rank if self.local_rank == 0: - log_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.log") - File.create_file(log_file_path) - file_handler = logging.FileHandler(log_file_path) + self.data_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.txt") + self.log_file_path = os.path.join(self.output_path, f"log/{self.CHECK_TYPE}_{self.node_rank}.log") + File.create_file(self.data_file_path) + File.create_file(self.log_file_path) + + file_handler = logging.FileHandler(self.log_file_path) self.logger = logging.getLogger() self.logger.addHandler(file_handler) @@ -84,11 +87,13 @@ class PythonLibraryCheck(SoftwareCheck): def __log_result(self, missing_or_mismatched): if missing_or_mismatched: - self.logger.warning("Missing or version-mismatched Python libraries:") - for item in missing_or_mismatched: - self.logger.warning(item) + with FdOpen(self.data_file_path) as file: + file.write(f"NODE_RANK {self.node_rank} Missing or version-mismatched Python libraries:\n") + for item in missing_or_mismatched: + file.write(item + "\n") else: - self.logger.info("All required Python libraries are present and have the correct versions.") + with FdOpen(self.data_file_path) as file: + file.write(f"NODE_RANK {self.node_rank} All required Python libraries are present and have the correct versions.\n") if __name__ == '__main__': python_library_check = PythonLibraryCheck() diff --git a/profiler/msprof_analyze/precheck/env_check/utils/file.py b/profiler/msprof_analyze/precheck/env_check/utils/file.py index f077af4f60..16ba4bb9c3 100644 --- a/profiler/msprof_analyze/precheck/env_check/utils/file.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/file.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +import stat import logging from utils.constant import Constant @@ -43,6 +44,17 @@ class File: return False return True + @staticmethod + def check_dir_for_create_file(file_dir): + """ + 创建文件需要目录有w和x权限,否则无法创建文件 + """ + if not os.access(file_dir, os.W_OK | os.X_OK): + logger.error(f"The path \"{file_dir}\" does not have permission to create file. ") + return False + return True + + @staticmethod def create_file(file_path): """ @@ -90,3 +102,49 @@ class FileOpen: logger.error(f"The path \"{file_path}\" does not have permission to read. ") return False return True + +class FdOpen: + """ + 新建和写入文件内容的类,使用with语句进行上下文管理 + """ + def __init__(self, path: str, mode: str="w", permission: int = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP, + flags: int = os.O_WRONLY | os.O_CREAT | os.O_TRUNC, newline: str = None) -> None: + self.path = path + self.mode = mode + self.permission = permission + self.flags = flags + self.newline = newline + self.fd = None + self.file_open = None + + def __enter__(self): + if not self.check(self.path): + logger.error(f"Cannot access the file: {self.path}") + return None + self.fd = os.open(self.path, self.flags, self.permission) + if self.newline is None: + self.file_open = os.fdopen(self.fd, self.mode) + else: + self.file_open = os.fdopen(self.fd, self.mode, newline=self.newline) + return self.file_open + + def __exit__(self, exc_type, exc_value, traceback): + if self.file_open: + self.file_open.close() + elif self.fd: + os.close(self.fd) + + @staticmethod + def check(file_path, max_size: int = Constant.MAX_FILE_BYTES): + if not os.path.exists(file_path): + return File.check_dir_for_create_file(os.path.dirname(file_path)) + if not File.check(file_path, max_size): + logger.error(f"FileReader check failed: {file_path}") + return False + if not os.path.isfile(file_path): + logger.error(f"The write path \"{file_path}\" is not a file.") + return False + if not os.access(file_path, os.W_OK): + logger.error(f"The path \"{file_path}\" does not have permission to write. ") + return False + return True -- Gitee From 4b8f5b58f4f80b706a89ed65d679e12e0daa38a5 Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Thu, 6 Mar 2025 11:04:55 +0800 Subject: [PATCH 10/15] =?UTF-8?q?cleancode=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 7 +++++-- .../precheck/env_check/python_library_check.py | 13 ++++++++----- .../msprof_analyze/precheck/env_check/utils/file.py | 4 +++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index bbc49c80c0..9d7790e3ad 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -13,13 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -import yaml import logging +import yaml + from environment_check import SoftwareCheck from utils.constant import Constant from utils.file import File, FileOpen, FdOpen + class EnvironmentVariableCheck(SoftwareCheck): CHECK_TYPE = "env_variable" @@ -81,7 +83,8 @@ class EnvironmentVariableCheck(SoftwareCheck): file.write(item + "\n") else: with FdOpen(self.data_file_path) as file: - file.write(f"NODE_RANK {self.node_rank} All required environment variables are present and have the correct values.\n") + file.write(f"NODE_RANK {self.node_rank} All required environment variables " + f"are present and have the correct values.\n") if __name__ == "__main__": diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index ce6816d32a..ae4ad4ad16 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -12,10 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import logging import os -import pkg_resources +import logging import importlib.metadata +import pkg_resources from environment_check import SoftwareCheck from utils.constant import Constant @@ -80,9 +80,11 @@ class PythonLibraryCheck(SoftwareCheck): if lib_name in env_libraries: installed_version = env_libraries[lib_name] if specifier and not specifier.contains(installed_version): - missing_or_mismatched.append(f"Third-party libraries '{lib_name}' version mismatch, expected '{specifier}', got '{installed_version}'.") + missing_or_mismatched.append(f"Third-party libraries '{lib_name}' version mismatch, " + f"expected '{specifier}', got '{installed_version}'.") else: - missing_or_mismatched.append(f"Third-party libraries '{lib_name}' missing, expected version: '{specifier}'.") + missing_or_mismatched.append(f"Third-party libraries '{lib_name}' missing, " + f"expected version: '{specifier}'.") return missing_or_mismatched def __log_result(self, missing_or_mismatched): @@ -93,7 +95,8 @@ class PythonLibraryCheck(SoftwareCheck): file.write(item + "\n") else: with FdOpen(self.data_file_path) as file: - file.write(f"NODE_RANK {self.node_rank} All required Python libraries are present and have the correct versions.\n") + file.write(f"NODE_RANK {self.node_rank} All required Python libraries " + f"are present and have the correct versions.\n") if __name__ == '__main__': python_library_check = PythonLibraryCheck() diff --git a/profiler/msprof_analyze/precheck/env_check/utils/file.py b/profiler/msprof_analyze/precheck/env_check/utils/file.py index 16ba4bb9c3..c1f4f3e025 100644 --- a/profiler/msprof_analyze/precheck/env_check/utils/file.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/file.py @@ -69,6 +69,7 @@ class File: except Exception as e: logger.error(f"Failed to create file: {e}") + class FileOpen: """ 读取文件内容的类,使用with语句进行上下文管理 @@ -103,11 +104,12 @@ class FileOpen: return False return True + class FdOpen: """ 新建和写入文件内容的类,使用with语句进行上下文管理 """ - def __init__(self, path: str, mode: str="w", permission: int = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP, + def __init__(self, path: str, mode: str = "w", permission: int = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP, flags: int = os.O_WRONLY | os.O_CREAT | os.O_TRUNC, newline: str = None) -> None: self.path = path self.mode = mode -- Gitee From a1185c29a530af551da20654402b9190e22175cf Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Fri, 7 Mar 2025 11:03:25 +0800 Subject: [PATCH 11/15] =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=9A1.=20logging?= =?UTF-8?q?=E5=AD=97=E8=8A=82=E6=89=93=E5=B1=8F=EF=BC=8C=E4=B8=8D=E7=94=9F?= =?UTF-8?q?=E6=88=90log=E6=96=87=E4=BB=B6=20=20=202.=20=E4=B8=BB=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=94=B6=E9=9B=86=E5=85=A8=E5=B1=80=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E5=B9=B6=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 21 +++++++++++++------ .../env_check/python_library_check.py | 21 +++++++++++++------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 9d7790e3ad..8e851246b0 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -12,12 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import glob import os import logging import yaml from environment_check import SoftwareCheck +from profiler.msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase from utils.constant import Constant from utils.file import File, FileOpen, FdOpen @@ -31,16 +33,12 @@ class EnvironmentVariableCheck(SoftwareCheck): self.rank = args.rank self.node_rank = args.node_rank self.local_rank = args.local_rank + self.no_shared_storage = args.no_shared_storage if self.local_rank == 0: self.data_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.txt") - self.log_file_path = os.path.join(self.output_path, f"log/{self.CHECK_TYPE}_{self.node_rank}.log") File.create_file(self.data_file_path) - File.create_file(self.log_file_path) - - file_handler = logging.FileHandler(self.log_file_path) - self.logger = logging.getLogger() - self.logger.addHandler(file_handler) + self.logger = logging.getLogger(f"{self.CHECK_TYPE}_{self.node_rank}") def check(self): if self.local_rank == 0: @@ -85,6 +83,17 @@ class EnvironmentVariableCheck(SoftwareCheck): with FdOpen(self.data_file_path) as file: file.write(f"NODE_RANK {self.node_rank} All required environment variables " f"are present and have the correct values.\n") + self.logger.info(f"NODE_RANK {self.node_rank} finished checking environment variables.") + + # 收集全局信息 + env = DistributedClusterBase() + if self.no_shared_storage: + env.collect_global_info(self.data_file_path, self.data_file_path) + # 打印全局结果 + if self.rank == 0: + for file in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): + with FileOpen(file, "r") as f: + print(f.read() + "\n\n") if __name__ == "__main__": diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index ae4ad4ad16..f35de1770e 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -12,12 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import glob import os import logging import importlib.metadata import pkg_resources from environment_check import SoftwareCheck +from profiler.msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase from utils.constant import Constant from utils.file import File, FileOpen, FdOpen @@ -31,16 +33,12 @@ class PythonLibraryCheck(SoftwareCheck): self.rank = args.rank self.node_rank = args.node_rank self.local_rank = args.local_rank + self.no_shared_storage = args.no_shared_storage if self.local_rank == 0: self.data_file_path = os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_{self.node_rank}.txt") - self.log_file_path = os.path.join(self.output_path, f"log/{self.CHECK_TYPE}_{self.node_rank}.log") File.create_file(self.data_file_path) - File.create_file(self.log_file_path) - - file_handler = logging.FileHandler(self.log_file_path) - self.logger = logging.getLogger() - self.logger.addHandler(file_handler) + self.logger = logging.getLogger(f"{self.CHECK_TYPE}_{self.rank}") def check(self): if self.local_rank == 0: @@ -97,6 +95,17 @@ class PythonLibraryCheck(SoftwareCheck): with FdOpen(self.data_file_path) as file: file.write(f"NODE_RANK {self.node_rank} All required Python libraries " f"are present and have the correct versions.\n") + self.logger.info(f"NODE_RANK {self.node_rank} finished checking Python libraries.") + + # 收集全局信息 + env = DistributedClusterBase() + if self.no_shared_storage: + env.collect_global_info(self.data_file_path, self.data_file_path) + # 打印全局结果 + if self.rank == 0: + for file in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): + with FileOpen(file, "r") as f: + print(f.read() + "\n\n") if __name__ == '__main__': python_library_check = PythonLibraryCheck() -- Gitee From db41fc3652e5eeb20e6c91bb3468643fcc26939c Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Mon, 10 Mar 2025 17:16:46 +0800 Subject: [PATCH 12/15] =?UTF-8?q?=E9=AA=8C=E8=AF=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 29 ++++++++++--------- .../env_check/python_library_check.py | 26 ++++++++--------- .../env_check/target_config/__init__.py | 14 +++++++++ .../precheck/env_check/utils/file.py | 2 +- 4 files changed, 43 insertions(+), 28 deletions(-) create mode 100644 profiler/msprof_analyze/precheck/env_check/target_config/__init__.py diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 8e851246b0..647e3c6950 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -18,10 +18,10 @@ import logging import yaml -from environment_check import SoftwareCheck -from profiler.msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase -from utils.constant import Constant -from utils.file import File, FileOpen, FdOpen +from msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase +from msprof_analyze.precheck.env_check.environment_check import SoftwareCheck +from msprof_analyze.precheck.env_check.utils.constant import Constant +from msprof_analyze.precheck.env_check.utils.file import File, FileOpen, FdOpen class EnvironmentVariableCheck(SoftwareCheck): @@ -48,7 +48,9 @@ class EnvironmentVariableCheck(SoftwareCheck): self.__log_result(mismatched) def __get_target_variables(self): - variables_file_path = os.path.abspath(Constant.TARGET_VARIABLE_FILE_PATH) + current_directory = os.path.dirname(os.path.abspath(__file__)) + variables_file_path = os.path.join(current_directory, Constant.TARGET_VARIABLE_FILE_PATH) + res = {} with FileOpen(variables_file_path) as file: if not file.file_reader: return {} @@ -56,7 +58,7 @@ class EnvironmentVariableCheck(SoftwareCheck): res = yaml.safe_load(file.file_reader) except yaml.YAMLError as e: self.logger.error(f"Error when loading target variables: {e}") - return res if res is not None else {} + return res def __get_env_variables(self): res = {key: os.getenv(key) for key in os.environ} @@ -76,24 +78,23 @@ class EnvironmentVariableCheck(SoftwareCheck): def __log_result(self, mismatched): if mismatched: with FdOpen(self.data_file_path) as file: - file.write(f"NODE_RANK {self.node_rank} Missing or different environment variables:\n") + file.write(f"Missing or different environment variables:\n") for item in mismatched: file.write(item + "\n") else: with FdOpen(self.data_file_path) as file: - file.write(f"NODE_RANK {self.node_rank} All required environment variables " - f"are present and have the correct values.\n") + file.write(f"All required environment variables are present and have the correct values.\n") self.logger.info(f"NODE_RANK {self.node_rank} finished checking environment variables.") - # 收集全局信息 + # collect and print global info env = DistributedClusterBase() if self.no_shared_storage: env.collect_global_info(self.data_file_path, self.data_file_path) - # 打印全局结果 + if self.rank == 0: - for file in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): - with FileOpen(file, "r") as f: - print(f.read() + "\n\n") + for file_path in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): + with FileOpen(file_path, "r") as file: + self.logger.warning(file.file_reader.read()) if __name__ == "__main__": diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index f35de1770e..7833913bbb 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -18,10 +18,10 @@ import logging import importlib.metadata import pkg_resources -from environment_check import SoftwareCheck -from profiler.msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase -from utils.constant import Constant -from utils.file import File, FileOpen, FdOpen +from msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase +from msprof_analyze.precheck.env_check.environment_check import SoftwareCheck +from msprof_analyze.precheck.env_check.utils.constant import Constant +from msprof_analyze.precheck.env_check.utils.file import File, FileOpen, FdOpen class PythonLibraryCheck(SoftwareCheck): @@ -48,7 +48,8 @@ class PythonLibraryCheck(SoftwareCheck): self.__log_result(missing_or_different) def __get_target_libraries(self): - requirements_file_path = os.path.abspath(Constant.TARGET_LIBRARY_FILE_PATH) + current_directory = os.path.dirname(os.path.abspath(__file__)) + requirements_file_path = os.path.join(current_directory, Constant.TARGET_LIBRARY_FILE_PATH) target_libs = {} with FileOpen(requirements_file_path) as file: if not file.file_reader: @@ -88,24 +89,23 @@ class PythonLibraryCheck(SoftwareCheck): def __log_result(self, missing_or_mismatched): if missing_or_mismatched: with FdOpen(self.data_file_path) as file: - file.write(f"NODE_RANK {self.node_rank} Missing or version-mismatched Python libraries:\n") + file.write(f"Missing or version-mismatched Python libraries:\n") for item in missing_or_mismatched: file.write(item + "\n") else: with FdOpen(self.data_file_path) as file: - file.write(f"NODE_RANK {self.node_rank} All required Python libraries " - f"are present and have the correct versions.\n") + file.write(f"All required Python libraries are present and have the correct versions.\n") self.logger.info(f"NODE_RANK {self.node_rank} finished checking Python libraries.") - # 收集全局信息 + # collect and print global info env = DistributedClusterBase() if self.no_shared_storage: env.collect_global_info(self.data_file_path, self.data_file_path) - # 打印全局结果 + if self.rank == 0: - for file in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): - with FileOpen(file, "r") as f: - print(f.read() + "\n\n") + for file_path in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): + with FileOpen(file_path, "r") as file: + self.logger.warning(file.file_reader.read()) if __name__ == '__main__': python_library_check = PythonLibraryCheck() diff --git a/profiler/msprof_analyze/precheck/env_check/target_config/__init__.py b/profiler/msprof_analyze/precheck/env_check/target_config/__init__.py new file mode 100644 index 0000000000..b14094e3f9 --- /dev/null +++ b/profiler/msprof_analyze/precheck/env_check/target_config/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/profiler/msprof_analyze/precheck/env_check/utils/file.py b/profiler/msprof_analyze/precheck/env_check/utils/file.py index c1f4f3e025..eeef193512 100644 --- a/profiler/msprof_analyze/precheck/env_check/utils/file.py +++ b/profiler/msprof_analyze/precheck/env_check/utils/file.py @@ -16,7 +16,7 @@ import os import stat import logging -from utils.constant import Constant +from msprof_analyze.precheck.env_check.utils.constant import Constant logger = logging.getLogger() -- Gitee From f7d7dd26df6eccd4a6f6016acb3791c64199619f Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Tue, 11 Mar 2025 10:03:21 +0800 Subject: [PATCH 13/15] =?UTF-8?q?=E5=BA=95=E5=BA=A7bug=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/distributed_cluster/distributed_cluster_base.py | 2 +- profiler/msprof_analyze/precheck/precheck.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/msprof_analyze/precheck/distributed_cluster/distributed_cluster_base.py b/profiler/msprof_analyze/precheck/distributed_cluster/distributed_cluster_base.py index 4e73dd3f44..8fd329b1b4 100644 --- a/profiler/msprof_analyze/precheck/distributed_cluster/distributed_cluster_base.py +++ b/profiler/msprof_analyze/precheck/distributed_cluster/distributed_cluster_base.py @@ -396,7 +396,7 @@ class DistributedClusterBase: "node_rank": self.node_rank, "master_addr": self.master_addr, "master_port": self.master_port, - "master_rank_num": self.master_rank_num, + "master_rank_num": self.local_world_size, "split_file_size": split_file_size, "time_out": time_out, "log_file": log_file diff --git a/profiler/msprof_analyze/precheck/precheck.py b/profiler/msprof_analyze/precheck/precheck.py index bb32447d45..5c9b2e24f6 100644 --- a/profiler/msprof_analyze/precheck/precheck.py +++ b/profiler/msprof_analyze/precheck/precheck.py @@ -52,8 +52,8 @@ class Precheck: f"--pipeline-model-parallel-size {pipeline_model_parallel_size} " f"--context-parallel-size {context_parallel_size} " f"--expert-model-parallel-size {expert_model_parallel_size} " - f"--output {output} " - f"--check-type {check_type} " + f"--output '{output}' " + f"--check-type '{check_type}' " + ("--no-shared-storage " if kwargs.get("no_shared_storage", False) else "") ] try: -- Gitee From d2c959caf5786ad7d59f8e1a9afe59e3bf0f1feb Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Tue, 11 Mar 2025 22:41:41 +0800 Subject: [PATCH 14/15] =?UTF-8?q?=E8=BE=93=E5=87=BA=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../env_check/environment_variable_check.py | 18 +++++++++++++----- .../precheck/env_check/python_library_check.py | 18 +++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 647e3c6950..43c2f6fbea 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -15,6 +15,7 @@ import glob import os import logging +import time import yaml @@ -30,6 +31,7 @@ class EnvironmentVariableCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) self.output_path = args.output + self.world_size = args.world_size self.rank = args.rank self.node_rank = args.node_rank self.local_rank = args.local_rank @@ -76,13 +78,13 @@ class EnvironmentVariableCheck(SoftwareCheck): return mismatched def __log_result(self, mismatched): - if mismatched: - with FdOpen(self.data_file_path) as file: + with FdOpen(self.data_file_path) as file: + file.write(f"NODE_RANK {self.node_rank} checking environment variables result:\n") + if mismatched: file.write(f"Missing or different environment variables:\n") for item in mismatched: file.write(item + "\n") - else: - with FdOpen(self.data_file_path) as file: + else: file.write(f"All required environment variables are present and have the correct values.\n") self.logger.info(f"NODE_RANK {self.node_rank} finished checking environment variables.") @@ -92,7 +94,13 @@ class EnvironmentVariableCheck(SoftwareCheck): env.collect_global_info(self.data_file_path, self.data_file_path) if self.rank == 0: - for file_path in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): + file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) + while len(file_path_list) < self.world_size: + time.sleep(10) + self.logger.info(f"Waiting for other nodes to finish checking Python libraries.") + file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) + + for file_path in file_path_list: with FileOpen(file_path, "r") as file: self.logger.warning(file.file_reader.read()) diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index 7833913bbb..aded32367b 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -16,6 +16,7 @@ import glob import os import logging import importlib.metadata +import time import pkg_resources from msprof_analyze.precheck.distributed_cluster.distributed_cluster_base import DistributedClusterBase @@ -30,6 +31,7 @@ class PythonLibraryCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) self.output_path = args.output + self.world_size = args.world_size self.rank = args.rank self.node_rank = args.node_rank self.local_rank = args.local_rank @@ -87,13 +89,13 @@ class PythonLibraryCheck(SoftwareCheck): return missing_or_mismatched def __log_result(self, missing_or_mismatched): - if missing_or_mismatched: - with FdOpen(self.data_file_path) as file: + with FdOpen(self.data_file_path) as file: + file.write(f"NODE_RANK {self.node_rank} checking Python libraries result:\n") + if missing_or_mismatched: file.write(f"Missing or version-mismatched Python libraries:\n") for item in missing_or_mismatched: file.write(item + "\n") - else: - with FdOpen(self.data_file_path) as file: + else: file.write(f"All required Python libraries are present and have the correct versions.\n") self.logger.info(f"NODE_RANK {self.node_rank} finished checking Python libraries.") @@ -103,7 +105,13 @@ class PythonLibraryCheck(SoftwareCheck): env.collect_global_info(self.data_file_path, self.data_file_path) if self.rank == 0: - for file_path in glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")): + file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) + while len(file_path_list) < self.world_size: + time.sleep(10) + self.logger.info(f"Waiting for other nodes to finish checking Python libraries.") + file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) + + for file_path in file_path_list: with FileOpen(file_path, "r") as file: self.logger.warning(file.file_reader.read()) -- Gitee From c3dc7a188cdd8c6b0b82a8b520d1f9f76c19ce3f Mon Sep 17 00:00:00 2001 From: wangzhihe23 Date: Wed, 12 Mar 2025 10:02:26 +0800 Subject: [PATCH 15/15] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../precheck/env_check/environment_variable_check.py | 6 ++++-- .../precheck/env_check/python_library_check.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py index 43c2f6fbea..0fadfe2945 100644 --- a/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py +++ b/profiler/msprof_analyze/precheck/env_check/environment_variable_check.py @@ -31,7 +31,9 @@ class EnvironmentVariableCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) self.output_path = args.output + self.nproc_per_node = args.nproc_per_node self.world_size = args.world_size + self.nnodes = args.world_size / self.nproc_per_node self.rank = args.rank self.node_rank = args.node_rank self.local_rank = args.local_rank @@ -95,10 +97,10 @@ class EnvironmentVariableCheck(SoftwareCheck): if self.rank == 0: file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) - while len(file_path_list) < self.world_size: - time.sleep(10) + while len(file_path_list) < self.nnodes: self.logger.info(f"Waiting for other nodes to finish checking Python libraries.") file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) + time.sleep(10) for file_path in file_path_list: with FileOpen(file_path, "r") as file: diff --git a/profiler/msprof_analyze/precheck/env_check/python_library_check.py b/profiler/msprof_analyze/precheck/env_check/python_library_check.py index aded32367b..b17ceb1417 100644 --- a/profiler/msprof_analyze/precheck/env_check/python_library_check.py +++ b/profiler/msprof_analyze/precheck/env_check/python_library_check.py @@ -31,7 +31,9 @@ class PythonLibraryCheck(SoftwareCheck): def __init__(self, args): super().__init__(args) self.output_path = args.output + self.nproc_per_node = args.nproc_per_node self.world_size = args.world_size + self.nnodes = args.world_size / self.nproc_per_node self.rank = args.rank self.node_rank = args.node_rank self.local_rank = args.local_rank @@ -106,10 +108,10 @@ class PythonLibraryCheck(SoftwareCheck): if self.rank == 0: file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) - while len(file_path_list) < self.world_size: - time.sleep(10) + while len(file_path_list) < self.nnodes: self.logger.info(f"Waiting for other nodes to finish checking Python libraries.") file_path_list = glob.glob(os.path.join(self.output_path, f"data/{self.CHECK_TYPE}_*.txt")) + time.sleep(10) for file_path in file_path_list: with FileOpen(file_path, "r") as file: -- Gitee