From ad8e1c24d3f8de7d288e12a7c3d69f15b70b03b4 Mon Sep 17 00:00:00 2001 From: z30043230 Date: Tue, 22 Jul 2025 11:08:19 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E5=BC=80=E5=8F=91=E8=87=AA=E6=8F=90?= =?UTF-8?q?=E3=80=91=E3=80=90=E5=AE=89=E5=85=A8=E3=80=91os.walk=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E6=B7=B1=E5=BA=A6=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprof_analyze/advisor/analyzer/base_analyzer.py | 5 +++-- .../advisor/analyzer/overall/overall_summary_analyzer.py | 3 ++- .../dataset/communication/communication_dataset.py | 3 ++- profiler/msprof_analyze/advisor/dataset/dataset.py | 3 ++- .../advisor/dataset/environment_variable_dataset.py | 3 ++- profiler/msprof_analyze/advisor/utils/utils.py | 9 +++++---- .../analysis/cluster_base_info_analysis.py | 2 +- .../msprof_analyze/cluster_analyse/cluster_analysis.py | 2 +- profiler/msprof_analyze/prof_common/path_manager.py | 9 +++++++++ 9 files changed, 27 insertions(+), 12 deletions(-) diff --git a/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py index 7f558c387..b6b8fe2e0 100644 --- a/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py +++ b/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py @@ -27,6 +27,7 @@ from msprof_analyze.advisor.display.html.render import HTMLRender from msprof_analyze.advisor.display.html.priority_background_color import PriorityBackgroundColor from msprof_analyze.advisor.utils.utils import safe_division from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager logger = logging.getLogger() @@ -114,7 +115,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): elif self.collection_path.endswith(ASCEND_PT): profiling_type = [elem for elem in profiling_type_list if Constant.PYTORCH in elem][0] else: - for _, dirs, __ in os.walk(self.collection_path): + for _, dirs, __ in PathManager.limited_depth_walk(self.collection_path): is_found_type = False for direction in dirs: if direction.endswith(ASCEND_MS): @@ -143,7 +144,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): if self.collection_path.endswith(ASCEND_MS): ascend_dirs.append(self.collection_path) else: - for root, dirs, _ in os.walk(self.collection_path): + for root, dirs, _ in PathManager.limited_depth_walk(self.collection_path): for direction in dirs: if direction.endswith(ASCEND_MS): ascend_dirs.append(os.path.join(root, direction)) diff --git a/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py index 1bfaf8d61..0f2cb8303 100644 --- a/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py @@ -23,6 +23,7 @@ from msprof_analyze.advisor.result.result import OptimizeResult from msprof_analyze.compare_tools.compare_interface.comparison_interface import ComparisonInterface from msprof_analyze.prof_common.additional_args_manager import AdditionalArgsManager from msprof_analyze.prof_common.constant import Constant +from msprof_analyze.prof_common.path_manager import PathManager class OverallSummaryAnalyzer(BaseAnalyzer): @@ -237,7 +238,7 @@ class OverallSummaryAnalyzer(BaseAnalyzer): def get_profile_path(collection_path): - for root, _, files in os.walk(collection_path): + for root, _, files in PathManager.limited_depth_walk(collection_path): for file in files: if file.startswith("profiler_info"): return root diff --git a/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py b/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py index eed7e2995..acd44ac13 100644 --- a/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py @@ -24,6 +24,7 @@ from msprof_analyze.cluster_analyse.common_func.table_constant import TableConst from msprof_analyze.prof_common.singleton import singleton from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager from msprof_analyze.advisor.dataset.cluster.hccl_collection import HcclInfo from msprof_analyze.advisor.utils.utils import CheckPathAccess from msprof_analyze.advisor.dataset.dataset import Dataset @@ -69,7 +70,7 @@ class CommunicationDataset(Dataset): if not os.path.isdir(path): logger.warning("Expected existed directory, but got %s", path) - for root, _, files in os.walk(path): + for root, _, files in PathManager.limited_depth_walk(path): if os.path.basename(root) != "ASCEND_PROFILER_OUTPUT": continue for filename in files: diff --git a/profiler/msprof_analyze/advisor/dataset/dataset.py b/profiler/msprof_analyze/advisor/dataset/dataset.py index 0bd2dacde..f8149f108 100644 --- a/profiler/msprof_analyze/advisor/dataset/dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/dataset.py @@ -22,6 +22,7 @@ import re from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager from msprof_analyze.advisor.config.config import Config logger = logging.getLogger() @@ -64,7 +65,7 @@ class Dataset: def get_data_type(self): # 递归搜索ASCEND_PROFILER_PATH文件夹 - for root, dirs, _ in os.walk(self.collection_path): + for root, dirs, _ in PathManager.limited_depth_walk(self.collection_path): if Constant.ASCEND_PROFILER_OUTPUT in dirs: profiler_dir = os.path.join(root, Constant.ASCEND_PROFILER_OUTPUT) diff --git a/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py b/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py index 6fa569ad9..43c5e1560 100644 --- a/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py @@ -17,6 +17,7 @@ import logging from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager class EnvironmentVariableDataset: @@ -27,7 +28,7 @@ class EnvironmentVariableDataset: @staticmethod def get_env_data_file(collection_path: str) -> str: - for root, _, files in os.walk(collection_path): + for root, _, files in PathManager.limited_depth_walk(collection_path): for file_name in files: if file_name == Constant.PROFILER_METADATA: return os.path.join(root, file_name) diff --git a/profiler/msprof_analyze/advisor/utils/utils.py b/profiler/msprof_analyze/advisor/utils/utils.py index 001949d6e..72d3e9952 100644 --- a/profiler/msprof_analyze/advisor/utils/utils.py +++ b/profiler/msprof_analyze/advisor/utils/utils.py @@ -33,6 +33,7 @@ from tqdm import tqdm from msprof_analyze.advisor.utils.log import init_logger, get_log_level from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.singleton import singleton +from msprof_analyze.prof_common.path_manager import PathManager logger = logging.getLogger() logger.setLevel(get_log_level()) @@ -109,7 +110,7 @@ def get_file_path_from_directory(path: str, check_func: Any) -> list: get file from directory """ file_list = [] - for root, _, files in os.walk(path, onerror=walk_error_handler): + for root, _, files in PathManager.limited_depth_walk(path, onerror=walk_error_handler): for filename in files: filepath = os.path.join(root, filename) if check_func(filename): @@ -267,7 +268,7 @@ def get_file_path_from_directory(path, check_func): if not os.path.isdir(path): logger.warning("Expected existed directory, but got %s", path) - for root, _, files in os.walk(path): + for root, _, files in PathManager.limited_depth_walk(path): for filename in files: filepath = os.path.join(root, filename) if check_func(filename): @@ -288,7 +289,7 @@ def join_prof_path(root_dir: str, sub_dir: str) -> str: regular expression matching method for path concatenation """ if is_regex_pattern(sub_dir): - for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): + for root, _, _ in PathManager.limited_depth_walk(root_dir, onerror=walk_error_handler): if re.match(sub_dir, os.path.basename(root)): return root logger.debug("Fail to get profiling path %s from local path %s by regular expression matching", sub_dir, @@ -351,7 +352,7 @@ class SafeOpen: def get_file_path_by_walk(root, filename): file_path = "" - for root, _, files in os.walk(root, topdown=True): + for root, _, files in PathManager.limited_depth_walk(root): for name in files: if name == filename: file_path = os.path.join(root, name) diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py b/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py index 664ef3ba1..c7666020f 100644 --- a/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py +++ b/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py @@ -82,7 +82,7 @@ class ClusterBaseInfoAnalysis(BaseAnalysis): def get_profiler_metadata_file(self): meta_file_list = [] - for root, _, files in os.walk(self.collection_path): + for root, _, files in PathManager.limited_depth_walk(self.collection_path): for file_name in files: if file_name == Constant.PROFILER_METADATA: meta_file_list.append(os.path.join(root, file_name)) diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py b/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py index 233bc5b3b..fb1b1ccba 100644 --- a/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py +++ b/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py @@ -73,7 +73,7 @@ class Interface: ascend_pt_dirs = [] ascend_ms_dirs = [] prof_dirs = [] - for root, dirs, _ in os.walk(self.collection_path): + for root, dirs, _ in PathManager.limited_depth_walk(self.collection_path): for dir_name in dirs: if dir_name.endswith(self.ASCEND_PT): ascend_pt_dirs.append(os.path.join(root, dir_name)) diff --git a/profiler/msprof_analyze/prof_common/path_manager.py b/profiler/msprof_analyze/prof_common/path_manager.py index 0b92ebc2d..8062fc8f3 100644 --- a/profiler/msprof_analyze/prof_common/path_manager.py +++ b/profiler/msprof_analyze/prof_common/path_manager.py @@ -221,3 +221,12 @@ class PathManager: def expanduser_for_argumentparser(cls, str_name: str): # None 对应 参数未赋值的场景 return str_name if str_name is None else os.path.expanduser(str_name.lstrip('=')) + + @classmethod + def limited_depth_walk(cls, path, max_depth=10, *args, **kwargs): + base_depth = path.count(os.sep) + for root, dirs, files in os.walk(path, *args, **kwargs): + if root.count(os.sep) - base_depth > max_depth: + dirs.clear() + continue + yield root, dirs, files \ No newline at end of file -- Gitee