diff --git a/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py index 7f558c3871a37c5f13c89b0bd6840de6b073d1f3..b6b8fe2e04476ec3c70df25823ef5355c02dc98c 100644 --- a/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py +++ b/profiler/msprof_analyze/advisor/analyzer/base_analyzer.py @@ -27,6 +27,7 @@ from msprof_analyze.advisor.display.html.render import HTMLRender from msprof_analyze.advisor.display.html.priority_background_color import PriorityBackgroundColor from msprof_analyze.advisor.utils.utils import safe_division from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager logger = logging.getLogger() @@ -114,7 +115,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): elif self.collection_path.endswith(ASCEND_PT): profiling_type = [elem for elem in profiling_type_list if Constant.PYTORCH in elem][0] else: - for _, dirs, __ in os.walk(self.collection_path): + for _, dirs, __ in PathManager.limited_depth_walk(self.collection_path): is_found_type = False for direction in dirs: if direction.endswith(ASCEND_MS): @@ -143,7 +144,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): if self.collection_path.endswith(ASCEND_MS): ascend_dirs.append(self.collection_path) else: - for root, dirs, _ in os.walk(self.collection_path): + for root, dirs, _ in PathManager.limited_depth_walk(self.collection_path): for direction in dirs: if direction.endswith(ASCEND_MS): ascend_dirs.append(os.path.join(root, direction)) diff --git a/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py index 1bfaf8d611964af8d3a23544d630eeddd116206b..0f2cb83034c2fb669311d3fd638970e1650e3d1f 100644 --- a/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py +++ 
b/profiler/msprof_analyze/advisor/analyzer/overall/overall_summary_analyzer.py @@ -23,6 +23,7 @@ from msprof_analyze.advisor.result.result import OptimizeResult from msprof_analyze.compare_tools.compare_interface.comparison_interface import ComparisonInterface from msprof_analyze.prof_common.additional_args_manager import AdditionalArgsManager from msprof_analyze.prof_common.constant import Constant +from msprof_analyze.prof_common.path_manager import PathManager class OverallSummaryAnalyzer(BaseAnalyzer): @@ -237,7 +238,7 @@ class OverallSummaryAnalyzer(BaseAnalyzer): def get_profile_path(collection_path): - for root, _, files in os.walk(collection_path): + for root, _, files in PathManager.limited_depth_walk(collection_path): for file in files: if file.startswith("profiler_info"): return root diff --git a/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py b/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py index eed7e299589f26c132c83638a579811d2faa1557..acd44ac137e2e0ebc05ea6a1ffef64f45cc4c628 100644 --- a/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/communication/communication_dataset.py @@ -24,6 +24,7 @@ from msprof_analyze.cluster_analyse.common_func.table_constant import TableConst from msprof_analyze.prof_common.singleton import singleton from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager from msprof_analyze.advisor.dataset.cluster.hccl_collection import HcclInfo from msprof_analyze.advisor.utils.utils import CheckPathAccess from msprof_analyze.advisor.dataset.dataset import Dataset @@ -69,7 +70,7 @@ class CommunicationDataset(Dataset): if not os.path.isdir(path): logger.warning("Expected existed directory, but got %s", path) - for root, _, files in os.walk(path): + for root, _, files in 
PathManager.limited_depth_walk(path): if os.path.basename(root) != "ASCEND_PROFILER_OUTPUT": continue for filename in files: diff --git a/profiler/msprof_analyze/advisor/dataset/dataset.py b/profiler/msprof_analyze/advisor/dataset/dataset.py index 0bd2dacde6caaa874bcba600eb34fe070054837d..f8149f1088264300aae6517159607bbccdb74edd 100644 --- a/profiler/msprof_analyze/advisor/dataset/dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/dataset.py @@ -22,6 +22,7 @@ import re from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager from msprof_analyze.advisor.config.config import Config logger = logging.getLogger() @@ -64,7 +65,7 @@ class Dataset: def get_data_type(self): # 递归搜索ASCEND_PROFILER_PATH文件夹 - for root, dirs, _ in os.walk(self.collection_path): + for root, dirs, _ in PathManager.limited_depth_walk(self.collection_path): if Constant.ASCEND_PROFILER_OUTPUT in dirs: profiler_dir = os.path.join(root, Constant.ASCEND_PROFILER_OUTPUT) diff --git a/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py b/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py index 6fa569ad9fad2149fdb35ebdf145a633f416031d..43c5e1560b3fb460113b5b16b3fe630ccce702e4 100644 --- a/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/environment_variable_dataset.py @@ -17,6 +17,7 @@ import logging from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.path_manager import PathManager class EnvironmentVariableDataset: @@ -27,7 +28,7 @@ class EnvironmentVariableDataset: @staticmethod def get_env_data_file(collection_path: str) -> str: - for root, _, files in os.walk(collection_path): + for root, _, files in PathManager.limited_depth_walk(collection_path): for file_name 
in files: if file_name == Constant.PROFILER_METADATA: return os.path.join(root, file_name) diff --git a/profiler/msprof_analyze/advisor/utils/utils.py b/profiler/msprof_analyze/advisor/utils/utils.py index 001949d6e29c04837f7bb527a3bce2547f58f116..72d3e9952de18fdd43b466eae8e43f3ad4caf8c5 100644 --- a/profiler/msprof_analyze/advisor/utils/utils.py +++ b/profiler/msprof_analyze/advisor/utils/utils.py @@ -33,6 +33,7 @@ from tqdm import tqdm from msprof_analyze.advisor.utils.log import init_logger, get_log_level from msprof_analyze.prof_common.constant import Constant from msprof_analyze.prof_common.singleton import singleton +from msprof_analyze.prof_common.path_manager import PathManager logger = logging.getLogger() logger.setLevel(get_log_level()) @@ -109,7 +110,7 @@ def get_file_path_from_directory(path: str, check_func: Any) -> list: get file from directory """ file_list = [] - for root, _, files in os.walk(path, onerror=walk_error_handler): + for root, _, files in PathManager.limited_depth_walk(path, onerror=walk_error_handler): for filename in files: filepath = os.path.join(root, filename) if check_func(filename): @@ -267,7 +268,7 @@ def get_file_path_from_directory(path, check_func): if not os.path.isdir(path): logger.warning("Expected existed directory, but got %s", path) - for root, _, files in os.walk(path): + for root, _, files in PathManager.limited_depth_walk(path): for filename in files: filepath = os.path.join(root, filename) if check_func(filename): @@ -288,7 +289,7 @@ def join_prof_path(root_dir: str, sub_dir: str) -> str: regular expression matching method for path concatenation """ if is_regex_pattern(sub_dir): - for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): + for root, _, _ in PathManager.limited_depth_walk(root_dir, onerror=walk_error_handler): if re.match(sub_dir, os.path.basename(root)): return root logger.debug("Fail to get profiling path %s from local path %s by regular expression matching", sub_dir, @@ -351,7 +352,7 @@ 
class SafeOpen: def get_file_path_by_walk(root, filename): file_path = "" - for root, _, files in os.walk(root, topdown=True): + for root, _, files in PathManager.limited_depth_walk(root): for name in files: if name == filename: file_path = os.path.join(root, name) diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py b/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py index 664ef3ba1a424eec7b673a0fe765ca390ec372b4..c7666020f9430173570eb2631b0b7352da58bfdb 100644 --- a/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py +++ b/profiler/msprof_analyze/cluster_analyse/analysis/cluster_base_info_analysis.py @@ -82,7 +82,7 @@ class ClusterBaseInfoAnalysis(BaseAnalysis): def get_profiler_metadata_file(self): meta_file_list = [] - for root, _, files in os.walk(self.collection_path): + for root, _, files in PathManager.limited_depth_walk(self.collection_path): for file_name in files: if file_name == Constant.PROFILER_METADATA: meta_file_list.append(os.path.join(root, file_name)) diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py b/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py index 233bc5b3bc7579644119f263b1ad0be32808988f..fb1b1ccba8fe1c430ca0c36f8e1bc31da60a2f81 100644 --- a/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py +++ b/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py @@ -73,7 +73,7 @@ class Interface: ascend_pt_dirs = [] ascend_ms_dirs = [] prof_dirs = [] - for root, dirs, _ in os.walk(self.collection_path): + for root, dirs, _ in PathManager.limited_depth_walk(self.collection_path): for dir_name in dirs: if dir_name.endswith(self.ASCEND_PT): ascend_pt_dirs.append(os.path.join(root, dir_name)) diff --git a/profiler/msprof_analyze/prof_common/path_manager.py b/profiler/msprof_analyze/prof_common/path_manager.py index 0b92ebc2de1365e2e05a576ed3e471d09da4d238..8062fc8f3f1eef7f2feeb22844512ae58331b4a4 100644 --- 
a/profiler/msprof_analyze/prof_common/path_manager.py
+++ b/profiler/msprof_analyze/prof_common/path_manager.py
@@ -221,3 +221,26 @@ class PathManager:
     def expanduser_for_argumentparser(cls, str_name: str):
         # None 对应 参数未赋值的场景
         return str_name if str_name is None else os.path.expanduser(str_name.lstrip('='))
+
+    @classmethod
+    def limited_depth_walk(cls, path, max_depth=10, *args, **kwargs):
+        """
+        Walk ``path`` like ``os.walk``, yielding only directories within ``max_depth`` levels.
+
+        :param path: top directory to walk (str or os.PathLike)
+        :param max_depth: deepest level, relative to ``path`` (level 0), that is yielded
+        :param args: extra positional arguments forwarded to ``os.walk``
+        :param kwargs: extra keyword arguments forwarded to ``os.walk`` (e.g. ``onerror``)
+        :return: generator of ``(root, dirs, files)`` tuples, as produced by ``os.walk``
+        """
+        top = os.fspath(path)
+        for root, dirs, files in os.walk(top, *args, **kwargs):
+            # Depth is counted from the part of root below top, so a trailing separator on
+            # path (or path being the filesystem root) cannot shift the count.
+            relative = root[len(top):].strip(os.sep)
+            depth = relative.count(os.sep) + 1 if relative else 0
+            if depth > max_depth:
+                # In-place clearing stops os.walk from descending further (top-down walks only).
+                dirs.clear()
+                continue
+            yield root, dirs, files