diff --git a/profiler/README.md b/profiler/README.md index 05460f77f58eda76eb64d06cbe5fbfb270b95ba3..63bfdcb12ccc2516a07fcea6e5768b21403063cc 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -107,7 +107,7 @@ Successfully installed msprof-analyze-{version} | profiler版本 | 发布日期 | 下载链接 | 校验码 | |------------|------------|-------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------------------ | - | 1.2.3 | 2024-08-29 | [msprof_analyze-1.2.3-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.3/msprof_analyze-1.2.3-py3-none-any.whl) | 72aa827b8b09557cfb29684e13b496527d53087f6cac6803ddf9933335fa8e0c | + | 1.2.3 | 2024-08-29 | [msprof_analyze-1.2.3-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.3/msprof_analyze-1.2.3-py3-none-any.whl) | 553cd63161c8729c09646665c8a2e55d165855cd7c422a3982d6923ce1cd931c | | 1.2.2 | 2024-08-23 | [msprof_analyze-1.2.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.2/msprof_analyze-1.2.2-py3-none-any.whl) | ed92a8e4eaf5ada8a2b4079072ec0cc42501b1b1f2eb00c8fdcb077fecb4ae02 | | 1.2.1 | 2024-08-14 | [msprof_analyze-1.2.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.1/msprof_analyze-1.2.1-py3-none-any.whl) | 7acd477417bfb3ea29029dadf175d019ad3212403b7e11dc1f87e84c2412c078 | | 1.2.0 | 2024-07-25 | [msprof_analyze-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.0/msprof_analyze-1.2.0-py3-none-any.whl) | 6a4366e3beca40b4a8305080e6e441d6ecafb5c05489e5905ac0265787555f37 | diff --git a/profiler/advisor/__init__.py b/profiler/advisor/__init__.py index e79018ed05c6d1cdeb56feaa6182f048e3c8e06f..8400fd5ecd1246eaee795cebfccfacc80a94f08c 100644 --- a/profiler/advisor/__init__.py +++ b/profiler/advisor/__init__.py @@ -12,6 +12,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - -from profiler.advisor.interface.interface import Interface \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py b/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py index e9be4675963a9cd48da3b4cd91ee646f8e82468b..2cc1eebebc921510e44e8869bd70c423995efeb0 100644 --- a/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py +++ b/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py @@ -14,11 +14,14 @@ # limitations under the License. import os +import logging from abc import abstractmethod from common_func.constant import Constant from advice_base import AdviceBase from cluster_analysis import Interface +logger = logging.getLogger() + class ClusterAdviceBase(AdviceBase): def __init__(self, collection_path: str): @@ -37,11 +40,11 @@ class ClusterAdviceBase(AdviceBase): """ for file in os.listdir(self.collection_path): if file == 'cluster_analysis_output': - print("[INFO]Cluster has been analyzed " - "because of the existence of cluster analysis output directory.") - print("[INFO]Skip Cluster analyze backend.") + logger.info("Cluster has been analyzed " + "because of the existence of cluster analysis output directory.") + logger.info("Skip Cluster analyze backend.") return - print("[INFO] cluster analysis is in the process, please wait...") + logger.info("cluster analysis is in the process, please wait...") self.cluster_analyze() def cluster_analyze(self): diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py index e9dcd263d6c2875ece7e94409191a7f9ceee1b27..4f4d9d753f88ca1396c69fe068a85a80d1da4072 100644 --- a/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py @@ -45,4 +45,4 @@ class FusionOPAnalyzer(BaseAnalyzer): else: checker.find_fusion_matched_issues_with_times(graph_data, profiling_data) checker.make_record(self.result) - self.html = checker.make_render(self.html_render, add_render_list) \ No newline at end of file + self.html = checker.make_render(self.html_render) diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 298e94fc18c171ff9b6e84aba035db782af4809d..ea8303c9d69d8cc611a58f821227402253337ce1 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -12,6 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +import stat + # timeline DEQUEUE = "Dequeue" @@ -156,3 +159,11 @@ MAX_FILE_SIZE = 10 ** 10 MAX_NUM_PROCESSES = 4 DEFAULT_STEP = "-1" STEP_RANK_SEP = "_" + + +MAX_READ_LINE_BYTES = 8196 * 1024 +MAX_READ_FILE_BYTES = 64 * 1024 * 1024 * 1024 +MAX_READ_DB_FILE_BYTES = 8 * 1024 * 1024 * 1024 + +WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP +WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC diff --git a/profiler/advisor/common/graph/graph_parser.py b/profiler/advisor/common/graph/graph_parser.py index ef4dc4d681e0664c12120c9c8904ad48970a5840..fe3f6bf29d6d41bde5acb5e4c17f6b848a7de31e 100644 --- a/profiler/advisor/common/graph/graph_parser.py +++ b/profiler/advisor/common/graph/graph_parser.py @@ -6,6 +6,7 @@ from dataclasses import dataclass from typing import List, Tuple, Dict from profiler.cluster_analyse.common_func.file_manager import FileManager +from profiler.advisor.utils.file import FileOpen logger = logging.getLogger() @@ -220,9 +221,9 @@ class HostGraphParser: def _parse(self, graph_file): # pylint:disable=broad-except graph_list = [] - with open(graph_file, "r", encoding="gbk") as file: + with FileOpen(graph_file, "r") as file: try: - graph_list = self._parse_line(file, graph_list) + graph_list = self._parse_line(file.file_reader, graph_list) except Exception: logger.error( "Parse line %s of file %s failed, make sure the format is correct.", self.line_no, graph_file diff --git a/profiler/advisor/common/profiling/ge_info.py b/profiler/advisor/common/profiling/ge_info.py index 4fd5846d88ddbab5d898c020b76537c1ec52db3b..5cdebe6da4a28e9a6d6a4c479e5e731f5c15712d 100644 --- a/profiler/advisor/common/profiling/ge_info.py +++ b/profiler/advisor/common/profiling/ge_info.py @@ -6,9 +6,11 @@ import os from typing import Any, List from sqlalchemy import text +from sqlalchemy.exc import SQLAlchemyError from profiler.advisor.dataset.profiling.db_manager import ConnectionManager from profiler.advisor.dataset.profiling.profiling_parser import ProfilingParser +from profiler.advisor.utils.utils import check_path_valid logger = logging.getLogger() @@ -33,9 +35,14 @@ class GeInfo(ProfilingParser): ge info """ db_path, db_file = os.path.split(profiling_db_file) + check_path_valid(db_path) if not ConnectionManager.check_db_exists(db_path, [db_file]): return False - conn = ConnectionManager(db_path, db_file) + try: + conn = ConnectionManager(db_path, db_file) + except SQLAlchemyError as e: + logger.error("Database error: %s", e) + return False if conn.check_table_exists(['TaskInfo']): with conn().connect() as sql_conn: self.op_state_info_list = sql_conn.execute(text("select op_name, op_state from TaskInfo")).fetchall() diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index e268b4092f63a15e5849cac10a3cdf78ffaf959a..fd8980dbf3dc8d5293e2226d98e870c9e39e277c 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -42,9 +42,9 @@ class ClusterDataset(Dataset): """ for filename in os.listdir(self.cluster_analysis_output_path): if filename == 'cluster_analysis_output': - logger.info("[INFO]Cluster has been analyzed " + logger.info("Cluster has been analyzed " "because of the existence of cluster analysis output directory.") - logger.info("[INFO]Skip Cluster analyze backend.") + logger.info("Skip Cluster analyze backend.") return True return False @@ -56,7 +56,7 @@ class ClusterDataset(Dataset): Constant.ANALYSIS_MODE: "all", Constant.CLUSTER_ANALYSIS_OUTPUT_PATH: self.cluster_analysis_output_path } - print("[INFO] cluster analysis is in the process, please wait...") + logger.info("cluster analysis is in the process, please wait...") try: Interface(parameter).run() except Exception as e: @@ -94,7 +94,7 @@ class ClusterStepTraceTimeDataset(ClusterDataset): try: step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) except RuntimeError as e: - print("捕获到异常:", e) + logger.error("捕获到异常:%s", e) self._step_dict = None return False self._step_dict = self.format_data(step_data) @@ -166,7 +166,7 @@ class ClusterCommunicationDataset(ClusterDataset): try: communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) except RuntimeError as e: - print("捕获到异常:", e) + logger.error("捕获到异常:%s", e) self.rank_bw_dict = None return False self.process(communication_json) diff --git a/profiler/advisor/dataset/profiling/device_info.py b/profiler/advisor/dataset/profiling/device_info.py index 110cd0794c6cb153644b9d2e59c7d0793eb280b4..0c3201657962fce329f7869f893e2a7a86b9bba3 100644 --- a/profiler/advisor/dataset/profiling/device_info.py +++ b/profiler/advisor/dataset/profiling/device_info.py @@ -6,6 +6,7 @@ import logging from profiler.advisor.config.config import Config from profiler.advisor.utils.utils import get_file_path_from_directory +from profiler.advisor.utils.file import FileOpen logger = logging.getLogger() @@ -40,10 +41,16 @@ class DeviceInfoParser: if info_file.endswith("done"): return False # skip info.json.0.done try: - with open(info_file, encoding="utf-8") as file: - info = json.load(file) - except (IOError, ValueError) as error: - logger.error("Parse json info file %s failed : %s", info_file, error) + with FileOpen(info_file) as file: + info = json.load(file.file_reader) + except FileNotFoundError as error: + logger.error("Parse json info file %s failed due to FileNotFoundError : %s", info_file, error) + return False + except PermissionError as error: + logger.error("Parse json info file %s failed due to PermissionError : %s", info_file, error) + return False + except OSError as error: + logger.error("Parse json info file %s failed due to OSError : %s", info_file, error) return False if "DeviceInfo" not in info: logger.error("No device info in json info file %s", info_file) diff --git a/profiler/advisor/dataset/profiling/profiling_parser.py b/profiler/advisor/dataset/profiling/profiling_parser.py index 51996617c2b83a3a1e4d1f873140957c8ff68b51..41a3aba16d65869101598755414fdbd22fd2d80a 100644 --- a/profiler/advisor/dataset/profiling/profiling_parser.py +++ b/profiler/advisor/dataset/profiling/profiling_parser.py @@ -6,6 +6,7 @@ from typing import List, Dict from profiler.advisor.dataset.profiling.info_collection import logger from profiler.advisor.utils.utils import get_file_path_from_directory, SafeOpen, format_excel_title +from profiler.advisor.utils.file import FileOpen class ProfilingParser: @@ -101,8 +102,8 @@ class ProfilingParser: logger.debug("Parse file %s", file) self._filename = os.path.splitext(os.path.basename(file))[0] try: - with open(file, encoding="utf-8") as json_file: - self._raw_data = json.load(json_file) + with FileOpen(file) as json_file: + self._raw_data = json.load(json_file.file_reader) except (OSError, ValueError) as error: logger.error("Parse json file %s failed : %s", file, error) return False diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index fcc623d2f9015f341f2c934530df43e09944a2f9..4d251383c42ef83dd0236a798236c8a23537c166 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -1,7 +1,6 @@ import json import os import stat -from textwrap import fill from collections import OrderedDict import click @@ -11,6 +10,7 @@ from prettytable import ALL, PrettyTable from profiler.advisor.common import constant as const from profiler.advisor.utils.utils import singleton, logger from profiler.advisor.config.config import Config +from profiler.advisor.utils.file import FdOpen, check_dir_writable class ResultWriter: @@ -19,6 +19,7 @@ class ResultWriter: def __init__(self, result_path=None): self.result_path = result_path + check_dir_writable(os.path.dirname(result_path)) self.workbook = xlsxwriter.Workbook(result_path, {"nan_inf_to_errors": True}) self.header_format = None @@ -171,12 +172,17 @@ class OptimizeResult: tune_op_dict = {"tune_ops_name": self._tune_op_list} tune_ops_file = Config().tune_ops_file try: - - with os.fdopen(os.open(tune_ops_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR), - 'w', encoding="utf-8") as op_tune_file: + with FdOpen(tune_ops_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR, 'w') as \ + op_tune_file: json.dump(tune_op_dict, op_tune_file) + except PermissionError as error: + logger.error("Dump op_list to %s failed, %s due to PermissionError", tune_ops_file, error) + return + except FileNotFoundError as error: + logger.error("Dump op_list to %s failed, %s due to FileNotFoundError", tune_ops_file, error) + return except OSError as error: - logger.error("Dump op_list to %s failed, %s", tune_ops_file, error) + logger.error("Dump op_list to %s failed, %s due to OSError", tune_ops_file, error) return logger.info("Save tune op name list to %s", tune_ops_file) diff --git a/profiler/advisor/utils/file.py b/profiler/advisor/utils/file.py new file mode 100644 index 0000000000000000000000000000000000000000..66fd0597adee2c0c407b9dcb34957039399cdd2a --- /dev/null +++ b/profiler/advisor/utils/file.py @@ -0,0 +1,72 @@ +import os +import logging +from profiler.advisor.common import constant as const +from profiler.advisor.utils.utils import check_path_valid +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class FileOpen: + """ + open and read file + """ + + def __init__(self: any, file_path: str, mode: str = "r", max_size: int = const.MAX_READ_FILE_BYTES) -> None: + self.file_path = file_path + self.file_reader = None + self.mode = mode + self.max_size = max_size + + def __enter__(self: any) -> any: + check_path_valid(self.file_path, True, max_size=self.max_size) + self.file_reader = open(self.file_path, self.mode) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file_reader: + self.file_reader.close() + + +class FdOpen: + """ + creat and write file + """ + + def __init__(self: any, file_path: str, flags: int = const.WRITE_FLAGS, mode: int = const.WRITE_MODES, + operate: str = "w", newline: str = None) -> None: + self.file_path = file_path + self.flags = flags + self.newline = newline + self.mode = mode + self.operate = operate + self.fd = None + self.file_open = None + + def __enter__(self: any) -> any: + file_dir = os.path.dirname(self.file_path) + check_dir_writable(file_dir) + self.fd = os.open(self.file_path, self.flags, self.mode) + if self.newline is None: + self.file_open = os.fdopen(self.fd, self.operate) + else: + self.file_open = os.fdopen(self.fd, self.operate, newline=self.newline) + return self.file_open + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file_open: + self.file_open.close() + elif self.fd: + os.close(self.fd) + + +def check_dir_writable(path: str) -> None: + """ + check path is dir and writable + """ + check_path_valid(path, False) + if not os.access(path, os.W_OK): + raise PermissionError(f"The path \"{path}\" does not have permission to write. " + f"Please check that the path is writeable.") + diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 3cf9a35a9d4269d9585864a21f41b6607cd53a71..2e9fd47bb1c5f2582980f32f10cf9d940ddb1c0c 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -14,12 +14,9 @@ from functools import wraps from typing import Any, Set import ijson import click -import requests -from requests.adapters import HTTPAdapter from tqdm import tqdm from profiler.advisor.common import constant as const -from profiler.advisor.common.version_control import VersionControl from profiler.advisor.utils.log import init_logger, get_log_level logger = logging.getLogger() @@ -31,18 +28,6 @@ def ignore_warning(exception: Exception = None): return exception -class ContextObject(object): - def __init__(self): - self._debug = False - - def set_debug(self, debug=False): - self._debug = debug - - @property - def debug_mode(self): - return self._debug - - def debug_option(f): return click.option('--debug', is_flag=True, @@ -271,19 +256,6 @@ class ParallelJob: completed_queue.put(token) -def mp_queue_to_list(job_queue): - queue_list = [] - while True: - try: - if job_queue.empty(): - break - token = job_queue.get(timeout=1) - queue_list.append(token) - except queue.Empty: - continue - return queue_list - - def load_parameter(parameter, default): if not os.environ.get(parameter, None): return default @@ -291,21 +263,6 @@ def load_parameter(parameter, default): return os.environ.get(parameter) -def get_supported_subclass(clazz: VersionControl.__class__, cann_version: str): - """ - Returns a list of subclasses that support the specified version, because of the __subclasses__(), - you need to import the all subclass first - :param clazz: Class name which is extends to VersionControl.__class__ - :param cann_version: The CANN software version - :return: The list of subclasses that support the specified CANN version - """ - # 获取所有支持这个cann版本的子类 - dataset_classes = clazz.__subclasses__() - sub_class_list = [cls for cls in dataset_classes if cls.is_supported(cann_version)] - logger.debug("The support subclass list is %s, cann version is %s", str(sub_class_list), cann_version) - return sub_class_list - - def to_percent(num: float) -> str: """ change float to percent format @@ -476,99 +433,6 @@ class SafeOpen: return True -def save_downloaded_file(response, url_path, file_save_path): - """保存响应体中的文件 - - 参数: - response: 请求后获取的响应体 - url_path: url路径 - file_save_path: 保存路径 - 返回: - final_file_path: 文件保存绝对路径 - """ - # 获取url路径中的文件名, 拼接在保存路径下 - file_save_path = os.path.normpath(file_save_path) - file_name = os.path.basename(url_path) - final_file_path = os.path.join(file_save_path, file_name) - # 若目标保存路径不存在,则自动生成 - if not os.path.exists(file_save_path): - os.makedirs(file_save_path) - if response.status_code <= 300: - logger.debug("Response status code is %s", response.status_code) - flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL - modes = stat.S_IWUSR | stat.S_IRUSR - # 若文件已存在,则移除已有的文件并保存最新的文件 - if os.path.exists(final_file_path): - os.remove(final_file_path) - # 保存文件 - with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f: - f.write(response.content) - logger.info("Success to save content in: %s", os.path.abspath(final_file_path)) - else: - # 若响应码不为预期的数值, 显示相应告警 - logger.warning("Failed to save the response body. The response status code is %s. " - "Please check the network or try another region", response.status_code) - - -def request_with_retry(url_path, region_name=None): - """使用requests请求获取文件, 失败则进行重试, 最多请求 max_retries+1 次 - - 参数: - url_path: URL路径 - file_save_path: 云文件保存路径 - """ - logger.debug("Requesting or retrying to get file from region: %s", region_name) - - # 若从环境变量指定了保存路径,优先从环境变量中获取,若为空则使用默认的云文件保存路径constant.CLOUD_RULE_PATH - file_save_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH) - if os.getenv(const.ADVISOR_RULE_PATH): - file_save_path = os.getenv(const.ADVISOR_RULE_PATH) - - session = requests.Session() - # 使用session发起的所有请求, 默认最多会重试 max_retries 次, 计入最初请求, 最差情况下请求 max_retries+1 次 - adapter = HTTPAdapter(max_retries=const.MAX_RETRIES) - session.mount(const.HTTP_PREFIXES, adapter) - session.mount(const.HTTPS_PREFIXES, adapter) - - logger.debug('Session try to get response') - response = None - try: - response = session.get(url_path, timeout=const.TIMEOUT) - except Exception as e: - logger.debug("Error: %s: %s", e, traceback.format_exc()) - - if response is None: - logger.warning("Fail to download file from region: %s, response is None, " - "please use the environment variable %s for more detailed information", - region_name, const.ADVISOR_LOG_LEVEL) - else: - try: - # 若响应码为400~600之间,response.raise_for_status抛出HTTPError错误, 跳过调用save_downloaded_file函数逻辑 - response.raise_for_status() - save_downloaded_file(response, url_path=url_path, file_save_path=file_save_path) - except Exception as e: - logger.warning("Error: %s: %s", e, traceback.format_exc()) - # 关闭 session, 清除所有装配器 - session.close() - - -def read_csv(file): - import csv - - raw_data = [] - logger.debug("Parse file %s", file) - with SafeOpen(file, encoding="utf-8") as csv_file: - try: - csv_content = csv.reader(csv_file) - for row in csv_content: - raw_data.append(row) - except OSError as error: - logger.error("Read csv file failed : %s", error) - return [] - - return raw_data - - def get_file_path_by_walk(root, filename): file_path = "" for root, _, files in os.walk(root, topdown=True): @@ -579,16 +443,33 @@ def get_file_path_by_walk(root, filename): return file_path -def check_path_valid(path): - if os.path.islink(os.path.abspath(path)): - logger.error("fThe path is detected as a soft connection. path:%ss", path) - return False - elif not os.access(path, os.R_OK): - logger.error(f"The file is not readable. path:%ss", path) - return False - elif os.path.getsize(path) > const.MAX_FILE_SIZE: - logger.error(f"The file size exceeds the limit. path:%ss, MAX_FILE_SIZE:%ss B", path, const.MAX_FILE_SIZE) - return False +def check_path_valid(path: str, is_file: bool = True, max_size: int = const.MAX_READ_FILE_BYTES) -> bool: + """ + check the path is valid or not + :param path: file path + :param is_file: file or not + :param max_size: file's max size + :return: bool + """ + if path == "": + raise FileNotFoundError("The path is empty. Please enter a valid path.") + if not os.path.exists(path): + raise FileNotFoundError(f"The path \"{path}\" does not exist. Please check that the path exists.") + if is_file: + if not os.path.isfile(path): + raise FileNotFoundError(f"The path \"{path}\" is not a file. Please check the path.") + if os.path.islink(path): + raise FileNotFoundError(f"The path \"{path}\" is link. Please check the path.") + if os.path.getsize(path) > max_size: + raise OSError(f"The path \"{path}\" is too large to read. Please check the path.") + else: + if not os.path.isdir(path): + raise FileNotFoundError(f"The path \"{path}\" is not a directory. Please check the path.") + if os.path.islink(path): + raise FileNotFoundError(f"The path \"{path}\" is link. Please check the path.") + if not os.access(path, os.R_OK): + raise PermissionError(f"The path \"{path}\" does not have permission to read. " + f"Please check that the path is readable.") return True diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 44f0e7105db6bedbdda1f8a230d0c7858b3e38c5..2e5df19827d49a3765f64dac60d14a1a5b28c260 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -1,5 +1,6 @@ from abc import abstractmethod, ABC from decimal import Decimal +import logging from compare_backend.compare_bean.origin_data_bean.compare_event import KernelEvent, MemoryEvent from compare_backend.compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean @@ -8,6 +9,8 @@ from compare_backend.compare_bean.profiling_info import ProfilingInfo from compare_backend.utils.constant import Constant from compare_backend.utils.file_reader import FileReader +logger = logging.getLogger() + class ProfilingResult: @@ -313,19 +316,20 @@ class BaseProfilingParser(ABC): def _check_result_data(self): if self._enable_operator_compare or self._enable_memory_compare or self._enable_api_compare: if not self._result_data.torch_op_data: - print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") + logger.warning("Can't find any torch op in the file: %s", self._profiling_path) if self._enable_operator_compare and not self._result_data.kernel_dict: - print(f"[WARNING] Can't find any flow event in the file: {self._profiling_path}") + logger.warning("Can't find any flow event in the file: %s", self._profiling_path) if self._enable_memory_compare and not self._result_data.memory_list: - print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") + logger.warning("Can't find any memory event in the file: %s", self._profiling_path) if self._enable_communication_compare and not self._result_data.communication_dict: - print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") + logger.warning("Can't find any communication op in the file: %s", self._profiling_path) if self._enable_kernel_compare and not self._result_data.kernel_details: if self._profiling_type == Constant.GPU: - print(f"[WARNING] kernel compare only support between NPU data and NPU data.") + logger.warning(f"kernel compare only support between NPU data and NPU data.") else: - print(f"[WARNING] Can't find any valid kernels in the file: {self._profiling_path}. Please " - f"make sure that the profiling data is greater than level0 and aic_metrics=PipeUtilization.") + logger.warning("Can't find any valid kernels in the file: %s. Please " + "make sure that the profiling data is greater than level0 and " + "aic_metrics=PipeUtilization.", self._profiling_path) def _read_trace_event(self): try: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 8f2714d9cd488c6c9cd9d684a749108c90d6b9b2..28d25e3e1c1f0aa4a8e62ce9a51584c582fc9c8b 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -1,5 +1,6 @@ import os import sys +import logging from math import ceil from compare_backend.compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean @@ -11,6 +12,9 @@ from compare_backend.utils.constant import Constant from compare_backend.utils.file_reader import FileReader +logger = logging.getLogger() + + class NPUProfilingParser(BaseProfilingParser): FLOW_CAT = "async_npu" TORCH_OP_CAT = "cpu_op" @@ -85,9 +89,9 @@ class NPUProfilingParser(BaseProfilingParser): try: kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) except FileNotFoundError: - print("[WARNING] The file kernel_details.csv does not exist.") + logger.warning("The file kernel_details.csv does not exist.") except Exception: - print("[ERROR] Failed to read kernel_details.csv.") + logger.warning("Failed to read kernel_details.csv.") return if not kernel_details: return @@ -106,7 +110,7 @@ class NPUProfilingParser(BaseProfilingParser): " please check whether the data contains this step." raise RuntimeError(msg) else: - print("[WARNING] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") + logger.warning("Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") return self._result_data.update_kernel_details(kernels_dict) @@ -114,10 +118,10 @@ class NPUProfilingParser(BaseProfilingParser): try: memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) except FileNotFoundError: - print("[WARNING] The file operator_memory.csv does not exist.") + logger.warning("The file operator_memory.csv does not exist.") return except Exception: - print("[ERROR] Failed to read operator_memory.csv.") + logger.error("Failed to read operator_memory.csv.") return if memory_data: self._dequeue_data.sort(key=lambda x: x.start_time) @@ -156,12 +160,12 @@ class NPUProfilingParser(BaseProfilingParser): try: communication_json = FileReader.read_trace_file(self._communication_path) except FileNotFoundError: - print("[WARNING] The file communication.json does not exist.") + logger.warning("The file communication.json does not exist.") except Exception: - print("[ERROR] Failed to read communication.json.") + logger.error("Failed to read communication.json.") return if not communication_json: - print("[WARNING] The communication.json file is empty.") + logger.warning("The communication.json file is empty.") return for _, group_dict in communication_json.items(): step_dict = group_dict.get("collective", {}) @@ -317,10 +321,10 @@ class NPUProfilingParser(BaseProfilingParser): try: json_data = FileReader.read_trace_file(self._info_json_path) except Exception: - print('[ERROR] Failed to read profiler_info.json.') + logger.error('Failed to read profiler_info.json.') return if not isinstance(json_data, dict) or not json_data: - print('[WARNING] Invalid profiler info.') + logger.warning('Invalid profiler info.') return level = json_data.get('config', {}).get('experimental_config', {}).get('_profiler_level', '') if self.LEVEL_0 != level: @@ -339,7 +343,7 @@ class NPUProfilingParser(BaseProfilingParser): try: kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) except Exception: - print('[ERROR] Npu kernel details csv file is not available.') + logger.error('Npu kernel details csv file is not available.') return if not kernel_details or kernel_details[0].is_hide_op_pmu(): self._result_data.overall_metrics.hide_op_details = True @@ -355,16 +359,16 @@ class NPUProfilingParser(BaseProfilingParser): try: memory_record = FileReader.read_csv_file(self._memory_record_path, MemoryRecordBean) except FileNotFoundError: - print('[WARNING] Npu memory record csv file is not available.') + logger.warning('Npu memory record csv file is not available.') except Exception: - print('[ERROR] Load memory info failed.') + logger.error('Load memory info failed.') else: memory_used = max([memory.total_reserved_mb for memory in memory_record]) / 1024 self._result_data.overall_metrics.set_memory_used(memory_used) def __add_overlap_analysis_time(self): if not self._overlap_analysis: - print('[WARNING] Failed to get overlap analysis data.') + logger.warning('Failed to get overlap analysis data.') return min_ts = sys.float_info.max max_ts = sys.float_info.min diff --git a/profiler/requirements/build.txt b/profiler/requirements/build.txt index ec879a35366820f589906321186420754385a9a7..a5763cf474ca1a21d9439b7a166d6d9afc84c44b 100644 --- a/profiler/requirements/build.txt +++ b/profiler/requirements/build.txt @@ -11,5 +11,5 @@ xlsxwriter sqlalchemy urllib3<2.0 bottleneck>=1.3.6 -numpy==1.26.4 +numpy<=1.26.4 pandas \ No newline at end of file