def convert_to_int(value):
    """Convert ``value`` to ``int``, falling back to ``-1`` when that is impossible.

    Args:
        value: Anything accepted by the built-in ``int`` (numbers, numeric strings, ...).

    Returns:
        ``int(value)`` on success, otherwise the sentinel ``-1``.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type such as None or a list.
        # ValueError: non-numeric text or NaN.
        # OverflowError: an infinite float.
        # Anything else is a genuine bug and is deliberately allowed to propagate.
        return -1


def _list_sub_directories(parent_dir):
    """Return the full paths of the directories directly inside ``parent_dir``, sorted by name."""
    candidates = (os.path.join(parent_dir, entry) for entry in sorted(os.listdir(parent_dir)))
    return [path for path in candidates if os.path.isdir(path)]


def list_lowest_level_directories(root_dir):
    """Collect every directory below ``root_dir`` that has no sub-directory of its own.

    ``root_dir`` itself is never part of the result, so a root without any
    sub-directory yields an empty list. Paths are built with ``os.path.join``
    starting from ``root_dir`` and are returned in depth-first order, with the
    entries of each directory visited in sorted name order so the result is
    reproducible across file systems.

    Args:
        root_dir: Directory to search. It is first passed to ``check_path_exists``
            (imported from ``msprobe.core.common.file_check``).

    Returns:
        A list of paths of the lowest-level directories.

    Raises:
        OSError: If a directory in the tree cannot be listed.
    """
    check_path_exists(root_dir)
    lowest_level_dirs = []

    # An explicit stack replaces recursion, so each directory is listed exactly once
    # and very deep trees cannot hit the interpreter recursion limit.
    # The stack is filled in reverse so that pop() yields entries in sorted order.
    pending = _list_sub_directories(root_dir)[::-1]
    while pending:
        current_dir = pending.pop()
        sub_dirs = _list_sub_directories(current_dir)
        if sub_dirs:
            pending.extend(reversed(sub_dirs))
        else:
            lowest_level_dirs.append(current_dir)

    return lowest_level_dirs
b/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py @@ -1,19 +1,17 @@ +import copy import csv import glob import os -import sys -import copy import numpy as np import pandas as pd from msprobe.core.common.const import CompareConst, GraphMode -from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_check import create_directory -from msprobe.mindspore.common.log import logger +from msprobe.core.common.file_check import FileOpen +from msprobe.core.common.log import logger from msprobe.core.common.utils import add_time_with_xlsx, CompareException from msprobe.core.compare.multiprocessing_compute import _ms_graph_handle_multi_process, check_accuracy from msprobe.core.compare.npy_compare import npy_data_check, statistics_data_check, reshape_value, compare_ops_apply -from msprobe.core.common.file_check import FileOpen +from msprobe.mindspore.common.utils import convert_to_int, list_lowest_level_directories class row_data: def __init__(self, mode): @@ -147,8 +145,8 @@ class GraphMSComparator: self.output_path = output_path self.base_npu_path = input_param.get('npu_path', None) self.base_bench_path = input_param.get('bench_path', None) - self.rank_list = input_param.get('rank_id', []) - self.step_list = input_param.get('step_id', []) + self.rank_list = [convert_to_int(rank_id) for rank_id in input_param.get('rank_id', [])] + self.step_list = [convert_to_int(step_id) for step_id in input_param.get('step_id', [])] @staticmethod def compare_ops(compare_result_db, mode):