class GradComparator:
    """Compare boolean tensors dumped as ``.pt`` files by two runs.

    Both runs are expected to use the layout ``<root>/step_<N>/<name>.pt``.
    For every step folder present under both roots, and every ``.pt`` file
    present in both step folders, the fraction of element-wise equal values
    is computed and written to ``output_dir`` as a PNG plot and a CSV file.
    """

    @staticmethod
    def compare(path1: str, path2: str, output_dir):
        """Compute per-file and per-step similarities and save them.

        Args:
            path1: Root directory of the first dump.
            path2: Root directory of the second dump.
            output_dir: Directory receiving the plots and CSV files; it is
                created when missing.

        Raises:
            Exception: If the two roots share no step folder, or if any
                tensor pair fails the checks in ``_calc_similarity``.
        """
        steps = GradComparator._get_steps(path1, path2)
        if not steps:
            raise Exception("no step for comparison")
        # Maps a file name (or "summary") to one similarity value per step.
        similarities = {}
        print(f"the following steps will be compared:\n{steps}")
        for step in tqdm(steps, desc="culculate similarities"):
            same_count_summary = 0
            total_count_summary = 0
            for pt_file in GradComparator._get_pt_files(path1, path2, step):
                pt1 = os.path.join(path1, f'step_{step}', pt_file)
                pt2 = os.path.join(path2, f'step_{step}', pt_file)
                same_count, total_count = GradComparator._calc_similarity(pt1, pt2)
                same_count_summary += same_count
                total_count_summary += total_count
                similarities.setdefault(pt_file, []).append(
                    GradComparator._ratio(same_count, total_count))
            # "summary" aggregates the element counts of every file in the step.
            similarities.setdefault("summary", []).append(
                GradComparator._ratio(same_count_summary, total_count_summary))
        os.makedirs(output_dir, exist_ok=True)
        GradComparator._save_similar(similarities, steps, output_dir)

    @staticmethod
    def _ratio(same_count, total_count):
        """Return ``same_count / total_count``, or 0 when nothing was compared."""
        if total_count == 0:
            return 0
        return same_count / total_count

    @staticmethod
    def _parse_step(folder_name: str):
        """Return the step number encoded in ``step_<N>``, or None.

        Only canonical names are accepted (exactly one underscore, plain
        decimal digits, no leading zeros), because callers rebuild the folder
        name as ``f'step_{step}'`` and that must map back to the same folder.
        """
        prefix, separator, number = folder_name.partition('_')
        if prefix != 'step' or not separator or not number.isdecimal():
            return None
        step = int(number)
        # Rejects zero-padded or non-ASCII digits that would not round-trip.
        if str(step) != number:
            return None
        return step

    @staticmethod
    def _get_steps(path1: str, path2: str):
        """Return the sorted step numbers whose folders exist under both roots.

        Entries that are not directories, or whose name is not a canonical
        ``step_<N>``, are skipped.
        """
        path_check(path1, isdir=True)
        path_check(path2, isdir=True)
        steps = []
        for folder in os.listdir(path1):
            step = GradComparator._parse_step(folder)
            if step is None:
                continue
            if not os.path.isdir(os.path.join(path1, folder)):
                continue
            if not os.path.isdir(os.path.join(path2, folder)):
                continue
            steps.append(step)
        return sorted(steps)

    @staticmethod
    def _get_pt_files(path1: str, path2: str, step: int):
        """Return the sorted ``.pt`` file names present in both step folders."""
        step_dir1 = os.path.join(path1, f'step_{step}')
        step_dir2 = os.path.join(path2, f'step_{step}')
        path_check(step_dir1, isdir=True)
        path_check(step_dir2, isdir=True)
        pt_files = []
        for name in os.listdir(step_dir1):
            if not name.endswith('.pt'):
                continue
            if not os.path.isfile(os.path.join(step_dir1, name)):
                continue
            if not os.path.isfile(os.path.join(step_dir2, name)):
                continue
            pt_files.append(name)
        return sorted(pt_files)

    @staticmethod
    def _save_similar(similarities: dict, steps: list, output_dir: str):
        """Write one PNG plot and one CSV file per entry of ``similarities``.

        Args:
            similarities: Maps a key (file name or "summary") to a list of
                similarity values, one per entry of ``steps``.
            steps: Step numbers used as the x axis and as CSV header.
            output_dir: Existing directory receiving the output files.

        Raises:
            Exception: If ``similarities`` is empty, or a value list does not
                have exactly one entry per step (for instance when a file is
                missing from some of the compared steps).
        """
        if not similarities:
            raise Exception("length of similarities is 0")
        head_tuple = tuple(['step'] + [str(step) for step in steps])
        for key, value in tqdm(similarities.items(), desc="save similarities"):
            if len(value) != len(steps):
                raise Exception(f"similarities length of {key}:{len(value)} not equal steps:{len(steps)}")
            try:
                plt.plot(steps, value)
                plt.xlabel('steps')
                plt.ylabel('similarities')
                plt.title(f'{key}_similarities')
                plt.savefig(os.path.join(output_dir, f'{key}_similarities.png'))
            finally:
                # Always release the figure so a failure does not leak it
                # into the plot of the next key.
                plt.close()
            write_csv(os.path.join(output_dir, f'{key}_similarities.csv'),
                      [['similarity'] + value], head_tuple)

    @staticmethod
    def _calc_similarity(pt_file1: str, pt_file2: str):
        """Count equal elements between two saved boolean tensors.

        Args:
            pt_file1: Path of the first ``.pt`` file.
            pt_file2: Path of the second ``.pt`` file.

        Returns:
            A ``(same_count, total_count)`` tuple of ints.

        Raises:
            Exception: If either file does not hold a tensor, the shapes
                differ, or either tensor is not of dtype ``torch.bool``.
        """
        # NOTE(security): torch.load unpickles the file; only use on trusted
        # dumps. map_location keeps the comparison on CPU so dumps written on
        # different or unavailable devices can still be compared.
        tensor1 = torch.load(pt_file1, map_location='cpu')
        tensor2 = torch.load(pt_file2, map_location='cpu')
        if not isinstance(tensor1, torch.Tensor):
            raise Exception(f"file content is not a tensor: {pt_file1}")
        if not isinstance(tensor2, torch.Tensor):
            raise Exception(f"file content is not a tensor: {pt_file2}")
        if tensor1.shape != tensor2.shape:
            raise Exception(f"tensor shape is not equal: {pt_file1}, {pt_file2}")
        if tensor1.dtype != torch.bool:
            raise Exception(f"tensor type is not bool: {pt_file1}")
        if tensor2.dtype != torch.bool:
            raise Exception(f"tensor type is not bool: {pt_file2}")
        same_count = (tensor1 == tensor2).sum().item()
        total_count = tensor1.numel()
        return same_count, total_count
def path_check(path, isdir=False):
    """Validate ``path`` by delegating to ``check_file_or_directory_path``.

    Args:
        path: File-system path to validate.
        isdir: Forwarded unchanged as the helper's second positional
            argument; presumably selects directory rather than file
            validation (confirm against the helper).
    """
    check_file_or_directory_path(path, isdir)