From d31c0ee251b859724351b84e105631abcfc48b6e Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Thu, 15 May 2025 19:57:05 +0800 Subject: [PATCH] bugfix: fix test_csv2tb ut --- .../test/pytorch_ut/monitor/test_csv2tb.py | 532 +++++++++--------- 1 file changed, 267 insertions(+), 265 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py index d9e898c1d3..abe46bf49c 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py @@ -17,7 +17,6 @@ import os import shutil import random import unittest -import pytest import torch import numpy as np import torch.nn as nn @@ -30,13 +29,9 @@ from msprobe.pytorch.hook_module.api_register import get_api_register get_api_register().restore_all_api() - base_dir = os.path.dirname(os.path.realpath(__file__)) config_json_path = os.path.join(base_dir, "config", "all_config.json") monitor_output = os.path.join(base_dir, "./monitor_output_csv2tb") -os.environ[MonitorConst.MONITOR_OUTPUT_DIR] = monitor_output -timestamp_dirpath = None -csv2tb_dirpath = None def seed_all(seed=1234, mode=False): @@ -46,8 +41,8 @@ def seed_all(seed=1234, mode=False): torch.manual_seed(seed) torch.use_deterministic_algorithms(mode) -seed_all() +seed_all() inputs = [torch.rand(10, 10) for _ in range(10)] labels = [torch.randint(0, 5, (10,)) for _ in range(10)] @@ -65,31 +60,6 @@ class MockModule(nn.Module): return x2 -def data_collect(): - loss_fun = nn.CrossEntropyLoss() - test_module = MockModule() - nn.init.constant_(test_module.linear.weight, 1.0) - nn.init.constant_(test_module.linear.bias, 1.0) - optimizer = torch.optim.Adam(test_module.parameters()) - - monitor = TrainerMon(config_json_path, params_have_main_grad=False) - monitor.set_monitor(test_module, grad_acc_steps=1, optimizer=optimizer) - - for input_data, label in zip(inputs, labels): - output = test_module(input_data) - loss = loss_fun(output, label) - optimizer.zero_grad() - loss.backward() - optimizer.step() - - global timestamp_dirpath, csv2tb_dirpath - timestamp_dirpath = os.path.join(monitor_output, os.listdir(monitor_output)[0]) - csv2tensorboard_by_step(monitor_output) - for dirname in os.listdir(monitor_output): - if "csv2tensorboard" in dirname: - csv2tb_dirpath = os.path.join(monitor_output, dirname, "rank0") - - def extract_scalars_from_tensorboard(log_dir): # 初始化 EventAccumulator event_acc = EventAccumulator(log_dir) @@ -144,97 +114,102 @@ def compare_scalar_dicts(dict1, dict2): return True -@pytest.fixture(scope="session") -def setup_all(): - data_collect() - yield - shutil.rmtree(monitor_output) - -@pytest.mark.usefixtures("setup_all") class TestGradMonitor(unittest.TestCase): + timestamp_dirpath = None + csv2tb_dirpath = None + + @classmethod + def setUpClass(cls): + + os.environ[MonitorConst.MONITOR_OUTPUT_DIR] = monitor_output + if os.path.exists(monitor_output): + shutil.rmtree(monitor_output) + + loss_fun = nn.CrossEntropyLoss() + test_module = MockModule() + nn.init.constant_(test_module.linear.weight, 1.0) + nn.init.constant_(test_module.linear.bias, 1.0) + optimizer = torch.optim.Adam(test_module.parameters()) + + monitor = TrainerMon(config_json_path, params_have_main_grad=False) + monitor.set_monitor(test_module, grad_acc_steps=1, optimizer=optimizer) + + for input_data, label in zip(inputs, labels): + output = test_module(input_data) + loss = loss_fun(output, label) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + cls.timestamp_dirpath = os.path.join(monitor_output, os.listdir(monitor_output)[0]) + csv2tensorboard_by_step(monitor_output) + for dirname in os.listdir(monitor_output): + if "csv2tensorboard" in dirname: + cls.csv2tb_dirpath = os.path.join(monitor_output, dirname, "rank0") + os.environ.pop(MonitorConst.MONITOR_OUTPUT_DIR) def setUp(self): self.maxDiff = None - + def test_actv(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"actv_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "actv_0-2.csv")) result = { 'vp0:.input:micro0': { - 0: {'nans': 0.0,'norm': 5.550016}, - 1: {'nans': 0.0,'norm': 5.975112}, - 2: {'nans': 0.0,'norm': 5.789881} - }, + 0: {'nans': 0.0, 'norm': 5.550016}, + 1: {'nans': 0.0, 'norm': 5.975112}, + 2: {'nans': 0.0, 'norm': 5.789881} + }, 'vp0:.output:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - }, + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} + }, 'vp0:linear.input:micro0': { - 0: {'nans': 0.0,'norm': 5.550016}, - 1: {'nans': 0.0,'norm': 5.975112}, - 2: {'nans': 0.0,'norm': 5.789881} - }, + 0: {'nans': 0.0, 'norm': 5.550016}, + 1: {'nans': 0.0, 'norm': 5.975112}, + 2: {'nans': 0.0, 'norm': 5.789881} + }, 'vp0:linear.output:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - }, + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} + }, 'vp0:relu.input:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - }, + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} + }, 'vp0:relu.output:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - } + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} } - self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "actv")) + } + self.assertDictEqual(data, result) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "actv")) print(tb_data) tb_result = { 'vp0:.input:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:.input:micro0/norm': [(0, 5.550015926361084), - (1, 5.975111961364746), - (2, 5.789881229400635), - (3, 6.052319049835205), - (4, 5.573315143585205), - (5, 5.864360809326172), - (6, 5.292460918426514), - (7, 5.477899074554443), - (8, 5.884613990783691), - (9, 5.456457138061523)], + (1, 5.975111961364746), + (2, 5.789881229400635), + (3, 6.052319049835205), + (4, 5.573315143585205), + (5, 5.864360809326172), + (6, 5.292460918426514), + (7, 5.477899074554443), + (8, 5.884613990783691), + (9, 5.456457138061523)], 'vp0:.output:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], - 'vp0:.output:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)], - 'vp0:linear.input:micro0/nans': [(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), @@ -244,117 +219,137 @@ class TestGradMonitor(unittest.TestCase): (7, 0.0), (8, 0.0), (9, 0.0)], + 'vp0:.output:micro0/norm': [(0, 41.842655181884766), + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)], + 'vp0:linear.input:micro0/nans': [(0, 0.0), + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:linear.input:micro0/norm': [(0, 5.550015926361084), - (1, 5.975111961364746), - (2, 5.789881229400635), - (3, 6.052319049835205), - (4, 5.573315143585205), - (5, 5.864360809326172), - (6, 5.292460918426514), - (7, 5.477899074554443), - (8, 5.884613990783691), - (9, 5.456457138061523)], + (1, 5.975111961364746), + (2, 5.789881229400635), + (3, 6.052319049835205), + (4, 5.573315143585205), + (5, 5.864360809326172), + (6, 5.292460918426514), + (7, 5.477899074554443), + (8, 5.884613990783691), + (9, 5.456457138061523)], 'vp0:linear.output:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:linear.output:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)], + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)], 'vp0:relu.input:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:relu.input:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)], + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)], 'vp0:relu.output:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:relu.output:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)]} - self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)]} + self.assertDictEqual(tb_data, tb_result) def test_actv_grad(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"actv_grad_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "actv_grad_0-2.csv")) nan = np.nan result = { 'vp0:.input:micro0': { - 0: {'norm': nan, 'nans': nan}, - 1: {'norm': nan, 'nans': nan}, + 0: {'norm': nan, 'nans': nan}, + 1: {'norm': nan, 'nans': nan}, 2: {'norm': nan, 'nans': nan} - }, + }, 'vp0:.output:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - }, + }, 'vp0:relu.input:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - }, + }, 'vp0:relu.output:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - }, + }, 'vp0:linear.input:micro0': { - 0: {'norm': nan, 'nans': nan}, - 1: {'norm': nan, 'nans': nan}, + 0: {'norm': nan, 'nans': nan}, + 1: {'norm': nan, 'nans': nan}, 2: {'norm': nan, 'nans': nan} - }, + }, 'vp0:linear.output:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - } } + } + print(data) self.assertEqual(dict_equal(data, result), True) - - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "actv_grad")) + + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "actv_grad")) tb_result = { 'vp0:.input:micro0/nans': [(0, nan), (1, nan), @@ -475,88 +470,91 @@ class TestGradMonitor(unittest.TestCase): (6, 0.28316599130630493), (7, 0.28274500370025635), (8, 0.2833530008792877), - (9, 0.2825529873371124)]} + (9, 0.2825529873371124)] + } + print(tb_data) self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - def test_param(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"param_origin_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "param_origin_0-2.csv")) result = { 'vp0:linear.bias': { 0: {'nans': 0.0, 'norm': 2.236068}, 1: {'nans': 0.0, 'norm': 2.236198}, 2: {'nans': 0.0, 'norm': 2.235769} - }, + }, 'vp0:linear.weight': { 0: {'nans': 0.0, 'norm': 7.071068}, 1: {'nans': 0.0, 'norm': 7.068808}, 2: {'nans': 0.0, 'norm': 7.06771} - } } - self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "param_origin")) + } + self.assertDictEqual(data, result) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "param_origin")) tb_result = { 'vp0:linear.weight/norm': [ - (0, 7.071067810058594), - (1, 7.068808078765869), - (2, 7.067709922790527), - (3, 7.0673418045043945), - (4, 7.066926956176758), - (5, 7.066311836242676), - (6, 7.065629959106445), - (7, 7.065262794494629), - (8, 7.065001964569092), - (9, 7.064840793609619)], + (0, 7.071067810058594), + (1, 7.068808078765869), + (2, 7.067709922790527), + (3, 7.0673418045043945), + (4, 7.066926956176758), + (5, 7.066311836242676), + (6, 7.065629959106445), + (7, 7.065262794494629), + (8, 7.065001964569092), + (9, 7.064840793609619)], 'vp0:linear.weight/nans': [ - (0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (0, 0.0), + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:linear.bias/norm': [ - (0, 2.2360680103302), - (1, 2.2361979484558105), - (2, 2.235769033432007), - (3, 2.235903024673462), - (4, 2.2360129356384277), - (5, 2.2359039783477783), - (6, 2.2357990741729736), - (7, 2.2357349395751953), - (8, 2.2356700897216797), - (9, 2.235619068145752)], + (0, 2.2360680103302), + (1, 2.2361979484558105), + (2, 2.235769033432007), + (3, 2.235903024673462), + (4, 2.2360129356384277), + (5, 2.2359039783477783), + (6, 2.2357990741729736), + (7, 2.2357349395751953), + (8, 2.2356700897216797), + (9, 2.235619068145752) + ], 'vp0:linear.bias/nans': [ - (0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)] - } - self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) + (0, 0.0), + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0) + ] + } + self.assertDictEqual(tb_data, tb_result) def test_exp_avg(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"exp_avg_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "exp_avg_0-2.csv")) result = { 'vp0:linear.bias': { 1: {'nans': 0.0, 'norm': 0.024495}, 2: {'nans': 0.0, 'norm': 0.052203} - }, + }, 'vp0:linear.weight': { 1: {'nans': 0.0, 'norm': 0.052394}, 2: {'nans': 0.0, 'norm': 0.099221} - } } - self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "exp_avg")) + } + self.assertDictEqual(data, result) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "exp_avg")) tb_result = { 'vp0:linear.bias/nans': [(1, 0.0), (2, 0.0), @@ -594,22 +592,22 @@ class TestGradMonitor(unittest.TestCase): (7, 0.11372199654579163), (8, 0.12264800071716309), (9, 0.09017200022935867)]} - self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) + self.assertDictEqual(tb_data, tb_result) def test_exp_avg_sq(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"exp_avg_sq_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "exp_avg_sq_0-2.csv")) result = { 'vp0:linear.bias': { 1: {'nans': 0.0, 'norm': 4.2e-05}, 2: {'nans': 0.0, 'norm': 9.6e-05} - }, + }, 'vp0:linear.weight': { 1: {'nans': 0.0, 'norm': 6.7e-05}, 2: {'nans': 0.0, 'norm': 0.000126} - } } - self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "exp_avg_sq")) + } + self.assertDictEqual(data, result) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "exp_avg_sq")) tb_result = { 'vp0:linear.bias/nans': [(1, 0.0), (2, 0.0), @@ -647,24 +645,24 @@ class TestGradMonitor(unittest.TestCase): (7, 0.00026000000070780516), (8, 0.00028700000257231295), (9, 0.0003060000017285347)]} - self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - + self.assertDictEqual(tb_data, tb_result) + def test_grad_reduced(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"grad_reduced_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "grad_reduced_0-2.csv")) result = { 'vp0:linear.bias': { 0: {'nans': 0.0, 'norm': 0.244949}, 1: {'nans': 0.0, 'norm': 0.314345}, 2: {'nans': 0.0, 'norm': 0.281475} - }, + }, 'vp0:linear.weight': { 0: {'nans': 0.0, 'norm': 0.523935}, 1: {'nans': 0.0, 'norm': 0.595672}, 2: {'nans': 0.0, 'norm': 0.497603} - } } - self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "grad_reduced")) + } + self.assertDictEqual(data, result) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "grad_reduced")) tb_result = { 'vp0:linear.bias/nans': [(0, 0.0), (1, 0.0), @@ -706,25 +704,25 @@ class TestGradMonitor(unittest.TestCase): (7, 0.4831080138683319), (8, 0.3234719932079315), (9, 0.32385098934173584)]} - self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - + self.assertDictEqual(tb_data, tb_result) + def test_grad_unreduced(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"grad_unreduced_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "grad_unreduced_0-2.csv")) result = { 'vp0:linear.bias': { 0: {'nans': 0.0, 'norm': 0.244949}, 1: {'nans': 0.0, 'norm': 0.314345}, 2: {'nans': 0.0, 'norm': 0.281475} - }, + }, 'vp0:linear.weight': { 0: {'nans': 0.0, 'norm': 0.523935}, 1: {'nans': 0.0, 'norm': 0.595672}, 2: {'nans': 0.0, 'norm': 0.497603} - } } - self.assertEqual(dict_equal(data, result), True) + } + self.assertDictEqual(data, result) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "grad_unreduced")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "grad_unreduced")) tb_result = { 'vp0:linear.bias/nans': [(0, 0.0), (1, 0.0), @@ -766,4 +764,8 @@ class TestGradMonitor(unittest.TestCase): (7, 0.4831080138683319), (8, 0.3234719932079315), (9, 0.32385098934173584)]} - self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) + self.assertDictEqual(tb_data, tb_result) + + +if __name__ == '__main__': + unittest.main() -- Gitee