diff --git a/debug/accuracy_tools/msprobe/mindspore/monitor/anomaly_detect.py b/debug/accuracy_tools/msprobe/mindspore/monitor/anomaly_detect.py index 3544ebbd025614349585bc799b15e00a5c2c7956..9eed28575f7a843f71050221ac59ff9df65bcb4d 100644 --- a/debug/accuracy_tools/msprobe/mindspore/monitor/anomaly_detect.py +++ b/debug/accuracy_tools/msprobe/mindspore/monitor/anomaly_detect.py @@ -162,9 +162,8 @@ class TrainStage: OPTIMIZER_STAGE = 2 -FORWARD_KEY = [MonitorConst.ACTV_IN, MonitorConst.ACTV_OUT] -BACKWARD_KEY = [MonitorConst.ACTVGRAD_IN, MonitorConst.ACTVGRAD_OUT, - MonitorConst.PRE_GRAD, MonitorConst.POST_GRAD, MonitorConst.ACC_GRAD] +FORWARD_KEY = [MonitorConst.ACTV] +BACKWARD_KEY = [MonitorConst.ACTVGRAD, MonitorConst.PRE_GRAD, MonitorConst.POST_GRAD, MonitorConst.ACC_GRAD] OPTIMIZER_KEY = [MonitorConst.EXP_AVG, MonitorConst.EXP_AVG_SQ] TRAIN_STAGE = { **{key_: TrainStage.FORWARD_STAGE for key_ in FORWARD_KEY}, @@ -222,7 +221,7 @@ class GradAnomalyData: @staticmethod def get_train_stage(tag_name): """ - :param tag_name: "0:fc2_0/rank0/input", "0:fc1.weight/rank0/post_grad", "0:fc2.weight/rank0/exp_avg_sq" + :param tag_name: "0:fc2.input:0/rank0/actv", "0:fc1.weight/rank0/post_grad", "0:fc2.weight/rank0/exp_avg_sq" :return: int, if forward return 0; if backward return 1; if optimizer return 2 """ key_ = tag_name.split("/")[-1] @@ -255,6 +254,45 @@ class BaseWriterWithAD: self.anomalies = [] self.ndigits = writer_input.ndigits + @staticmethod + def stack_tensors(tensor_list): + """ + Torch not support stack cpu and xpu tensors. Group the tensors into cpu_group and xpu_group, + stack them separately, migrate xpu_group to cpu, and then restore in the order of input. + + :param tensor_list: [tensor(-1.6165), tensor(-1.0985), tensor(-1.7777), tensor(-1.8408, device='npu:0')] + :return: tensor: tensor([-1.6165, -1.0985, -1.7777, -1.8408], device='cpu') + """ + cpu_tensors = [] + xpu_tensors = [] + + # 将张量分别放入cpu_tensors和xpu_tensors列表 + for tensor in tensor_list: + if tensor.device.type == 'cpu': + cpu_tensors.append(tensor) + else: + xpu_tensors.append(tensor) + + # 分别堆叠cpu_tensors和xpu_tensors + cpu_stack = ops.stack(cpu_tensors) if cpu_tensors else ops.tensor([]) + xpu_stack = ops.stack(xpu_tensors).tolist() if xpu_tensors else ops.tensor([]) + + # 按照输入的顺序恢复 + result = [] + cpu_tensors_idx, xpu_tensors_idx = 0, 0 + for tensor in tensor_list: + if tensor.device.type == 'cpu': + result.append(cpu_stack[cpu_tensors_idx]) + cpu_tensors_idx += 1 + else: + result.append(xpu_stack[xpu_tensors_idx]) + xpu_tensors_idx += 1 + + # 将结果堆叠成一个张量 + result = ops.stack(result) + + return result + def get_anomalies(self): """返回已检测到的异常列表 """ @@ -290,8 +328,12 @@ class BaseWriterWithAD: tags = list(itertools.product(metric_value.keys(), op_list)) for op2tensor in metric_value.values(): tensors.extend(op2tensor.values()) + + if not tensors: + return + with _no_grad(): - metric_list = ops.stack(tensors).tolist() if tensors else [] + metric_list = self.stack_tensors(tensors) for tag, metric in zip(tags, metric_list): self.add_scalar(tag, metric, step, need_explain) @@ -353,10 +395,9 @@ class CSVWriterWithAD(BaseWriterWithAD): new_data = [] for name, metric_value in self.context_dict.items(): - if MonitorConst.NAME_SEP not in name: - new_data.append([name] + [step] + metric_value) - else: - new_data.append(name.split(MonitorConst.NAME_SEP) + [step] + metric_value) + new_line = name.split(MonitorConst.NAME_SEP) + metric_value + new_line.insert(2, step) + new_data.append(new_line) new_data = 
pd.DataFrame(new_data).round(self.ndigits) write_df_to_csv(new_data, filepath, mode='a+', header=False) self.context_dict = defaultdict(list) @@ -379,26 +420,11 @@ class CSVWriterWithAD(BaseWriterWithAD): need_explain = prefix == 'other' super().write_metrics(op_list, metric_value, step, prefix='', need_explain=need_explain) - # generate csv headers - # set hashmap to reduce the number of headers generated. - # 前向的norm用input.ops_和output.ops_,反向的用input_grad.ops_和output_grad.ops_ - if prefix in {"actv", "actv_grad"}: - if prefix == "actv": - input_and_output = [MonitorConst.ACTV_IN, MonitorConst.ACTV_OUT] - else: - input_and_output = [MonitorConst.ACTVGRAD_IN, MonitorConst.ACTVGRAD_OUT] - ops_ = [MonitorConst.DOT.join(i) for i in itertools.product(input_and_output, op_list)] - csv_header = ["module_name", "step", *ops_] + if prefix in [MonitorConst.ACTV, MonitorConst.ACTVGRAD]: + self.header = MonitorConst.CSV_HEADER_XY + ops else: - csv_header = ["param_name", "step", *op_list] - - keys = list(metric_value.keys()) - if keys and MonitorConst.NAME_SEP in keys[0]: - csv_header.insert(0, "vpp_stage") - - self.header = csv_header + self.header = MonitorConst.CSV_HEADER + ops self.write_csv(prefix, step) - self.header = [] def close(self): pass diff --git a/debug/accuracy_tools/msprobe/mindspore/monitor/module_hook.py b/debug/accuracy_tools/msprobe/mindspore/monitor/module_hook.py index ffda6d6202eb98b38a9efe6a43d1357c714646e1..5e1269ac9c5bb2b6e2021cdeb1fa3a9c097eceb1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/monitor/module_hook.py +++ b/debug/accuracy_tools/msprobe/mindspore/monitor/module_hook.py @@ -25,12 +25,12 @@ from mindspore import Tensor, mint from mindspore import nn, _no_grad from msprobe.core.common.log import logger -from msprobe.core.common.const import MonitorConst +from msprobe.core.common.const import MonitorConst, Const from msprobe.core.common.file_utils import load_json, save_json from msprobe.mindspore.common.utils import is_mindtorch from msprobe.mindspore.monitor.common_func import is_valid_instance, get_parameters, get_submodules, get_rank from msprobe.mindspore.monitor.utils import get_summary_writer_tag_name, validate_config, step_accumulates_one, \ - is_skip_step, get_metrics, get_single_metrics, get_target_output_dir + is_skip_step, get_metrics, get_target_output_dir from msprobe.mindspore.monitor.module_spec_verifier import validate_config_spec from msprobe.mindspore.monitor.optimizer_collect import OptimizerMonFactory from msprobe.mindspore.monitor.anomaly_detect import AnomalyScanner, AnomalyDataFactory, \ @@ -576,21 +576,22 @@ class TrainerMon: for _, fwd_context in self.module_fwd_hook_context_by_module.items(): if len(fwd_context.actv) == 0: continue - self.summary_writer.write_metrics(self.ops, fwd_context.actv, step, 'actv') + self.summary_writer.write_metrics(self.ops, fwd_context.actv, step, MonitorConst.ACTV) fwd_context.actv.clear() if self.grad_context.actv: - self.summary_writer.write_metrics(self.ops, self.grad_context.actv, step, 'actv_grad') + self.summary_writer.write_metrics(self.ops, self.grad_context.actv, step, MonitorConst.ACTVGRAD) def write_param_tb(self, opt_context): if not self.param_distribution: return - self.summary_writer.write_metrics(self.ops, opt_context.param_metric, opt_context.step, 'param') + self.summary_writer.write_metrics(self.ops, opt_context.param_metric, opt_context.step, MonitorConst.PARAM) def write_mv_tb(self, opt_context): if not self.mv_distribution: return - self.summary_writer.write_metrics(self.ops, 
opt_context.exp_avg_metric, opt_context.step, 'exp_avg') - self.summary_writer.write_metrics(self.ops, opt_context.exp_avg_sq_metric, opt_context.step, 'exp_avg_sq') + self.summary_writer.write_metrics(self.ops, opt_context.exp_avg_metric, opt_context.step, MonitorConst.EXP_AVG) + self.summary_writer.write_metrics(self.ops, opt_context.exp_avg_sq_metric, opt_context.step, + MonitorConst.EXP_AVG_SQ) def write_grad_tb(self, step): if not self.wg_distribution: @@ -604,13 +605,28 @@ class TrainerMon: return False return True - def build_tbtag_tensor_map(self, module_name, tag, tensor): - metrics = {} - key = get_summary_writer_tag_name(module_name, tag, str(self.rank)) + def build_tbtag_tensor_map(self, module_name, suffix, tag, tensor): + """ + :param module_name: str of module name + :param suffix: + :param tag: + :param tensor: torch.tensor or tuple/list of torch.tensor + :return: tensor_map + """ + tensor_map = {} if isinstance(tensor, Tensor): - self.register_param_call_id("_hook_module", key) - metrics[key] = tensor - return metrics + tensor = [tensor] + if isinstance(tensor, tuple) or isinstance(tensor, list): + if len(tensor) == 1: + key = get_summary_writer_tag_name(module_name + suffix, tag, self.rank) + self.register_param_call_id("_hook_module", key) + tensor_map[key] = tensor[0] + else: + for i, tensor_i in enumerate(tensor): + key = get_summary_writer_tag_name(module_name + f"_{i}" + suffix, tag, self.rank) + self.register_param_call_id("_hook_module", key) + tensor_map[key] = tensor_i + return tensor_map def register_param_call_id(self, hook_name: str, key: str): """ @@ -655,14 +671,20 @@ class TrainerMon: # nothing to hook return 0 - def fwd_hook_fun(module, module_input, module_output, name): + def fwd_hook_fun(module, args, kwargs, module_output, name): + + module_input = [tensor for tensor in args if isinstance(tensor, Tensor)] + if kwargs: + kwargs_tensors = [tensor for tensor in kwargs.values() if isinstance(tensor, Tensor)] + module_input.extend(kwargs_tensors) + if module not in self.module_fwd_hook_context_by_module: self.module_fwd_hook_context_by_module[module] = ModuleHookContext(name) context: ModuleHookContext = self.module_fwd_hook_context_by_module[module] if not context.struct: context.struct = { - MonitorConst.ACTV_IN: get_param_struct(module_input), - MonitorConst.ACTV_OUT: get_param_struct(module_output) + Const.INPUT: get_param_struct(module_input), + Const.OUTPUT: get_param_struct(module_output) } if self.print_struct: self.module_struct[context.module_name].update(context.struct) @@ -673,31 +695,16 @@ class TrainerMon: self.collect_times): step_accumulates_one(context, self.micro_batch_number) return - if not context.format_by_arg: - context.set_format_by_arg(MonitorConst.ACTV_IN, self.targets) - context.set_format_by_arg(MonitorConst.ACTV_OUT, self.targets) - if not context.format_by_arg: - return - if not context.verified: - if not context.ignore_in: - context.focused_in_col = validate_config_spec(context.format_by_arg[MonitorConst.ACTV_IN], - module_input, context.module_name, - MonitorConst.ACTV_IN) - context.focused_out_col = validate_config_spec(context.format_by_arg[MonitorConst.ACTV_OUT], - module_output, context.module_name, - MonitorConst.ACTV_OUT) - context.verified = True tbtag_tensor_map = {} - if not context.ignore_in: - cared_input = module_input if context.focused_in_col is None else module_input[context.focused_in_col] - tbtag_tensor_map.update( - self.build_tbtag_tensor_map(f'{context.module_name}_{context.micro_step}', 
MonitorConst.ACTV_IN, - cared_input)) - cared_output = module_output if context.focused_out_col is None else module_output[context.focused_out_col] tbtag_tensor_map.update( - self.build_tbtag_tensor_map(f'{context.module_name}_{context.micro_step}', MonitorConst.ACTV_OUT, - cared_output)) + self.build_tbtag_tensor_map( + f'{context.module_name}.{Const.INPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, module_input)) + tbtag_tensor_map.update( + self.build_tbtag_tensor_map( + f'{context.module_name}.{Const.OUTPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, module_output)) try: get_metrics(self.ops, tbtag_tensor_map, self.eps, context.actv) except Exception as e: @@ -722,31 +729,16 @@ class TrainerMon: step_accumulates_one(context, self.micro_batch_number) return - if not context.format_by_arg: - context.set_format_by_arg(MonitorConst.ACTVGRAD_IN, self.targets) - context.set_format_by_arg(MonitorConst.ACTVGRAD_OUT, self.targets) - if not context.format_by_arg: - return - if not context.verified: - if not context.ignore_in: - context.focused_in_col = validate_config_spec(context.format_by_arg[MonitorConst.ACTVGRAD_IN], - input_grad, context.module_name, - MonitorConst.ACTVGRAD_IN) - context.focused_out_col = validate_config_spec(context.format_by_arg[MonitorConst.ACTVGRAD_OUT], - output_grad, context.module_name, - MonitorConst.ACTVGRAD_OUT) - context.verified = True - tbtag_tensor_map = {} - if not context.ignore_in: - cared_input_grad = input_grad if context.focused_in_col is None else input_grad[context.focused_in_col] - tbtag_tensor_map.update( - self.build_tbtag_tensor_map( - f'{context.module_name}_{context.micro_step}', MonitorConst.ACTVGRAD_IN, cared_input_grad)) - cared_output_grad = output_grad if context.focused_out_col is None else output_grad[context.focused_out_col] tbtag_tensor_map.update( - self.build_tbtag_tensor_map(f'{context.module_name}_{context.micro_step}', MonitorConst.ACTVGRAD_OUT, - cared_output_grad)) + self.build_tbtag_tensor_map( + f'{context.module_name}.{Const.INPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, input_grad)) + + tbtag_tensor_map.update( + self.build_tbtag_tensor_map( + f'{context.module_name}.{Const.OUTPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, output_grad)) if context.micro_step == 0 and context.actvgrad: logger.warning(f"actvgrad context of {context.module_name} is not empty when first micro_step, " @@ -761,8 +753,8 @@ class TrainerMon: return def fwd_hook_fun_wrapper(fwd_hook_fun, name): - def wrapper(module, module_input, module_output): - return fwd_hook_fun(module, module_input, module_output, name) + def wrapper(module, args, kwargs, module_output): + return fwd_hook_fun(module, args, kwargs, module_output, name) return wrapper if self.backward_only and self.forward_only: @@ -774,7 +766,8 @@ class TrainerMon: if not name: continue if not self.backward_only: - handle = submodule.register_forward_hook(fwd_hook_fun_wrapper(fwd_hook_fun, name=name)) + handle = submodule.register_forward_hook(fwd_hook_fun_wrapper(fwd_hook_fun, name=name), + with_kwargs=True) self.handles['xy'].append(handle) if not self.forward_only: handle = submodule.register_backward_hook(bwd_hook_fun) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/anomaly_detect.py b/debug/accuracy_tools/msprobe/pytorch/monitor/anomaly_detect.py index 63f20b1928c80e1e29d7cb8224f267c246fcaa8b..64b1ca5ea5ce87e853d4a5384da3a51208c60bfd 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/monitor/anomaly_detect.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/anomaly_detect.py @@ -254,6 +254,45 @@ class BaseWriterWithAD: self.anomalies = [] self.ndigits = writer_input.ndigits + @staticmethod + def stack_tensors(tensor_list): + """ + Torch not support stack cpu and xpu tensors. Group the tensors into cpu_group and xpu_group, + stack them separately, migrate xpu_group to cpu, and then restore in the order of input. + + :param tensor_list: [tensor(-1.6165), tensor(-1.0985), tensor(-1.7777), tensor(-1.8408, device='npu:0')] + :return: tensor: tensor([-1.6165, -1.0985, -1.7777, -1.8408], device='cpu') + """ + cpu_tensors = [] + xpu_tensors = [] + + # 将张量分别放入cpu_tensors和xpu_tensors列表 + for tensor in tensor_list: + if tensor.device.type == 'cpu': + cpu_tensors.append(tensor) + else: + xpu_tensors.append(tensor) + + # 分别堆叠cpu_tensors和xpu_tensors + cpu_stack = torch.stack(cpu_tensors) if cpu_tensors else torch.tensor([]) + xpu_stack = torch.stack(xpu_tensors).cpu() if xpu_tensors else torch.tensor([]) + + # 按照输入的顺序恢复 + result = [] + cpu_tensors_idx, xpu_tensors_idx = 0, 0 + for tensor in tensor_list: + if tensor.device.type == 'cpu': + result.append(cpu_stack[cpu_tensors_idx]) + cpu_tensors_idx += 1 + else: + result.append(xpu_stack[xpu_tensors_idx]) + xpu_tensors_idx += 1 + + # 将结果堆叠成一个张量 + result = torch.stack(result) + + return result + def get_anomalies(self): """返回已检测到的异常列表 """ @@ -299,7 +338,7 @@ class BaseWriterWithAD: end = (i+1) * MonitorConst.SLICE_SIZE if begin == len(tensors): continue - metric_list = torch.stack(tensors[begin:end]).cpu() + metric_list = self.stack_tensors(tensors[begin:end]) for tag, metric in zip(tags[begin:end], metric_list): self.add_scalar(tag, metric, step) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/csv2tb.py b/debug/accuracy_tools/msprobe/pytorch/monitor/csv2tb.py index 7fbcac84efb38814e01c2cc3cf5b3696a0c1afd2..aad5ba73f1f47cc5fc91df902a6b1e6930db76b7 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/csv2tb.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/csv2tb.py @@ -15,6 +15,7 @@ import datetime import os import re +import time from multiprocessing import Process import pytz @@ -113,6 +114,7 @@ def csv2tb_by_step_work(target_output_dirs, output_dirpath, data_type_list): all_step_result = update_dict(all_step_result, parse_step_result) if all_step_result: write_step(output_dirpath, all_step_result, rank, data_type) + time.sleep(1) def check_process_num(process_num): diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py b/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py index cbc6f58fef69127ef577878422a157bda267b774..0efbdfd2454e8459a70db35c4f24c9e62e50ba4d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py @@ -440,10 +440,28 @@ class TrainerMon: return self.tensor_metrics.stat_insert(target_tensor, ops_list, module_name, tensor_name, rank) - def build_tbtag_tensor_map(self, module_name, tag, tensor): - key = get_summary_writer_tag_name(module_name, tag, self.rank) - self.register_param_call_id("_hook_module", key) - return {key: tensor} + def build_tbtag_tensor_map(self, module_name, suffix, tag, tensor): + """ + :param module_name: str of module name + :param suffix: + :param tag: + :param tensor: torch.tensor or tuple/list of torch.tensor + :return: tensor_map + """ + tensor_map = {} + if isinstance(tensor, torch.Tensor): + tensor = [tensor] + if 
isinstance(tensor, tuple) or isinstance(tensor, list): + if len(tensor) == 1: + key = get_summary_writer_tag_name(module_name + suffix, tag, self.rank) + self.register_param_call_id("_hook_module", key) + tensor_map[key] = tensor[0] + else: + for i, tensor_i in enumerate(tensor): + key = get_summary_writer_tag_name(module_name + f"_{i}" + suffix, tag, self.rank) + self.register_param_call_id("_hook_module", key) + tensor_map[key] = tensor_i + return tensor_map def generate_param_map(self, tag, param_tensor): metrics = {} @@ -920,11 +938,17 @@ class TrainerMon: # nothing to hook return 0 - def fwd_hook_fun(module, module_input, module_output, name): + def fwd_hook_fun(module, args, kwargs, module_output, name): if not module.training or is_recomputation(): # 1 only monitor training stage. # 2 when open recompute, skip recomputed forward stage. return + + module_input = [tensor for tensor in args if torch.is_tensor(tensor)] + if kwargs: + kwargs_tensors = [tensor for tensor in kwargs.values() if torch.is_tensor(tensor)] + module_input.extend(kwargs_tensors) + if module not in self.module_fwd_hook_context_by_module: self.module_fwd_hook_context_by_module[module] = ModuleHookContext(name) context: ModuleHookContext = self.module_fwd_hook_context_by_module[module] @@ -936,31 +960,16 @@ class TrainerMon: if self.print_struct: self.module_struct[context.module_name].update(context.struct) return - if not context.format_by_arg: - context.set_format_by_arg(Const.INPUT, self.config['targets']) - context.set_format_by_arg(Const.OUTPUT, self.config['targets']) - if not context.format_by_arg: - return - if not context.verified: - context.focused_in_col = validate_config_spec(context.format_by_arg[Const.INPUT], - module_input, context.module_name, - Const.INPUT) - context.focused_out_col = validate_config_spec(context.format_by_arg[Const.OUTPUT], - module_output, context.module_name, - Const.OUTPUT) - context.verified = True - # expect output be tensor type + tbtag_tensor_map = {} - cared_input = module_input if context.focused_in_col is None else module_input[context.focused_in_col] tbtag_tensor_map.update( self.build_tbtag_tensor_map( - f'{context.module_name}.{Const.INPUT}{MonitorConst.NAME_SEP}{context.micro_step}', - MonitorConst.ACTV, cared_input)) - cared_output = module_output if context.focused_out_col is None else module_output[context.focused_out_col] + f'{context.module_name}.{Const.INPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, module_input)) tbtag_tensor_map.update( self.build_tbtag_tensor_map( - f'{context.module_name}.{Const.OUTPUT}{MonitorConst.NAME_SEP}{context.micro_step}', - MonitorConst.ACTV, cared_output)) + f'{context.module_name}.{Const.OUTPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, module_output)) get_metrics(self.ops, tbtag_tensor_map, self.eps, context.actv) context.micro_step += 1 @@ -978,31 +987,17 @@ class TrainerMon: if self.print_struct: self.module_struct[context.module_name].update(context.struct) return - if not context.format_by_arg: - context.set_format_by_arg(MonitorConst.INPUT_GRAD, self.config['targets']) - context.set_format_by_arg(MonitorConst.OUTPUT_GRAD, self.config['targets']) - if not context.format_by_arg: - return - if not context.verified: - context.focused_in_col = validate_config_spec( - context.format_by_arg[MonitorConst.INPUT_GRAD], - input_grad, context.module_name, MonitorConst.INPUT_GRAD) - context.focused_out_col = validate_config_spec( - 
context.format_by_arg[MonitorConst.OUTPUT_GRAD], - output_grad, context.module_name, MonitorConst.OUTPUT_GRAD) - context.verified = True tbtag_tensor_map = {} - cared_input_grad = input_grad if context.focused_in_col is None else input_grad[context.focused_in_col] tbtag_tensor_map.update( self.build_tbtag_tensor_map( - f'{context.module_name}.{Const.INPUT}{MonitorConst.NAME_SEP}{context.micro_step}', - MonitorConst.ACTV, cared_input_grad)) - cared_output_grad = output_grad if context.focused_out_col is None else output_grad[context.focused_out_col] + f'{context.module_name}.{Const.INPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, input_grad)) + tbtag_tensor_map.update( self.build_tbtag_tensor_map( - f'{context.module_name}.{Const.OUTPUT}{MonitorConst.NAME_SEP}{context.micro_step}', - MonitorConst.ACTV, cared_output_grad)) + f'{context.module_name}.{Const.OUTPUT}', f'{MonitorConst.NAME_SEP}{context.micro_step}', + MonitorConst.ACTV, output_grad)) if context.micro_step == 0 and context.actvgrad: logger.warning(f"actvgrad context of {context.module_name} is not empty when first micro_step, " @@ -1028,7 +1023,7 @@ class TrainerMon: if submodule.__class__.__name__ == "FullyShardedDataParallel": continue if not self.backward_only: - handle = submodule.register_forward_hook(partial(fwd_hook_fun, name=name)) + handle = submodule.register_forward_hook(partial(fwd_hook_fun, name=name), with_kwargs=True) self.handles['xy'].append(handle) if not self.forward_only and not self.has_register_backward_hook(name, submodule): handle = submodule.register_full_backward_hook(bwd_hook_fun) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py index 4178e2ef8fbfb2c2bafa90b32fa92d622b95e3cd..26bec0abdd88bfa10f5cdf9aa7271de57de66a8c 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py @@ -17,7 +17,6 @@ import os import shutil import random import unittest -import pytest import torch import numpy as np import torch.nn as nn @@ -30,13 +29,9 @@ from msprobe.pytorch.hook_module.api_register import get_api_register get_api_register().restore_all_api() - base_dir = os.path.dirname(os.path.realpath(__file__)) config_json_path = os.path.join(base_dir, "config", "all_config.json") monitor_output = os.path.join(base_dir, "./monitor_output_csv2tb") -os.environ[MonitorConst.MONITOR_OUTPUT_DIR] = monitor_output -timestamp_dirpath = None -csv2tb_dirpath = None def seed_all(seed=1234, mode=False): @@ -46,8 +41,8 @@ def seed_all(seed=1234, mode=False): torch.manual_seed(seed) torch.use_deterministic_algorithms(mode) -seed_all() +seed_all() inputs = [torch.rand(10, 10) for _ in range(10)] labels = [torch.randint(0, 5, (10,)) for _ in range(10)] @@ -65,31 +60,6 @@ class MockModule(nn.Module): return x2 -def data_collect(): - loss_fun = nn.CrossEntropyLoss() - test_module = MockModule() - nn.init.constant_(test_module.linear.weight, 1.0) - nn.init.constant_(test_module.linear.bias, 1.0) - optimizer = torch.optim.Adam(test_module.parameters()) - - monitor = TrainerMon(config_json_path, params_have_main_grad=False) - monitor.set_monitor(test_module, grad_acc_steps=1, optimizer=optimizer) - - for input_data, label in zip(inputs, labels): - output = test_module(input_data) - loss = loss_fun(output, label) - optimizer.zero_grad() - loss.backward() - optimizer.step() - - global timestamp_dirpath, csv2tb_dirpath - 
timestamp_dirpath = os.path.join(monitor_output, os.listdir(monitor_output)[0]) - csv2tensorboard_by_step(monitor_output) - for dirname in os.listdir(monitor_output): - if "csv2tensorboard" in dirname: - csv2tb_dirpath = os.path.join(monitor_output, dirname, "rank0") - - def extract_scalars_from_tensorboard(log_dir): # 初始化 EventAccumulator event_acc = EventAccumulator(log_dir) @@ -144,97 +114,102 @@ def compare_scalar_dicts(dict1, dict2): return True -@pytest.fixture(scope="session") -def setup_all(): - data_collect() - yield - shutil.rmtree(monitor_output) - -@pytest.mark.usefixtures("setup_all") class TestGradMonitor(unittest.TestCase): + timestamp_dirpath = None + csv2tb_dirpath = None + + @classmethod + def setUpClass(cls): + + os.environ[MonitorConst.MONITOR_OUTPUT_DIR] = monitor_output + if os.path.exists(monitor_output): + shutil.rmtree(monitor_output) + + loss_fun = nn.CrossEntropyLoss() + test_module = MockModule() + nn.init.constant_(test_module.linear.weight, 1.0) + nn.init.constant_(test_module.linear.bias, 1.0) + optimizer = torch.optim.Adam(test_module.parameters()) + + monitor = TrainerMon(config_json_path, params_have_main_grad=False) + monitor.set_monitor(test_module, grad_acc_steps=1, optimizer=optimizer) + + for input_data, label in zip(inputs, labels): + output = test_module(input_data) + loss = loss_fun(output, label) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + cls.timestamp_dirpath = os.path.join(monitor_output, os.listdir(monitor_output)[0]) + csv2tensorboard_by_step(monitor_output) + for dirname in os.listdir(monitor_output): + if "csv2tensorboard" in dirname: + cls.csv2tb_dirpath = os.path.join(monitor_output, dirname, "rank0") + os.environ.pop(MonitorConst.MONITOR_OUTPUT_DIR) def setUp(self): self.maxDiff = None - + def test_actv(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"actv_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "actv_0-2.csv")) result = { 'vp0:.input:micro0': { - 0: {'nans': 0.0,'norm': 5.550016}, - 1: {'nans': 0.0,'norm': 5.975112}, - 2: {'nans': 0.0,'norm': 5.789881} - }, + 0: {'nans': 0.0, 'norm': 5.550016}, + 1: {'nans': 0.0, 'norm': 5.975112}, + 2: {'nans': 0.0, 'norm': 5.789881} + }, 'vp0:.output:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - }, + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} + }, 'vp0:linear.input:micro0': { - 0: {'nans': 0.0,'norm': 5.550016}, - 1: {'nans': 0.0,'norm': 5.975112}, - 2: {'nans': 0.0,'norm': 5.789881} - }, + 0: {'nans': 0.0, 'norm': 5.550016}, + 1: {'nans': 0.0, 'norm': 5.975112}, + 2: {'nans': 0.0, 'norm': 5.789881} + }, 'vp0:linear.output:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - }, + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} + }, 'vp0:relu.input:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - }, + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} + }, 'vp0:relu.output:micro0': { - 0: {'nans': 0.0,'norm': 41.842655}, - 1: {'nans': 0.0,'norm': 44.40981}, - 2: {'nans': 0.0,'norm': 43.578354} - } + 0: {'nans': 0.0, 'norm': 41.842655}, + 1: {'nans': 0.0, 'norm': 44.40981}, + 2: {'nans': 0.0, 'norm': 43.578354} } + } 
self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "actv")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "actv")) print(tb_data) tb_result = { 'vp0:.input:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:.input:micro0/norm': [(0, 5.550015926361084), - (1, 5.975111961364746), - (2, 5.789881229400635), - (3, 6.052319049835205), - (4, 5.573315143585205), - (5, 5.864360809326172), - (6, 5.292460918426514), - (7, 5.477899074554443), - (8, 5.884613990783691), - (9, 5.456457138061523)], + (1, 5.975111961364746), + (2, 5.789881229400635), + (3, 6.052319049835205), + (4, 5.573315143585205), + (5, 5.864360809326172), + (6, 5.292460918426514), + (7, 5.477899074554443), + (8, 5.884613990783691), + (9, 5.456457138061523)], 'vp0:.output:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], - 'vp0:.output:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)], - 'vp0:linear.input:micro0/nans': [(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), @@ -244,117 +219,137 @@ class TestGradMonitor(unittest.TestCase): (7, 0.0), (8, 0.0), (9, 0.0)], + 'vp0:.output:micro0/norm': [(0, 41.842655181884766), + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)], + 'vp0:linear.input:micro0/nans': [(0, 0.0), + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:linear.input:micro0/norm': [(0, 5.550015926361084), - (1, 5.975111961364746), - (2, 5.789881229400635), - (3, 6.052319049835205), - (4, 5.573315143585205), - (5, 5.864360809326172), - (6, 5.292460918426514), - (7, 5.477899074554443), - (8, 5.884613990783691), - (9, 5.456457138061523)], + (1, 5.975111961364746), + (2, 5.789881229400635), + (3, 6.052319049835205), + (4, 5.573315143585205), + (5, 5.864360809326172), + (6, 5.292460918426514), + (7, 5.477899074554443), + (8, 5.884613990783691), + (9, 5.456457138061523)], 'vp0:linear.output:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:linear.output:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)], + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)], 'vp0:relu.input:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), 
+ (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:relu.input:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)], + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)], 'vp0:relu.output:micro0/nans': [(0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:relu.output:micro0/norm': [(0, 41.842655181884766), - (1, 44.40980911254883), - (2, 43.57835388183594), - (3, 45.83631134033203), - (4, 42.0673828125), - (5, 43.46839141845703), - (6, 39.77947235107422), - (7, 40.200843811035156), - (8, 44.453147888183594), - (9, 40.841522216796875)]} + (1, 44.40980911254883), + (2, 43.57835388183594), + (3, 45.83631134033203), + (4, 42.0673828125), + (5, 43.46839141845703), + (6, 39.77947235107422), + (7, 40.200843811035156), + (8, 44.453147888183594), + (9, 40.841522216796875)]} self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - def test_actv_grad(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"actv_grad_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "actv_grad_0-2.csv")) nan = np.nan result = { 'vp0:.input:micro0': { - 0: {'norm': nan, 'nans': nan}, - 1: {'norm': nan, 'nans': nan}, + 0: {'norm': nan, 'nans': nan}, + 1: {'norm': nan, 'nans': nan}, 2: {'norm': nan, 'nans': nan} - }, + }, 'vp0:.output:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - }, + }, 'vp0:relu.input:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - }, + }, 'vp0:relu.output:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - }, + }, 'vp0:linear.input:micro0': { - 0: {'norm': nan, 'nans': nan}, - 1: {'norm': nan, 'nans': nan}, + 0: {'norm': nan, 'nans': nan}, + 1: {'norm': nan, 'nans': nan}, 2: {'norm': nan, 'nans': nan} - }, + }, 'vp0:linear.output:micro0': { - 0: {'norm': 0.282843, 'nans': 0.0}, - 1: {'norm': 0.282617, 'nans': 0.0}, + 0: {'norm': 0.282843, 'nans': 0.0}, + 1: {'norm': 0.282617, 'nans': 0.0}, 2: {'norm': 0.282655, 'nans': 0.0} - } } + } + print(data) self.assertEqual(dict_equal(data, result), True) - - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "actv_grad")) + + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "actv_grad")) tb_result = { 'vp0:.input:micro0/nans': [(0, nan), (1, nan), @@ -475,88 +470,91 @@ class TestGradMonitor(unittest.TestCase): (6, 0.28316599130630493), (7, 0.28274500370025635), (8, 0.2833530008792877), - (9, 0.2825529873371124)]} + (9, 0.2825529873371124)] + } + print(tb_data) self.assertEqual(compare_scalar_dicts(tb_data, 
tb_result), True) - def test_param(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"param_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "param_0-2.csv")) result = { 'vp0:linear.bias': { 0: {'nans': 0.0, 'norm': 2.236068}, 1: {'nans': 0.0, 'norm': 2.236198}, 2: {'nans': 0.0, 'norm': 2.235769} - }, + }, 'vp0:linear.weight': { 0: {'nans': 0.0, 'norm': 7.071068}, 1: {'nans': 0.0, 'norm': 7.068808}, 2: {'nans': 0.0, 'norm': 7.06771} - } } + } self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "param")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "param")) tb_result = { 'vp0:linear.weight/norm': [ - (0, 7.071067810058594), - (1, 7.068808078765869), - (2, 7.067709922790527), - (3, 7.0673418045043945), - (4, 7.066926956176758), - (5, 7.066311836242676), - (6, 7.065629959106445), - (7, 7.065262794494629), - (8, 7.065001964569092), - (9, 7.064840793609619)], + (0, 7.071067810058594), + (1, 7.068808078765869), + (2, 7.067709922790527), + (3, 7.0673418045043945), + (4, 7.066926956176758), + (5, 7.066311836242676), + (6, 7.065629959106445), + (7, 7.065262794494629), + (8, 7.065001964569092), + (9, 7.064840793609619)], 'vp0:linear.weight/nans': [ - (0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)], + (0, 0.0), + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0)], 'vp0:linear.bias/norm': [ - (0, 2.2360680103302), - (1, 2.2361979484558105), - (2, 2.235769033432007), - (3, 2.235903024673462), - (4, 2.2360129356384277), - (5, 2.2359039783477783), - (6, 2.2357990741729736), - (7, 2.2357349395751953), - (8, 2.2356700897216797), - (9, 2.235619068145752)], + (0, 2.2360680103302), + (1, 2.2361979484558105), + (2, 2.235769033432007), + (3, 2.235903024673462), + (4, 2.2360129356384277), + (5, 2.2359039783477783), + (6, 2.2357990741729736), + (7, 2.2357349395751953), + (8, 2.2356700897216797), + (9, 2.235619068145752) + ], 'vp0:linear.bias/nans': [ - (0, 0.0), - (1, 0.0), - (2, 0.0), - (3, 0.0), - (4, 0.0), - (5, 0.0), - (6, 0.0), - (7, 0.0), - (8, 0.0), - (9, 0.0)] - } + (0, 0.0), + (1, 0.0), + (2, 0.0), + (3, 0.0), + (4, 0.0), + (5, 0.0), + (6, 0.0), + (7, 0.0), + (8, 0.0), + (9, 0.0) + ] + } self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) def test_exp_avg(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"exp_avg_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "exp_avg_0-2.csv")) result = { 'vp0:linear.bias': { 1: {'nans': 0.0, 'norm': 0.024495}, 2: {'nans': 0.0, 'norm': 0.052203} - }, + }, 'vp0:linear.weight': { 1: {'nans': 0.0, 'norm': 0.052394}, 2: {'nans': 0.0, 'norm': 0.099221} - } } + } self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "exp_avg")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "exp_avg")) tb_result = { 'vp0:linear.bias/nans': [(1, 0.0), (2, 0.0), @@ -597,19 +595,19 @@ class TestGradMonitor(unittest.TestCase): self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) def test_exp_avg_sq(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"exp_avg_sq_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "exp_avg_sq_0-2.csv")) result = { 'vp0:linear.bias': { 1: {'nans': 0.0, 'norm': 4.2e-05}, 2: {'nans': 0.0, 'norm': 9.6e-05} - }, + }, 
'vp0:linear.weight': { 1: {'nans': 0.0, 'norm': 6.7e-05}, 2: {'nans': 0.0, 'norm': 0.000126} - } } + } self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "exp_avg_sq")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "exp_avg_sq")) tb_result = { 'vp0:linear.bias/nans': [(1, 0.0), (2, 0.0), @@ -648,23 +646,23 @@ class TestGradMonitor(unittest.TestCase): (8, 0.00028700000257231295), (9, 0.0003060000017285347)]} self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - + def test_grad_reduced(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"grad_reduced_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "grad_reduced_0-2.csv")) result = { 'vp0:linear.bias': { 0: {'nans': 0.0, 'norm': 0.244949}, 1: {'nans': 0.0, 'norm': 0.314345}, 2: {'nans': 0.0, 'norm': 0.281475} - }, + }, 'vp0:linear.weight': { 0: {'nans': 0.0, 'norm': 0.523935}, 1: {'nans': 0.0, 'norm': 0.595672}, 2: {'nans': 0.0, 'norm': 0.497603} - } } + } self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "grad_reduced")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "grad_reduced")) tb_result = { 'vp0:linear.bias/nans': [(0, 0.0), (1, 0.0), @@ -707,24 +705,24 @@ class TestGradMonitor(unittest.TestCase): (8, 0.3234719932079315), (9, 0.32385098934173584)]} self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) - + def test_grad_unreduced(self): - data = parse_step_fn(os.path.join(timestamp_dirpath,"grad_unreduced_0-2.csv")) + data = parse_step_fn(os.path.join(self.timestamp_dirpath, "grad_unreduced_0-2.csv")) result = { 'vp0:linear.bias': { 0: {'nans': 0.0, 'norm': 0.244949}, 1: {'nans': 0.0, 'norm': 0.314345}, 2: {'nans': 0.0, 'norm': 0.281475} - }, + }, 'vp0:linear.weight': { 0: {'nans': 0.0, 'norm': 0.523935}, 1: {'nans': 0.0, 'norm': 0.595672}, 2: {'nans': 0.0, 'norm': 0.497603} - } } + } self.assertEqual(dict_equal(data, result), True) - tb_data = extract_scalars_from_tensorboard(os.path.join(csv2tb_dirpath, "grad_unreduced")) + tb_data = extract_scalars_from_tensorboard(os.path.join(self.csv2tb_dirpath, "grad_unreduced")) tb_result = { 'vp0:linear.bias/nans': [(0, 0.0), (1, 0.0), @@ -767,3 +765,7 @@ class TestGradMonitor(unittest.TestCase): (8, 0.3234719932079315), (9, 0.32385098934173584)]} self.assertEqual(compare_scalar_dicts(tb_data, tb_result), True) + + +if __name__ == '__main__': + unittest.main()
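Editor's note: the central new helper in this patch is BaseWriterWithAD.stack_tensors, added to both the MindSpore and PyTorch monitors, which works around the fact that a plain stack cannot mix CPU and accelerator tensors. Below is a minimal standalone sketch of that device-grouping idea in plain PyTorch, not the patched class itself; the function name and the __main__ demo are illustrative only.

# A minimal sketch (assumption: scalar tensors, plain torch) of the approach used by
# the stack_tensors helper added in this patch: group by device, stack each group,
# move the accelerator group to CPU once, then re-interleave in the input order.
from typing import List

import torch


def stack_mixed_device_tensors(tensor_list: List[torch.Tensor]) -> torch.Tensor:
    cpu_tensors = [t for t in tensor_list if t.device.type == "cpu"]
    xpu_tensors = [t for t in tensor_list if t.device.type != "cpu"]

    # Stack each group on its own device; a single .cpu() on the accelerator stack
    # avoids synchronizing tensor-by-tensor.
    cpu_stack = torch.stack(cpu_tensors) if cpu_tensors else torch.tensor([])
    xpu_stack = torch.stack(xpu_tensors).cpu() if xpu_tensors else torch.tensor([])

    # Restore the original interleaving so callers can still zip the result with tags.
    result, cpu_idx, xpu_idx = [], 0, 0
    for t in tensor_list:
        if t.device.type == "cpu":
            result.append(cpu_stack[cpu_idx])
            cpu_idx += 1
        else:
            result.append(xpu_stack[xpu_idx])
            xpu_idx += 1
    return torch.stack(result)


if __name__ == "__main__":
    # CPU-only demo; on an NPU/GPU host the same call accepts a mixed-device list.
    print(stack_mixed_device_tensors([torch.tensor(-1.6165), torch.tensor(-1.0985)]))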