diff --git a/torch_npu/profiler/dynamic_profile.py b/torch_npu/profiler/dynamic_profile.py index 99bfd76f7263e115b2839e07a4bc8948b8969f64..313fe7d388f078fed4a63ffd668f531136a132a7 100644 --- a/torch_npu/profiler/dynamic_profile.py +++ b/torch_npu/profiler/dynamic_profile.py @@ -3,6 +3,7 @@ import json import atexit import time +from ..npu import mstx, current_stream from .profiler import tensorboard_trace_handler, profile from .scheduler import Schedule as schedule @@ -38,6 +39,7 @@ class _DynamicProfile: self._step_record_time = None self._step_time = 0 self._min_poll_interval = 1 + self._step_mstx_range_id = 0 def init(self): if self.repeat_init: @@ -78,6 +80,9 @@ class _DynamicProfile: self._step_time = max(self._min_poll_interval, int(time.time() - self._step_record_time)) self._dynamic_monitor.modify_step_time(self._step_time) if self.prof: + if self._step_mstx_range_id: + mstx.range_end(self._step_mstx_range_id) + self._step_mstx_range_id = mstx.range_start(f"step {self.cur_step}", current_stream()) self.prof.step() self.step_num -= 1 if 0 == self.step_num: @@ -138,7 +143,9 @@ class _DynamicProfile: with_modules=self.cfg_ctx.with_modules, experimental_config=self.cfg_ctx.experimental_config ) + self.prof._set_step_num_offset_for_dynamic_prof(self.cur_step) self.prof.start() + self._step_mstx_range_id = mstx.range_start(f"step {self.cur_step}", current_stream()) for key, value in self.cfg_ctx.meta_data().items(): self.prof.add_metadata_json(str(key), json.dumps(value)) DynamicProfilerUtils.out_log("Start Dynamic Profiler at {} step.".format( diff --git a/torch_npu/profiler/profiler.py b/torch_npu/profiler/profiler.py index 409013114a8302ad7a7130387ad17352479968f6..65fbf5b03863adc1fda618cf83bdb778c7755192 100644 --- a/torch_npu/profiler/profiler.py +++ b/torch_npu/profiler/profiler.py @@ -229,6 +229,7 @@ class profile(_KinetoProfile): self.on_trace_ready = on_trace_ready self.step_num = 0 self.current_action = self.schedule(self.step_num) + self._step_num_offset = 0 self.step_rec_fn: Optional[prof.record_function] = None if use_cuda is not None: print_warn_msg("This is npu environment, use_cuda is invalid") @@ -249,6 +250,10 @@ class profile(_KinetoProfile): if self.stopped == False: self.stop() + @no_exception_func() + def _set_step_num_offset_for_dynamic_prof(self, step: int): + self._step_num_offset = step + @no_exception_func() def start(self): self.stopped = False @@ -256,7 +261,7 @@ class profile(_KinetoProfile): ProfPathCreator().init(export_only_mode=True) self.action_controller.transit_action(ProfilerAction.NONE, self.current_action) if self.record_steps: - self.step_rec_fn = prof.record_function("ProfilerStep#" + str(self.step_num)) + self.step_rec_fn = prof.record_function("ProfilerStep#" + str(self.step_num + self._step_num_offset)) self.step_rec_fn.__enter__() @no_exception_func()