diff --git a/dynolog_npu/README.md b/dynolog_npu/README.md index 86a23b7f82925079c26623b070936538768d9b8c..72d1664752cb0526e67a6a2f55af334c06d6efff 100644 --- a/dynolog_npu/README.md +++ b/dynolog_npu/README.md @@ -97,6 +97,7 @@ nputrace子命令支持的参数选项 | profile-start-time | u64 | 用于同步采集的Unix时间戳,单位毫秒,默认值0,dynolog原生参数 | | duration-ms | u64 | 采集的周期,单位毫秒,默认值500,dynolog原生参数 | | iterations | i64 | 采集总迭代数,默认值-1,dynolog原生参数 | +| warmup | u64 | 采集前预热的迭代数,默认值0,默认不预热 | | log-file | String | 采集落盘的路径,必选值 | | start-step | u64 | 开始采集的迭代数,默认值0 | | record-shapes | action | 是否采集算子的InputShapes和InputTypes,设置参数采集,默认不采集 | @@ -154,6 +155,9 @@ dyno nputrace --start-step 10 --iterations 2 --activities NPU --analyse --data-s # 示例3:从第10个step开始采集,采集2个step,只采集CANN和device数据,只采集不解析,落盘路径为/tmp/profile_data dyno nputrace --start-step 10 --iterations 2 --activities NPU --log-file /tmp/profile_data + +# 示例4:从第10个step开始预热1个step,采集2个step,只采集CANN和device数据,只采集不解析,落盘路径为/tmp/profile_data +dyno nputrace --start-step 10 --iterations 2 --warmup 1 --activities NPU --log-file /tmp/profile_data ``` ### NPU Monitor功能 diff --git a/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs b/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs index f70923bca4cc5ce29a8855a464c411b63a930ef0..cfc53a624d962664d1c2f3732b4873914ecf0953 100644 --- a/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs +++ b/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs @@ -15,6 +15,7 @@ pub enum NpuTraceTriggerConfig { IterationBased { start_step: u64, iterations: i64, + warmup: u64, }, } @@ -31,11 +32,13 @@ impl NpuTraceTriggerConfig { NpuTraceTriggerConfig::IterationBased { start_step, iterations, + warmup, } => format!( r#"PROFILE_START_ITERATION=0 PROFILE_START_STEP={} -ACTIVITIES_ITERATIONS={}"#, - start_step, iterations +ACTIVITIES_ITERATIONS={} +WARMUP_ITERATIONS={}"#, + start_step, iterations, warmup ), } } @@ -185,14 +188,16 @@ ACTIVITIES_DURATION_MSECS=1000"# ); let trigger_config = NpuTraceTriggerConfig::IterationBased { - profile_start_step: 1000, + start_step: 1000, iterations: 1000, + warmup: 1000, }; assert_eq!( trigger_config.config(), r#"PROFILE_START_ITERATION=0 PROFILE_START_STEP=1000 -ACTIVITIES_ITERATIONS=1000"# +ACTIVITIES_ITERATIONS=1000 +WARMUP_ITERATIONS=1000"# ); } diff --git a/dynolog_npu/dynolog_npu/cli/src/main.rs b/dynolog_npu/dynolog_npu/cli/src/main.rs index 9fdea3d1254467081356b2e0daeb8ed3ca05a16d..d1a485ded1ba9169d07547f337ee14737bd71157 100644 --- a/dynolog_npu/dynolog_npu/cli/src/main.rs +++ b/dynolog_npu/dynolog_npu/cli/src/main.rs @@ -136,6 +136,9 @@ enum Command { /// Number of steps to start profile. #[clap(long, default_value_t = 0)] start_step: u64, + /// Number of steps to warmup. + #[clap(long, default_value_t = 0)] + warmup: u64, /// Max number of processes to profile. #[clap(long, default_value_t = 3)] process_limit: u32, @@ -281,6 +284,7 @@ fn main() -> Result<()> { iterations, profile_start_time, start_step, + warmup, process_limit, record_shapes, profile_memory, @@ -302,6 +306,7 @@ fn main() -> Result<()> { NpuTraceTriggerConfig::IterationBased { start_step, iterations, + warmup, } } else { NpuTraceTriggerConfig::DurationBased {