diff --git a/ACL_PyTorch/built-in/audio/whisper/README.md b/ACL_PyTorch/built-in/audio/whisper/README.md
index 0e2b164d0380ddd2a9d948133e8aa2e67a3ec707..b7217d973c82708ada55cb884de5602b131f77d6 100644
--- a/ACL_PyTorch/built-in/audio/whisper/README.md
+++ b/ACL_PyTorch/built-in/audio/whisper/README.md
@@ -95,27 +95,11 @@ infer.py推理参数：
 
 warmup结束之后，开始推理librispeech_asr_dummy数据集，推理过程中会打屏输出E2E性能，推理结束后会输出WER精度得分。
 
-**如果你想推理过程中打印encode和decode的耗时，你可以执行以下命令：**
-```SHELL
-# 1. 找到当前的环境路径（简称${location}），Location后面的那一串就是当前环境路径
-pip show openai-whisper | grep Location
-# 2. 记录当前whisper库decoding.py的文件路径
-${decoding_path} = ${location}/whisper/decoding.py
-# 3. 执行patch文件
-patch -p1 < whisper_decoding.patch
-# 可能会提示你
-# cant find file to patch at input line 3
-# ...
-# File to patch:
-# 这时候需要你手动指定文件路径，输入之前得到的
-${decoding_path}
-# 按回车，提示 patching file ${decoding_path} 即成功
-```
 
 ## 性能数据
   在librispeech_asr_dummy/clean数据集上的性能如下：
 
-   | 模型      | 芯片         | 平均encode | 平均decode |平均E2E   |
-   |---------|------------|----------|-----------------|---------|
-   | whisper | 800I A2    | 0.90ms   | 3.25ms          | 67.32ms |
-    注：平均decode 指在decode阶段，生成单个token的平均耗时。
\ No newline at end of file
+   | 模型      | 芯片         | RTF |
+   |---------|------------|----------|
+   | whisper | 800I A2    | 0.0236 |
+    注：RTF（Real-Time Factor，实时率）表示转录一段音频所需的时间与音频实际长度的比值，数值越小表示推理越快；表中数据为多次运行的平均值。
\ No newline at end of file
diff --git a/ACL_PyTorch/built-in/audio/whisper/infer.py b/ACL_PyTorch/built-in/audio/whisper/infer.py
index ba5da6fa131bd9a0b799ac2c47f64b9c65f3767b..0617aa7eb0e72269246468c6a4f1d43aaa485316 100644
--- a/ACL_PyTorch/built-in/audio/whisper/infer.py
+++ b/ACL_PyTorch/built-in/audio/whisper/infer.py
@@ -17,6 +17,7 @@ import jiwer
 import numpy as np
 import pandas as pd
 from datasets import load_dataset
+import librosa
 
 import torch
 from torch import nn, Tensor
@@ -279,6 +280,12 @@ if __name__ == '__main__':
     npu_backend = tng.get_npu_backend(compiler_config=config)
 
     dataset = LibriSpeechDataset(wsp_args.speech_path, device=device)
+    audios = load_dataset(wsp_args.speech_path, split="validation")
+    duration_seconds = 0
+    for audio in audios:
+        y, audio_sr = audio["audio"]["array"], audio["audio"]["sampling_rate"]
+        duration_seconds += librosa.get_duration(y=y, sr=audio_sr)
+
     loader = torch.utils.data.DataLoader(dataset, batch_size=wsp_args.batch_size)
     options = whisper.DecodingOptions(language='en', without_timestamps=True, fp16=True)
 
@@ -300,5 +307,7 @@ if __name__ == '__main__':
                 print("{}/{} - {}".format(_step, wsp_args.warmup, result[bs].text))
 
         print("LibriSpeech infer, English to English TRANSCRIBE ...")
+        start_time = time.time()
         p_wer = libri_speech_infer(wsp_model, options, loader)
+        print(f"RTF: {(time.time()-start_time)/duration_seconds:.4f}")
         print(f"LibriSpeech infer WER score =  {p_wer * 100:.2f} %")
diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/README.md b/ACL_PyTorch/built-in/cv/GroundingDINO/README.md
index f0f418ba3cac2a0da9320f29241d9980d04fcc64..b4e3bb1cf634ed87cd56dcee6db153b026253444 100644
--- a/ACL_PyTorch/built-in/cv/GroundingDINO/README.md
+++ b/ACL_PyTorch/built-in/cv/GroundingDINO/README.md
@@ -120,7 +120,7 @@
       python demo/image_demo_npu.py images/animals.png configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py --weight weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth --texts '$: coco' --device npu (--loop 10)
 
       # 执行视频推理命令
-      python demo/video_demo_npu.py demo/demo_mot.mp4 configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth (--batch_size 16)
+      python demo/video_demo_npu.py demo/demo.mp4 configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth (--batch_size 16)
       ```
       在推理开始后，首先会默认执行warm_up，目的是执行首次编译，首次编译时间较长，在warm_up结束后，会执行推理操作，并打屏计算结果和性能数据。
 
diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py
index aa081657c35395a2fd26bb1ae0b8008682040706..f75a18229ca2074ab4bcebac0f27d16d457d76c0 100644
--- a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py
+++ b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py
@@ -207,7 +207,7 @@ def main():
     video_tools = init_video_tools(args, model)
     video_reader = video_tools.get("video_reader")
     if len(video_reader) < batch_size:
-        raise AssertionError(f"batch_size must be greater than video frame len, "
+        raise AssertionError(f"video frame len cannot be less than batch_size, "
                              f"now frame len: {len(video_reader)}, batch_size: {batch_size}")
 
     # tokenizer
diff --git a/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md b/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md
index bfe51c10d1841c6e1d0946686c7f7d1568e4a813..9fb490660477b71b45b7a41834ad9761e3425a87 100644
--- a/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md
+++ b/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md
@@ -211,7 +211,7 @@
 
    1. 安装ais_bench推理工具。
 
-      请访问[ais_bench推理工具](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_infer)代码仓，根据readme文档进行工具安装。
+      请访问[ais_bench推理工具](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_bench)代码仓，根据readme文档进行工具安装。
 
    2. 执行推理。
         ```