diff --git a/ACL_PyTorch/built-in/audio/whisper/README.md b/ACL_PyTorch/built-in/audio/whisper/README.md index 0e2b164d0380ddd2a9d948133e8aa2e67a3ec707..b7217d973c82708ada55cb884de5602b131f77d6 100644 --- a/ACL_PyTorch/built-in/audio/whisper/README.md +++ b/ACL_PyTorch/built-in/audio/whisper/README.md @@ -95,27 +95,11 @@ infer.py推理参数: warmup结束之后,开始推理librispeech_asr_dummy数据集,推理过程中会打屏输出E2E性能,推理结束后会输出WER精度得分。 -**如果你想推理过程中打印encode和decode的耗时,你可以执行以下命令:** -```SHELL -# 1. 找到当前的环境路径(简称${location}),Location后面的那一串就是当前环境路径 -pip show openai-whisper | grep Location -# 2. 记录当前whisper库decoding.py的文件路径 -${decoding_path} = ${location}/whisper/decoding.py -# 3. 执行patch文件 -patch -p1 < whisper_decoding.patch -# 可能会提示你 -# cant find file to patch at input line 3 -# ... -# File to patch: -# 这时候需要你手动指定文件路径,输入之前得到的 -${decoding_path} -# 按回车,提示 patching file ${decoding_path} 即成功 -``` ## 性能数据 在librispeech_asr_dummy/clean数据集上的性能如下: - | 模型 | 芯片 | 平均encode | 平均decode |平均E2E | - |---------|------------|----------|-----------------|---------| - | whisper | 800I A2 | 0.90ms | 3.25ms | 67.32ms | - 注:平均decode 指在decode阶段,生成单个token的平均耗时。 \ No newline at end of file + | 模型 | 芯片 | RTF | + |---------|------------|----------| + | whisper | 800I A2 | 0.0236 | + 注:RTF表示转录一段音频所需的时间与音频实际长度的比值,多次运行取平均 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/whisper/infer.py b/ACL_PyTorch/built-in/audio/whisper/infer.py index ba5da6fa131bd9a0b799ac2c47f64b9c65f3767b..0617aa7eb0e72269246468c6a4f1d43aaa485316 100644 --- a/ACL_PyTorch/built-in/audio/whisper/infer.py +++ b/ACL_PyTorch/built-in/audio/whisper/infer.py @@ -17,6 +17,7 @@ import jiwer import numpy as np import pandas as pd from datasets import load_dataset +import librosa import torch from torch import nn, Tensor @@ -279,6 +280,12 @@ if __name__ == '__main__': npu_backend = tng.get_npu_backend(compiler_config=config) dataset = LibriSpeechDataset(wsp_args.speech_path, device=device) + audios = load_dataset(wsp_args.speech_path, split="validation") + duration_seconds = 0 + for audio in audios: + y, audio_sr = audio["audio"]["array"], audio["audio"]["sampling_rate"] + duration_seconds += librosa.get_duration(y=y, sr=audio_sr) + loader = torch.utils.data.DataLoader(dataset, batch_size=wsp_args.batch_size) options = whisper.DecodingOptions(language='en', without_timestamps=True, fp16=True) @@ -300,5 +307,7 @@ if __name__ == '__main__': print("{}/{} - {}".format(_step, wsp_args.warmup, result[bs].text)) print("LibriSpeech infer, English to English TRANSCRIBE ...") + start_time = time.time() p_wer = libri_speech_infer(wsp_model, options, loader) + print(f"RTF: {(time.time()-start_time)/duration_seconds:.4f}") print(f"LibriSpeech infer WER score = {p_wer * 100:.2f} %") diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/README.md b/ACL_PyTorch/built-in/cv/GroundingDINO/README.md index f0f418ba3cac2a0da9320f29241d9980d04fcc64..b4e3bb1cf634ed87cd56dcee6db153b026253444 100644 --- a/ACL_PyTorch/built-in/cv/GroundingDINO/README.md +++ b/ACL_PyTorch/built-in/cv/GroundingDINO/README.md @@ -120,7 +120,7 @@ python demo/image_demo_npu.py images/animals.png configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py --weight weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth --texts '$: coco' --device npu (--loop 10) # 执行视频推理命令 - python demo/video_demo_npu.py demo/demo_mot.mp4 configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth (--batch_size 16) + python demo/video_demo_npu.py demo/demo.mp4 configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth (--batch_size 16) ``` 在推理开始后,首先会默认执行warm_up,目的是执行首次编译,首次编译时间较长,在warm_up结束后,会执行推理操作,并打屏计算结果和性能数据。 diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py index aa081657c35395a2fd26bb1ae0b8008682040706..f75a18229ca2074ab4bcebac0f27d16d457d76c0 100644 --- a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py +++ b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py @@ -207,7 +207,7 @@ def main(): video_tools = init_video_tools(args, model) video_reader = video_tools.get("video_reader") if len(video_reader) < batch_size: - raise AssertionError(f"batch_size must be greater than video frame len, " + raise AssertionError(f"video frame len cannot be less than batch_size, " f"now frame len: {len(video_reader)}, batch_size: {batch_size}") # tokenizer diff --git a/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md b/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md index bfe51c10d1841c6e1d0946686c7f7d1568e4a813..9fb490660477b71b45b7a41834ad9761e3425a87 100644 --- a/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md +++ b/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md @@ -211,7 +211,7 @@ 1. 安装ais_bench推理工具。 - 请访问[ais_bench推理工具](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_infer)代码仓,根据readme文档进行工具安装。 + 请访问[ais_bench推理工具](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_bench)代码仓,根据readme文档进行工具安装。 2. 执行推理。 ```