From c09f09581043e91bbecfab427f0128c53685c06a Mon Sep 17 00:00:00 2001 From: zhongwei_h <14500020+zhongweih@user.noreply.gitee.com> Date: Tue, 4 Jun 2024 10:10:44 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=92=8C=E4=BE=9D=E8=B5=96=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/audio/deepspeech/README.md | 337 ++++++++-------- PyTorch/contrib/audio/deepspeech/data/an4.py | 373 +++++++++--------- .../contrib/audio/deepspeech/requirements.txt | 40 +- 3 files changed, 371 insertions(+), 379 deletions(-) diff --git a/PyTorch/contrib/audio/deepspeech/README.md b/PyTorch/contrib/audio/deepspeech/README.md index 68d979cc46..16dea517e3 100644 --- a/PyTorch/contrib/audio/deepspeech/README.md +++ b/PyTorch/contrib/audio/deepspeech/README.md @@ -1,168 +1,169 @@ -# DeepSpeech for PyTorch - -- [概述](概述.md) -- [准备训练环境](准备训练环境.md) -- [开始训练](开始训练.md) -- [训练结果展示](训练结果展示.md) -- [版本说明](版本说明.md) - - - -# 概述 - -## 简述 - -DeepSpeech2是一个建立在端到端深度学习之上,将大多数模块替换为单个模型的第二代ASR语音系统。其ASR管道在几个基准上的精确度接近甚至超过了Amazon人工的精度,可以再多种语言下工作,并且可以部署在生产环境中。 - -- 参考实现: - - ``` - url=https://github.com/SeanNaren/deepspeech.pytorch - commit_id=b00d17387ca47b05b8a3c0ccc91a133eb4966b40 - ``` - -- 适配昇腾 AI 处理器的实现: - - ``` - url=https://gitee.com/ascend/ModelZoo-PyTorch.git - code_path=PyTorch/contrib/audio - ``` - - -# 准备训练环境 - -## 准备环境 - -- 当前模型支持的 PyTorch 版本和已知三方库依赖如下表所示。 - - **表 1** 版本支持表 - - | Torch_Version | 三方库依赖版本 | - | :--------: | :----------------------------------------------------------: | - | PyTorch 1.5 | - | - | PyTorch 1.8 | - | - -- 环境准备指导。 - - 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 - -- 编译安装wrap-ctc模块。 - - ```shell - ### npu环境变量 - source {deepspeech_root}/test/env_npu.sh - git clone https://github.com/SeanNaren/warp-ctc.git - cd warp-ctc - git checkout -b 
pytorch_bindings origin/pytorch_bindings - mkdir build; cd build; cmake ..; make - cd ../pytorch_binding && python3 setup.py install - ``` - -- 安装依赖。 - - ``` - pip install -r requirements.txt - ``` - -- 如果需要在多机或者多卡上训练该模型,那么需要按照以下步骤安装 `etcd`。 - - ```shell - sudo apt-get install etcd - sudo apt-get install sox - ``` - - -## 准备数据集 - -1. 获取数据集。 - - ```shell - cd data - python3 an4.py - ``` - -2. 或者您还可以自行下载数据集解压至源码包根目录下的 `data/` 文件夹下。 - - 数据集目录结构参考如下所示。 - ``` - ├── data - ├──an4_train_manifest.csv - ├──an4_val_manifest.csv - ├──an4_test_manifest.csv - ├──an4_dataset - ├──train - ├──val - ├──test - ``` - > **说明:** - >该数据集的训练过程脚本只作为一种参考示例。 - -# 开始训练 - -## 训练模型 - -1. 进入解压后的源码包根目录。 - - ``` - cd /${模型文件夹名称} - ``` - -2. 运行训练脚本。 - - 该模型支持单机单卡训练和单机8卡训练。 - - - 单机单卡训练 - - 启动单卡训练。 - - ``` - bash ./test/train_full_1p.sh --data_path=./data/ # 单卡精度 - bash ./test/train_performance_1p.sh --data_path=./data/ # 单卡性能 - ``` - - - 单机8卡训练 - - 启动8卡训练。 - - ``` - bash ./test/train_full_8p.sh --data_path=./data/ # 8卡精度 - bash ./test/train_performance_8p.sh --data_path=./data/ # 8卡性能 - ``` - - --data_path参数填写数据集路径,需写到数据集的一级目录。 - - 模型训练脚本参数说明如下。 - - ``` - --data.num_workers //加载数据进程数 - --training.epochs //重复训练次数 - --data.batch_size //训练批次大小,默认:240 - --optim.learning_rate //初始学习率,默认:1 - --apex.loss_scale //混合精度lossscale大小 - ``` - -# 训练结果展示 - -**表 2** 训练结果展示表 - -| NAME | WER | CER | FPS | Epochs | AMP_Type | Torch_Version | -| :---: | :----: | :----: | :----: | :--: | :--: | :--: | -| 1P-竞品V | 10.349 | 7.076 | 94 | 70 | O2 | 1.5 | -| 8P-竞品V | 15.265 | 9.834 | 377 | 70 | O2 | 1.5 | -| 1P-NPU | 9.444 | 5.723 | 4 | 70 | O2 | 1.8 | -| 8P-NPU | 17.464 | 10.926 | 22 | 70 | O2 | 1.8 | - - -# 版本说明 - -## 变更 - -2022.12.20:整改Readme,重新发布。 - -## FAQ - -无。 - -# 公网地址说明 - -代码涉及公网地址参考 public_address_statement.md +# DeepSpeech for PyTorch + +- [概述](概述.md) +- [准备训练环境](准备训练环境.md) +- [开始训练](开始训练.md) +- [训练结果展示](训练结果展示.md) +- [版本说明](版本说明.md) + + + +# 概述 + +## 简述 + 
+DeepSpeech2是一个建立在端到端深度学习之上,将大多数模块替换为单个模型的第二代ASR语音系统。其ASR管道在几个基准上的精确度接近甚至超过了Amazon人工的精度,可以在多种语言下工作,并且可以部署在生产环境中。 + +- 参考实现: + + ``` + url=https://github.com/SeanNaren/deepspeech.pytorch + commit_id=b00d17387ca47b05b8a3c0ccc91a133eb4966b40 + ``` + +- 适配昇腾 AI 处理器的实现: + + ``` + url=https://gitee.com/ascend/ModelZoo-PyTorch.git + code_path=PyTorch/contrib/audio + ``` + + +# 准备训练环境 + +## 准备环境 + +- 当前模型支持的 PyTorch 版本和已知三方库依赖如下表所示。 + + **表 1** 版本支持表 + + | Torch_Version | 三方库依赖版本 | + | :--------: | :----------------------------------------------------------: | + | PyTorch 1.5 | - | + | PyTorch 1.8 | - | + | PyTorch 1.11 | numba < 0.50.0 | + +- 环境准备指导。 + + 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 + +- 编译安装warp-ctc模块。 + + ```shell + ### npu环境变量 + source {deepspeech_root}/test/env_npu.sh + git clone https://github.com/SeanNaren/warp-ctc.git + cd warp-ctc + git checkout -b pytorch_bindings origin/pytorch_bindings + mkdir build; cd build; cmake ..; make + cd ../pytorch_binding && python3 setup.py install + ``` + +- 安装依赖。 + + ``` + pip install -r requirements.txt + ``` + +- 如果需要在多机或者多卡上训练该模型,那么需要按照以下步骤安装 `etcd`。 + + ```shell + sudo apt-get install etcd + sudo apt-get install sox + ``` + + +## 准备数据集 + +1. 获取数据集。 + + ```shell + cd data + python3 an4.py + ``` + +2. 或者您还可以自行下载数据集解压至源码包根目录下的 `data/` 文件夹下。 + + 数据集目录结构参考如下所示。 + ``` + ├── data + ├──an4_train_manifest.csv + ├──an4_val_manifest.csv + ├──an4_test_manifest.csv + ├──an4_dataset + ├──train + ├──val + ├──test + ``` + > **说明:** + >该数据集的训练过程脚本只作为一种参考示例。 + +# 开始训练 + +## 训练模型 + +1. 进入解压后的源码包根目录。 + + ``` + cd /${模型文件夹名称} + ``` + +2. 
运行训练脚本。 + + 该模型支持单机单卡训练和单机8卡训练。 + + - 单机单卡训练 + + 启动单卡训练。 + + ``` + bash ./test/train_full_1p.sh --data_path=./data/ # 单卡精度 + bash ./test/train_performance_1p.sh --data_path=./data/ # 单卡性能 + ``` + + - 单机8卡训练 + + 启动8卡训练。 + + ``` + bash ./test/train_full_8p.sh --data_path=./data/ # 8卡精度 + bash ./test/train_performance_8p.sh --data_path=./data/ # 8卡性能 + ``` + + --data_path参数填写数据集路径,需写到数据集的一级目录。 + + 模型训练脚本参数说明如下。 + + ``` + --data.num_workers //加载数据进程数 + --training.epochs //重复训练次数 + --data.batch_size //训练批次大小,默认:240 + --optim.learning_rate //初始学习率,默认:1 + --apex.loss_scale //混合精度lossscale大小 + ``` + +# 训练结果展示 + +**表 2** 训练结果展示表 + +| NAME | WER | CER | FPS | Epochs | AMP_Type | Torch_Version | +| :---: | :----: | :----: | :----: | :--: | :--: | :--: | +| 1P-竞品V | 10.349 | 7.076 | 94 | 70 | O2 | 1.5 | +| 8P-竞品V | 15.265 | 9.834 | 377 | 70 | O2 | 1.5 | +| 1P-NPU | 9.444 | 5.723 | 4 | 70 | O2 | 1.8 | +| 8P-NPU | 17.464 | 10.926 | 22 | 70 | O2 | 1.8 | + + +# 版本说明 + +## 变更 + +2024.06.04:整改Readme,重新发布。 + +## FAQ + +镜像推荐使用ubuntu 18.04 + +# 公网地址说明 + +代码涉及公网地址参考 public_address_statement.md diff --git a/PyTorch/contrib/audio/deepspeech/data/an4.py b/PyTorch/contrib/audio/deepspeech/data/an4.py index 2890c86c34..a9b568befd 100644 --- a/PyTorch/contrib/audio/deepspeech/data/an4.py +++ b/PyTorch/contrib/audio/deepspeech/data/an4.py @@ -1,191 +1,182 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import io -import shutil -import tarfile -import sys - -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from sklearn.model_selection import train_test_split -import wget - -from deepspeech_pytorch.data.data_opts import add_data_opts -from deepspeech_pytorch.data.utils import create_manifest - - -def _format_training_data(root_path, - val_fraction, - sample_rate, - target_dir): - wav_path = root_path + 'wav/' - file_ids_path = root_path + 'etc/an4_train.fileids' - transcripts_path = root_path + 'etc/an4_train.transcription' - root_wav_path = wav_path + 'an4_clstk' - - _convert_audio_to_wav(an4_audio_path=root_wav_path, - sample_rate=sample_rate) - file_ids, transcripts = _retrieve_file_ids_and_transcripts(file_ids_path, transcripts_path) - - split_files = train_test_split(file_ids, transcripts, test_size=val_fraction) - train_file_ids, val_file_ids, train_transcripts, val_transcripts = split_files - - _save_wav_transcripts(data_type='train', - file_ids=train_file_ids, - transcripts=train_transcripts, - wav_dir=wav_path, - target_dir=target_dir) - _save_wav_transcripts(data_type='val', - file_ids=val_file_ids, - transcripts=val_transcripts, - wav_dir=wav_path, - target_dir=target_dir) - - -def _format_test_data(root_path, - sample_rate, - target_dir): - wav_path = root_path + 'wav/' - file_ids_path = root_path + 'etc/an4_test.fileids' - transcripts_path = root_path + 'etc/an4_test.transcription' - root_wav_path = wav_path + 'an4test_clstk' - - _convert_audio_to_wav(an4_audio_path=root_wav_path, - sample_rate=sample_rate) - file_ids, transcripts = _retrieve_file_ids_and_transcripts(file_ids_path, transcripts_path) - - _save_wav_transcripts(data_type='test', - file_ids=file_ids, - transcripts=transcripts, - wav_dir=wav_path, - target_dir=target_dir) - - -def _save_wav_transcripts(data_type, - file_ids, - transcripts, - wav_dir, - target_dir): - data_path = os.path.join(target_dir, data_type + '/an4/') - 
new_transcript_dir = data_path + '/txt/' - new_wav_dir = data_path + '/wav/' - - os.makedirs(new_transcript_dir) - os.makedirs(new_wav_dir) - - _save_files(file_ids=file_ids, - transcripts=transcripts, - wav_dir=wav_dir, - new_wav_dir=new_wav_dir, - new_transcript_dir=new_transcript_dir) - - -def _convert_audio_to_wav(an4_audio_path, sample_rate): - with os.popen('find %s -type f -name "*.raw"' % an4_audio_path) as pipe: - for line in pipe: - raw_path = line.strip() - new_path = line.replace('.raw', '.wav').strip() - cmd = 'sox -t raw -r %d -b 16 -e signed-integer -B -c 1 \"%s\" \"%s\"' % ( - sample_rate, raw_path, new_path) - os.system(cmd) - - -def _save_files(file_ids, transcripts, wav_dir, new_wav_dir, new_transcript_dir): - for file_id, transcript in zip(file_ids, transcripts): - path = wav_dir + file_id.strip() + '.wav' - filename = path.split('/')[-1] - extracted_transcript = _process_transcript(transcript) - new_path = new_wav_dir + filename - text_path = new_transcript_dir + filename.replace('.wav', '.txt') - with io.FileIO(text_path, "w") as file: - file.write(extracted_transcript.encode('utf-8')) - current_path = os.path.abspath(path) - shutil.copy(current_path, new_path) - os.remove(current_path) - - -def _retrieve_file_ids_and_transcripts(file_id_path, transcripts_path): - with open(file_id_path, 'r') as f: - file_ids = f.readlines() - with open(transcripts_path, 'r') as t: - transcripts = t.readlines() - return file_ids, transcripts - - -def _process_transcript(transcript): - """ - Removes tags found in AN4. 
- """ - extracted_transcript = transcript.split('(')[0].strip("").split('<')[0].strip().upper() - return extracted_transcript - - -def download_an4(target_dir: str, - manifest_dir: str, - min_duration: float, - max_duration: float, - val_fraction: float, - sample_rate: int): - root_path = 'an4/' - raw_tar_path = 'an4_raw.bigendian.tar.gz' - if not os.path.exists(raw_tar_path): - wget.download('http://www.speech.cs.cmu.edu/databases/an4/an4_raw.bigendian.tar.gz') - tar = tarfile.open('an4_raw.bigendian.tar.gz') - tar.extractall() - os.makedirs(target_dir, exist_ok=True) - _format_training_data(root_path=root_path, - val_fraction=val_fraction, - sample_rate=sample_rate, - target_dir=target_dir) - _format_test_data(root_path=root_path, - sample_rate=sample_rate, - target_dir=target_dir) - shutil.rmtree(root_path) - os.remove('an4_raw.bigendian.tar.gz') - train_path = target_dir + '/train/' - val_path = target_dir + '/val/' - test_path = target_dir + '/test/' - - print('Creating manifests...') - create_manifest(data_path=train_path, - output_name='an4_train_manifest.csv', - manifest_path=manifest_dir, - min_duration=min_duration, - max_duration=max_duration) - create_manifest(data_path=val_path, - output_name='an4_val_manifest.csv', - manifest_path=manifest_dir, - min_duration=min_duration, - max_duration=max_duration) - create_manifest(data_path=test_path, - output_name='an4_test_manifest.csv', - manifest_path=manifest_dir) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Processes and downloads an4.') - parser = add_data_opts(parser) - parser.add_argument('--target-dir', default='an4_dataset/', help='Path to save dataset') - parser.add_argument('--val-fraction', default=0.1, type=float, - help='Number of files in the training set to use as validation.') - args = parser.parse_args() - download_an4(target_dir=args.target_dir, - manifest_dir=args.manifest_dir, - min_duration=args.min_duration, - max_duration=args.max_duration, - 
val_fraction=args.val_fraction, - sample_rate=args.sample_rate) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import io +import shutil +import tarfile +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from sklearn.model_selection import train_test_split +import wget + +from deepspeech_pytorch.data.data_opts import add_data_opts +from deepspeech_pytorch.data.utils import create_manifest + + +def _format_training_data(root_path, + val_fraction, + sample_rate, + target_dir): + wav_path = root_path + 'wav/' + file_ids_path = root_path + 'etc/an4_train.fileids' + transcripts_path = root_path + 'etc/an4_train.transcription' + root_wav_path = wav_path + 'an4_clstk' + + _convert_audio_to_wav(an4_audio_path=root_wav_path, + sample_rate=sample_rate) + file_ids, transcripts = _retrieve_file_ids_and_transcripts(file_ids_path, transcripts_path) + + split_files = train_test_split(file_ids, transcripts, test_size=val_fraction) + train_file_ids, val_file_ids, train_transcripts, val_transcripts = split_files + + _save_wav_transcripts(data_type='train', + file_ids=train_file_ids, + transcripts=train_transcripts, + wav_dir=wav_path, + target_dir=target_dir) + _save_wav_transcripts(data_type='val', + file_ids=val_file_ids, + transcripts=val_transcripts, + wav_dir=wav_path, + target_dir=target_dir) + + +def _format_test_data(root_path, + sample_rate, + target_dir): + 
wav_path = root_path + 'wav/' + file_ids_path = root_path + 'etc/an4_test.fileids' + transcripts_path = root_path + 'etc/an4_test.transcription' + root_wav_path = wav_path + 'an4test_clstk' + + _convert_audio_to_wav(an4_audio_path=root_wav_path, + sample_rate=sample_rate) + file_ids, transcripts = _retrieve_file_ids_and_transcripts(file_ids_path, transcripts_path) + + _save_wav_transcripts(data_type='test', + file_ids=file_ids, + transcripts=transcripts, + wav_dir=wav_path, + target_dir=target_dir) + + +def _save_wav_transcripts(data_type, + file_ids, + transcripts, + wav_dir, + target_dir): + data_path = os.path.join(target_dir, data_type + '/an4/') + new_transcript_dir = data_path + '/txt/' + new_wav_dir = data_path + '/wav/' + + os.makedirs(new_transcript_dir) + os.makedirs(new_wav_dir) + + _save_files(file_ids=file_ids, + transcripts=transcripts, + wav_dir=wav_dir, + new_wav_dir=new_wav_dir, + new_transcript_dir=new_transcript_dir) + + +def _convert_audio_to_wav(an4_audio_path, sample_rate): + with os.popen('find %s -type f -name "*.raw"' % an4_audio_path) as pipe: + for line in pipe: + raw_path = line.strip() + new_path = line.replace('.raw', '.wav').strip() + cmd = 'sox -t raw -r %d -b 16 -e signed-integer -B -c 1 \"%s\" \"%s\"' % ( + sample_rate, raw_path, new_path) + os.system(cmd) + + +def _save_files(file_ids, transcripts, wav_dir, new_wav_dir, new_transcript_dir): + for file_id, transcript in zip(file_ids, transcripts): + path = wav_dir + file_id.strip() + '.wav' + filename = path.split('/')[-1] + extracted_transcript = _process_transcript(transcript) + new_path = new_wav_dir + filename + text_path = new_transcript_dir + filename.replace('.wav', '.txt') + with io.FileIO(text_path, "w") as file: + file.write(extracted_transcript.encode('utf-8')) + current_path = os.path.abspath(path) + shutil.copy(current_path, new_path) + os.remove(current_path) + + +def _retrieve_file_ids_and_transcripts(file_id_path, transcripts_path): + with open(file_id_path, 'r') as 
f: + file_ids = f.readlines() + with open(transcripts_path, 'r') as t: + transcripts = t.readlines() + return file_ids, transcripts + + +def _process_transcript(transcript): + """ + Removes tags found in AN4. + """ + extracted_transcript = transcript.split('(')[0].strip("").split('<')[0].strip().upper() + return extracted_transcript + + +def download_an4(target_dir: str, + manifest_dir: str, + min_duration: float, + max_duration: float, + val_fraction: float, + sample_rate: int): + raw_tar_path = 'an4.tar.gz' + if not os.path.exists(raw_tar_path): + wget.download('https://github.com/SeanNaren/deepspeech.pytorch/releases/download/V3.0/an4.tar.gz') + tar = tarfile.open('an4.tar.gz') + os.makedirs(target_dir, exist_ok=True) + tar.extractall(target_dir) + + train_path = target_dir + '/train/' + val_path = target_dir + '/val/' + test_path = target_dir + '/test/' + + print('Creating manifests...') + create_manifest(data_path=train_path, + output_name='an4_train_manifest.csv', + manifest_path=manifest_dir, + min_duration=min_duration, + max_duration=max_duration) + create_manifest(data_path=val_path, + output_name='an4_val_manifest.csv', + manifest_path=manifest_dir, + min_duration=min_duration, + max_duration=max_duration) + create_manifest(data_path=test_path, + output_name='an4_test_manifest.csv', + manifest_path=manifest_dir) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Processes and downloads an4.') + parser = add_data_opts(parser) + parser.add_argument('--target-dir', default='an4_dataset/', help='Path to save dataset') + parser.add_argument('--val-fraction', default=0.1, type=float, + help='Number of files in the training set to use as validation.') + args = parser.parse_args() + download_an4(target_dir=args.target_dir, + manifest_dir=args.manifest_dir, + min_duration=args.min_duration, + max_duration=args.max_duration, + val_fraction=args.val_fraction, + sample_rate=args.sample_rate) diff --git 
a/PyTorch/contrib/audio/deepspeech/requirements.txt b/PyTorch/contrib/audio/deepspeech/requirements.txt index d14e6f886f..2501f23d08 100644 --- a/PyTorch/contrib/audio/deepspeech/requirements.txt +++ b/PyTorch/contrib/audio/deepspeech/requirements.txt @@ -1,20 +1,20 @@ -scipy -numpy -soundfile -python-levenshtein -torchelastic -visdom -wget -librosa -numba==0.54.0 -llvmlite==0.37.0 -tqdm -matplotlib -flask -sox -sklearn -soundfile -pytest -hydra-core -google-cloud-storage==1.42.3 -jupyter +scipy +numpy +soundfile +python-levenshtein +torchelastic +visdom +wget +librosa +numba==0.49.1 +llvmlite==0.32.1 +tqdm +matplotlib +flask +sox +sklearn +soundfile +pytest +hydra-core +google-cloud-storage==1.42.3 +jupyter -- Gitee From 43716b2fea14d3fbbfcfe349a90b838e538f82d1 Mon Sep 17 00:00:00 2001 From: zhongwei_h <18350320518@163.com> Date: Wed, 5 Jun 2024 00:38:10 +0000 Subject: [PATCH 2/3] update PyTorch/contrib/audio/deepspeech/README.md. Signed-off-by: zhongwei_h <18350320518@163.com> --- PyTorch/contrib/audio/deepspeech/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/PyTorch/contrib/audio/deepspeech/README.md b/PyTorch/contrib/audio/deepspeech/README.md index 16dea517e3..820a8d260c 100644 --- a/PyTorch/contrib/audio/deepspeech/README.md +++ b/PyTorch/contrib/audio/deepspeech/README.md @@ -43,6 +43,15 @@ DeepSpeech2是一个建立在端到端深度学习之上,将大多数模块替 | PyTorch 1.8 | - | | PyTorch 1.11 | numba < 0.50.0 | +- 当前模型支持的 CANN版本和HDK版本如下表所示。 + + **表 2** CANN与HDK版本要求 + | 软件名称 | 版本要求 | + | :--------: | :----------------------------------------------------------: | + | CANN | >=7.0.0 | + | HDK | >=23.0.0 | + + - 环境准备指导。 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 -- Gitee From 69d19ddf439b6a4c1583b86d378263e3cd90c586 Mon Sep 17 00:00:00 2001 From: zhongwei_h <18350320518@163.com> Date: Wed, 5 Jun 2024 01:40:57 +0000 Subject: [PATCH 3/3] =?UTF-8?q?update=20PyTorch/contrib/audio/deepspeech/R?= 
=?UTF-8?q?EADME.md.=20=E6=9B=B4=E6=96=B0pr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zhongwei_h <18350320518@163.com> --- PyTorch/contrib/audio/deepspeech/data/an4.py | 114 +------------------ 1 file changed, 4 insertions(+), 110 deletions(-) diff --git a/PyTorch/contrib/audio/deepspeech/data/an4.py b/PyTorch/contrib/audio/deepspeech/data/an4.py index a9b568befd..abb1df448a 100644 --- a/PyTorch/contrib/audio/deepspeech/data/an4.py +++ b/PyTorch/contrib/audio/deepspeech/data/an4.py @@ -1,4 +1,4 @@ -# Copyright 2021 Huawei Technologies Co., Ltd +# Copyright 2024 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,112 +28,6 @@ from deepspeech_pytorch.data.data_opts import add_data_opts from deepspeech_pytorch.data.utils import create_manifest -def _format_training_data(root_path, - val_fraction, - sample_rate, - target_dir): - wav_path = root_path + 'wav/' - file_ids_path = root_path + 'etc/an4_train.fileids' - transcripts_path = root_path + 'etc/an4_train.transcription' - root_wav_path = wav_path + 'an4_clstk' - - _convert_audio_to_wav(an4_audio_path=root_wav_path, - sample_rate=sample_rate) - file_ids, transcripts = _retrieve_file_ids_and_transcripts(file_ids_path, transcripts_path) - - split_files = train_test_split(file_ids, transcripts, test_size=val_fraction) - train_file_ids, val_file_ids, train_transcripts, val_transcripts = split_files - - _save_wav_transcripts(data_type='train', - file_ids=train_file_ids, - transcripts=train_transcripts, - wav_dir=wav_path, - target_dir=target_dir) - _save_wav_transcripts(data_type='val', - file_ids=val_file_ids, - transcripts=val_transcripts, - wav_dir=wav_path, - target_dir=target_dir) - - -def _format_test_data(root_path, - sample_rate, - target_dir): - wav_path = root_path + 'wav/' - file_ids_path = root_path + 'etc/an4_test.fileids' - 
transcripts_path = root_path + 'etc/an4_test.transcription' - root_wav_path = wav_path + 'an4test_clstk' - - _convert_audio_to_wav(an4_audio_path=root_wav_path, - sample_rate=sample_rate) - file_ids, transcripts = _retrieve_file_ids_and_transcripts(file_ids_path, transcripts_path) - - _save_wav_transcripts(data_type='test', - file_ids=file_ids, - transcripts=transcripts, - wav_dir=wav_path, - target_dir=target_dir) - - -def _save_wav_transcripts(data_type, - file_ids, - transcripts, - wav_dir, - target_dir): - data_path = os.path.join(target_dir, data_type + '/an4/') - new_transcript_dir = data_path + '/txt/' - new_wav_dir = data_path + '/wav/' - - os.makedirs(new_transcript_dir) - os.makedirs(new_wav_dir) - - _save_files(file_ids=file_ids, - transcripts=transcripts, - wav_dir=wav_dir, - new_wav_dir=new_wav_dir, - new_transcript_dir=new_transcript_dir) - - -def _convert_audio_to_wav(an4_audio_path, sample_rate): - with os.popen('find %s -type f -name "*.raw"' % an4_audio_path) as pipe: - for line in pipe: - raw_path = line.strip() - new_path = line.replace('.raw', '.wav').strip() - cmd = 'sox -t raw -r %d -b 16 -e signed-integer -B -c 1 \"%s\" \"%s\"' % ( - sample_rate, raw_path, new_path) - os.system(cmd) - - -def _save_files(file_ids, transcripts, wav_dir, new_wav_dir, new_transcript_dir): - for file_id, transcript in zip(file_ids, transcripts): - path = wav_dir + file_id.strip() + '.wav' - filename = path.split('/')[-1] - extracted_transcript = _process_transcript(transcript) - new_path = new_wav_dir + filename - text_path = new_transcript_dir + filename.replace('.wav', '.txt') - with io.FileIO(text_path, "w") as file: - file.write(extracted_transcript.encode('utf-8')) - current_path = os.path.abspath(path) - shutil.copy(current_path, new_path) - os.remove(current_path) - - -def _retrieve_file_ids_and_transcripts(file_id_path, transcripts_path): - with open(file_id_path, 'r') as f: - file_ids = f.readlines() - with open(transcripts_path, 'r') as t: - transcripts 
= t.readlines() - return file_ids, transcripts - - -def _process_transcript(transcript): - """ - Removes tags found in AN4. - """ - extracted_transcript = transcript.split('(')[0].strip("").split('<')[0].strip().upper() - return extracted_transcript - - def download_an4(target_dir: str, manifest_dir: str, min_duration: float, @@ -147,9 +41,9 @@ def download_an4(target_dir: str, os.makedirs(target_dir, exist_ok=True) tar.extractall(target_dir) - train_path = target_dir + '/train/' - val_path = target_dir + '/val/' - test_path = target_dir + '/test/' + train_path = os.path.join(target_dir, 'train/') + val_path = os.path.join(target_dir, 'val/') + test_path = os.path.join(target_dir, 'test/') print('Creating manifests...') create_manifest(data_path=train_path, -- Gitee