From ed0ac115f47d8fd8932ca22b249f8f4a998d742e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=83=A8=E6=A7=9F?= Date: Sat, 25 May 2024 14:29:42 +0800 Subject: [PATCH 1/2] =?UTF-8?q?[=E8=87=AA=E7=A0=94][Pytorch]pytorch?= =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=95=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md | 2 -- PyTorch/contrib/audio/wav2vec2.0/README.md | 2 -- 2 files changed, 4 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md index c6d77a73cb..4b6004da05 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md @@ -39,8 +39,6 @@ BERT-Large模型是一个24层,1024维,24个自注意头(self attention he | Torch_Version | 三方库依赖版本 | | :--------: | :----------------------------------------------------------: | - | PyTorch 1.5 | - | - | PyTorch 1.8 | - | | PyTorch 1.11 | - | | PyTorch 2.1 | - | diff --git a/PyTorch/contrib/audio/wav2vec2.0/README.md b/PyTorch/contrib/audio/wav2vec2.0/README.md index 706b8f8427..31bee83807 100644 --- a/PyTorch/contrib/audio/wav2vec2.0/README.md +++ b/PyTorch/contrib/audio/wav2vec2.0/README.md @@ -36,8 +36,6 @@ Wav2vec2.0是Meta在2020年发表的无监督语音预训练模型。它的核 | Torch_Version | 三方库依赖版本 | | :--------: | :----------------------------------------------------------: | - | PyTorch 1.5 | - | - | PyTorch 1.8 | - | | PyTorch 1.11 | - | - 环境准备指导。 -- Gitee From 50713b63bb5523a83f45b35f1af60fa2cfaa6961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=83=A8=E6=A7=9F?= Date: Tue, 28 May 2024 10:27:46 +0800 Subject: [PATCH 2/2] disable data_shuffle for performance training --- .../examples/wav2vec/config/finetuning/base_100h.yaml | 1 + .../contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py | 6 ++++++ PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py | 2 +- .../contrib/audio/wav2vec2.0/test/train_performance_1p.sh | 1 + .../contrib/audio/wav2vec2.0/test/train_performance_8p.sh | 1 + 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml b/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml index 37b4bfc0c8..a7ea2db503 100644 --- a/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml +++ b/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml @@ -31,6 +31,7 @@ dataset: validate_interval: 50 validate_interval_updates: 1000 valid_subset: valid + shuffle: true distributed_training: ddp_backend: legacy_ddp diff --git a/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py b/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py index 697a0503e2..fbb376126a 100644 --- a/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py +++ b/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py @@ -582,6 +582,12 @@ class DatasetConfig(FairseqDataclass): "help": "if true then increment seed with epoch for getting batch iterators, defautls to False.", }, ) + shuffle: bool = field( + default=True, + metadata={ + "help": "You can disable data_shuffle for performance training.", + }, + ) @dataclass diff --git a/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py b/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py index dca1da454a..9fc4355e33 100644 --- a/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py +++ b/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py @@ -269,7 +269,7 @@ def train( # Initialize data iterator itr = epoch_itr.next_epoch_itr( fix_batches_to_gpus=cfg.distributed_training.fix_batches_to_gpus, - shuffle=(epoch_itr.next_epoch_idx > cfg.dataset.curriculum), + shuffle=(epoch_itr.next_epoch_idx > cfg.dataset.curriculum) and cfg.dataset.shuffle, ) update_freq = ( cfg.optimization.update_freq[epoch_itr.epoch - 1] diff --git a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh index b5d977b9a1..4402294fcf 100644 --- a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh +++ b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh @@ -127,6 +127,7 @@ echo "$data_path" fairseq-hydra-train \ task.data=./data/manifest \ dataset.batch_size=$batch_size \ + dataset.shuffle=false \ hydra.run.dir=$PWD \ distributed_training.distributed_world_size=1 \ optimization.max_update=2000 \ diff --git a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh index e9ef13f8a8..6bb4742a09 100644 --- a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh +++ b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh @@ -128,6 +128,7 @@ echo "$data_path" fairseq-hydra-train \ task.data=./data/manifest \ dataset.batch_size=$batch_size \ + dataset.shuffle=false \ hydra.run.dir=$PWD \ distributed_training.distributed_world_size=8 \ optimization.max_update=800 \ -- Gitee