diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md
index c6d77a73cbfede5c885e3efde41e569e4defbb45..4b6004da05113836648d14e9ce7c0d4bf166d365 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/README.md
@@ -39,8 +39,6 @@ BERT-Large is a 24-layer, 1024-dim model with 24 self attention he
 
   | Torch_Version | Third-Party Dependency Version |
   | :--------: | :----------------------------------------------------------: |
-  | PyTorch 1.5 | - |
-  | PyTorch 1.8 | - |
   | PyTorch 1.11 | - |
   | PyTorch 2.1 | - |
 
diff --git a/PyTorch/contrib/audio/wav2vec2.0/README.md b/PyTorch/contrib/audio/wav2vec2.0/README.md
index 706b8f84275af2018ca5d3e27e72bef369af4d5b..31bee838077f5cea9d91815506f55672ea3d8e8b 100644
--- a/PyTorch/contrib/audio/wav2vec2.0/README.md
+++ b/PyTorch/contrib/audio/wav2vec2.0/README.md
@@ -36,8 +36,6 @@ Wav2vec2.0 is an unsupervised speech pre-training model published by Meta in 2020. Its co
 
   | Torch_Version | Third-Party Dependency Version |
   | :--------: | :----------------------------------------------------------: |
-  | PyTorch 1.5 | - |
-  | PyTorch 1.8 | - |
   | PyTorch 1.11 | - |
 
 - Environment preparation guide.
diff --git a/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml b/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml
index 37b4bfc0c8c88d76fe754e04f207e052bf2a857a..a7ea2db503887b2866bad8c8a254d5f389d386fc 100644
--- a/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml
+++ b/PyTorch/contrib/audio/wav2vec2.0/examples/wav2vec/config/finetuning/base_100h.yaml
@@ -31,6 +31,7 @@ dataset:
   validate_interval: 50
   validate_interval_updates: 1000
   valid_subset: valid
+  shuffle: true
 
 distributed_training:
   ddp_backend: legacy_ddp
diff --git a/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py b/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py
index 697a0503e28e9af0b0aa3a08ba2d600d36e83a00..fbb376126a00f0b055512a4b88f4032209377c14 100644
--- a/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py
+++ b/PyTorch/contrib/audio/wav2vec2.0/fairseq/dataclass/configs.py
@@ -582,6 +582,12 @@ class DatasetConfig(FairseqDataclass):
             "help": "if true then increment seed with epoch for getting batch iterators, defaults to False.",
         },
     )
+    shuffle: bool = field(
+        default=True,
+        metadata={
+            "help": "whether to shuffle the dataset; can be disabled for performance training runs.",
+        },
+    )
 
 
 @dataclass
diff --git a/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py b/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py
index dca1da454acbda97a6344eb95341d0c1b5e16f7f..9fc4355e33127875d665f1d219964a06880d3c00 100644
--- a/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py
+++ b/PyTorch/contrib/audio/wav2vec2.0/fairseq_cli/train.py
@@ -269,7 +269,7 @@ def train(
     # Initialize data iterator
     itr = epoch_itr.next_epoch_itr(
         fix_batches_to_gpus=cfg.distributed_training.fix_batches_to_gpus,
-        shuffle=(epoch_itr.next_epoch_idx > cfg.dataset.curriculum),
+        shuffle=(epoch_itr.next_epoch_idx > cfg.dataset.curriculum) and cfg.dataset.shuffle,
     )
     update_freq = (
         cfg.optimization.update_freq[epoch_itr.epoch - 1]
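
The changes above wire the new option end to end: DatasetConfig gains a shuffle field (default true), base_100h.yaml sets it explicitly, and fairseq_cli/train.py combines it with the existing curriculum gate when requesting the next epoch iterator. A minimal sketch of that combined condition; the function and parameter names here are illustrative, only curriculum and shuffle correspond to real config keys:

    def should_shuffle(next_epoch_idx: int, curriculum: int, shuffle_flag: bool) -> bool:
        # fairseq keeps the first `curriculum` epochs in their original order;
        # only later epochs are candidates for shuffling.
        past_curriculum = next_epoch_idx > curriculum
        # the new dataset.shuffle flag can veto shuffling outright
        return past_curriculum and shuffle_flag

    assert should_shuffle(next_epoch_idx=1, curriculum=0, shuffle_flag=True)       # normal training
    assert not should_shuffle(next_epoch_idx=1, curriculum=0, shuffle_flag=False)  # dataset.shuffle=false
    assert not should_shuffle(next_epoch_idx=1, curriculum=2, shuffle_flag=True)   # curriculum epoch

Disabling shuffling keeps the batch order fixed across runs, which is presumably why the performance scripts below turn it off: the measured throughput is then comparable from run to run.
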
diff --git a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh
index b5d977b9a18c1c2d2fc47cc5758b8ab8aa91d776..4402294fcf68c68debc98e7a5b5c2e187372c4c3 100644
--- a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh
+++ b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_1p.sh
@@ -127,6 +127,7 @@ echo "$data_path"
 fairseq-hydra-train \
     task.data=./data/manifest \
     dataset.batch_size=$batch_size \
+    dataset.shuffle=false \
     hydra.run.dir=$PWD \
     distributed_training.distributed_world_size=1 \
     optimization.max_update=2000 \
diff --git a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh
index e9ef13f8a891060ca74eb73d8b8710298c1afd43..6bb4742a09614896efdfea5d66108d7e244983db 100644
--- a/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh
+++ b/PyTorch/contrib/audio/wav2vec2.0/test/train_performance_8p.sh
@@ -128,6 +128,7 @@ echo "$data_path"
 fairseq-hydra-train \
     task.data=./data/manifest \
     dataset.batch_size=$batch_size \
+    dataset.shuffle=false \
     hydra.run.dir=$PWD \
     distributed_training.distributed_world_size=8 \
     optimization.max_update=800 \
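
Both performance scripts pass the new option as a dotted command-line override rather than editing the YAML. A small sketch of how such an override reaches the config, using OmegaConf (the configuration library underneath Hydra and fairseq); the config dict is trimmed to the relevant keys and the batch size is a placeholder:

    from omegaconf import OmegaConf

    # defaults, as declared by DatasetConfig and base_100h.yaml
    base = OmegaConf.create({"dataset": {"batch_size": 8, "shuffle": True}})
    # the equivalent of passing `dataset.shuffle=false` on the command line
    overrides = OmegaConf.from_dotlist(["dataset.shuffle=false"])
    cfg = OmegaConf.merge(base, overrides)

    assert cfg.dataset.shuffle is False   # train.py now sees shuffling disabled
    assert cfg.dataset.batch_size == 8    # untouched keys keep their defaults
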