From 2192270ba3f3d96207376e0db7d1ca898e6d064f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:12:32 +0000
Subject: [PATCH 01/22] update

---
 .../Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
index 118737861f..70300b545e 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
@@ -3,7 +3,7 @@ cur_path=`pwd`
 
 # Collective communication parameters; do not modify
 export RANK_SIZE=1
-
+export BMMV2_ENABLE=1
 # Dataset path; keep empty, do not modify
 data_path=""
 
-- 
Gitee

From bc9f56f2367d11e07ec72c05de525813320438f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:37:21 +0000
Subject: [PATCH 02/22] update

---
 .../models/transformer.py | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
index 3995699393..eefbd6489f 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
@@ -29,7 +29,7 @@ from torch import Tensor
 from typing import Optional, Dict
 torch.set_printoptions(threshold=500000000, linewidth=1024)
 from utils import options
-
+import torch.nn.functional as F
 from modules import (
     MultiheadAttention, SinusoidalPositionalEmbedding
 )
@@ -184,8 +184,8 @@ class TransformerEncoder(nn.Module):
         x = self.embed_scale * self.embed_tokens(src_tokens)
         if self.embed_positions is not None:
             x += self.embed_positions(src_tokens)
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+        # if self.training:
+            x = F.dropout(x, self.seed, p=self.dropout)
 
         # B:batch size ; T: seq length ; C: embedding dim 512
         # B x T x C -> T x B x C
@@ -266,8 +266,8 @@ class TransformerDecoder(IncrementalDecoder):
         x = self.embed_scale * self.embed_tokens(prev_output_tokens)
         if positions is not None:
             x += positions
-        if self.training:
-            x,_,_ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+        # if self.training:
+            x= F.dropout(x, self.seed, p=self.dropout)
 
         # B x T x C -> T x B x C
         x = x.transpose(0, 1)
@@ -319,18 +319,18 @@ class TransformerEncoderLayer(nn.Module):
                                  incremental_state=None,
                                  need_weights=False,
                                  static_kv=False)
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+        # if self.training:
+            x = F.dropout(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.ln1(x)
 
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout)
+        # if self.training:
+        x = F.dropout(x, self.seed, p=self.relu_dropout)
         x = self.fc2(x)
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p =self.dropout)
+        # if self.training:
+        x = F.dropout(x, self.seed, p =self.dropout)
         x = residual + x
         x = self.ln2(x)
         return x
@@ -383,8 +383,8 @@ class TransformerDecoderLayer(nn.Module):
             static_kv=False
         )
 
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+        # if self.training:
+        x = F.dropout(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.self_attn_layer_norm(x)
@@ -402,19 +402,19 @@ class TransformerDecoderLayer(nn.Module):
                 mask_future_timesteps=False,
                 need_weights=(not self.training and self.need_attn),
             )
-            if self.training:
-                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+            # if self.training:
+            x = F.dropout(x, self.seed, p=self.dropout)
             x = residual + x
             x = self.encoder_attn_layer_norm(x)
 
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout)
+        # if self.training:
+        x = F.dropout(x, self.seed, p=self.relu_dropout)
         x = self.fc2(x)
-        if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+        # if self.training:
+        x = F.dropout(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.layer_norm(x)
         return x, attn
-- 
Gitee

From e574fd1f4374d54ad43b437375cd63503ceb70eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:39:37 +0000
Subject: [PATCH 03/22] update

---
 .../train_1p.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index b1c4229807..fc7e4352b6 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,15 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>100:pass
-        if i < num_batches - 1 and (i + 1) % update_freq > 0:
-            # buffer updates according to --update-freq
-            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-            continue
-        else:
-            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-        if loss != None:
-            losses.update(loss)
+        if i>20:break
+        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+            if i < num_batches - 1 and (i + 1) % update_freq > 0:
+                # buffer updates according to --update-freq
+                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+                continue
+            else:
+                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+            if loss != None:
+                losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From fe0312febcf599a6fbdad70a8e659280ed280f02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:40:56 +0000
Subject: [PATCH 04/22] update

---
 .../nlp/Transformer_ID0105_for_PyTorch/models/transformer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
index eefbd6489f..3edd80f899 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
@@ -320,7 +320,7 @@ class TransformerEncoderLayer(nn.Module):
                                  need_weights=False,
                                  static_kv=False)
         # if self.training:
-            x = F.dropout(x, self.seed, p=self.dropout)
+        x = F.dropout(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.ln1(x)
-- 
Gitee
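Note: Patches 02-04 replace the Ascend-only torch.npu_dropoutV2 with the portable F.dropout, but keep passing self.seed as the second positional argument. F.dropout takes no seed at all; its signature is dropout(input, p=0.5, training=True, inplace=False), so self.seed lands in the p slot and collides with the explicit p= keyword. A minimal sketch of the mismatch (the npu_dropoutV2 call shape follows the lines removed above; patches 08 and 10 below converge on the corrected call):

    import torch
    import torch.nn.functional as F

    x, seed, p = torch.randn(4, 8), 12345, 0.1

    # torch.npu_dropoutV2 (removed above) takes a seed and returns a tuple:
    #     out, mask, new_seed = torch.npu_dropoutV2(x, seed, p=p)
    # A straight swap to F.dropout pushes the seed into the `p` slot:
    try:
        F.dropout(x, seed, p=p)
    except TypeError as e:
        print(e)  # dropout() got multiple values for argument 'p'

    out = F.dropout(x, p=p, training=True)  # the form patch 10 settles on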
From 69f8322f42f831ace0898c9499d2044ebbac7bca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:42:05 +0000
Subject: [PATCH 05/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index fc7e4352b6..d0e1cb7b2e 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>20:break
-        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-            if i < num_batches - 1 and (i + 1) % update_freq > 0:
-                # buffer updates according to --update-freq
-                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-                continue
-            else:
-                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-            if loss != None:
-                losses.update(loss)
+        if i>100:pass
+        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+        if i < num_batches - 1 and (i + 1) % update_freq > 0:
+            # buffer updates according to --update-freq
+            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+            continue
+        else:
+            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+        if loss != None:
+            losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From 7601a346e67ac9baddae6b5741b1c679d3c1e522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:49:14 +0000
Subject: [PATCH 06/22] update

---
 .../nlp/Transformer_ID0105_for_PyTorch/models/transformer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
index 3edd80f899..0a29e71f2b 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
@@ -185,7 +185,7 @@ class TransformerEncoder(nn.Module):
         if self.embed_positions is not None:
             x += self.embed_positions(src_tokens)
         # if self.training:
-            x = F.dropout(x, self.seed, p=self.dropout)
+        x = F.dropout(x, self.seed, p=self.dropout)
 
         # B:batch size ; T: seq length ; C: embedding dim 512
         # B x T x C -> T x B x C
@@ -267,7 +267,7 @@ class TransformerDecoder(IncrementalDecoder):
         if positions is not None:
             x += positions
         # if self.training:
-            x= F.dropout(x, self.seed, p=self.dropout)
+        x= F.dropout(x, self.seed, p=self.dropout)
 
         # B x T x C -> T x B x C
         x = x.transpose(0, 1)
-- 
Gitee

From 5697d58faf90abb1613b863fdf58197e46c5aec3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:50:29 +0000
Subject: [PATCH 07/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index d0e1cb7b2e..00713d6a95 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
    for i, sample in enumerate(itr):
-        if i>100:pass
-        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-        if i < num_batches - 1 and (i + 1) % update_freq > 0:
-            # buffer updates according to --update-freq
-            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-            continue
-        else:
-            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-        if loss != None:
-            losses.update(loss)
+        if i>10:break
+        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+            if i < num_batches - 1 and (i + 1) % update_freq > 0:
+                # buffer updates according to --update-freq
+                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+                continue
+            else:
+                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+            if loss != None:
+                losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From 8ca916d4b9dbd34ba1960894af5fc2ebb8bb9052 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:58:30 +0000
Subject: [PATCH 08/22] update

---
 .../models/transformer.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
index 0a29e71f2b..3d080b4725 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
@@ -185,7 +185,7 @@ class TransformerEncoder(nn.Module):
         if self.embed_positions is not None:
             x += self.embed_positions(src_tokens)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout)
+        x = F.dropout(x, self.seed, p=self.dropout,training=self.training)
 
         # B:batch size ; T: seq length ; C: embedding dim 512
         # B x T x C -> T x B x C
@@ -267,7 +267,7 @@ class TransformerDecoder(IncrementalDecoder):
         if positions is not None:
             x += positions
         # if self.training:
-        x= F.dropout(x, self.seed, p=self.dropout)
+        x= F.dropout(x, self.seed, p=self.dropout, training=self.training)
 
         # B x T x C -> T x B x C
         x = x.transpose(0, 1)
@@ -320,17 +320,17 @@ class TransformerEncoderLayer(nn.Module):
                                  need_weights=False,
                                  static_kv=False)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout)
+        x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
         x = residual + x
         x = self.ln1(x)
 
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.relu_dropout)
+        x = F.dropout(x, self.seed, p=self.relu_dropout, training=self.training)
         x = self.fc2(x)
         # if self.training:
-        x = F.dropout(x, self.seed, p =self.dropout)
+        x = F.dropout(x, self.seed, p =self.dropout, training=self.training)
         x = residual + x
         x = self.ln2(x)
         return x
@@ -384,7 +384,7 @@ class TransformerDecoderLayer(nn.Module):
         )
 
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout)
+        x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
         x = residual + x
         x = self.self_attn_layer_norm(x)
@@ -403,7 +403,7 @@ class TransformerDecoderLayer(nn.Module):
                 need_weights=(not self.training and self.need_attn),
             )
             # if self.training:
-            x = F.dropout(x, self.seed, p=self.dropout)
+            x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
             x = residual + x
             x = self.encoder_attn_layer_norm(x)
@@ -411,10 +411,10 @@ class TransformerDecoderLayer(nn.Module):
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.relu_dropout)
+        x = F.dropout(x, self.seed, p=self.relu_dropout, training=self.training)
         x = self.fc2(x)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout)
+        x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
         x = residual + x
         x = self.layer_norm(x)
         return x, attn
-- 
Gitee
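Note: Patch 08 threads training=self.training through every F.dropout call. This matters because F.dropout defaults to training=True: without the flag, these modules would keep dropping activations at eval time, whereas the original if self.training: guard (and nn.Dropout) disable dropout outside training. A small self-contained check of the gated form:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class Block(nn.Module):
        def __init__(self, p=0.5):
            super().__init__()
            self.p = p

        def forward(self, x):
            # Gated functional dropout: a no-op in eval mode, like nn.Dropout.
            return F.dropout(x, p=self.p, training=self.training)

    m, x = Block(), torch.ones(2, 4)
    m.train()
    print(bool((m(x) == x).all()))  # usually False: elements dropped and rescaled
    m.eval()
    print(bool((m(x) == x).all()))  # True: dropout disabled at eval time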
From 54e395c01867e096e027103ee771cfb7e03bc4ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 01:59:47 +0000
Subject: [PATCH 09/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index 00713d6a95..d0e1cb7b2e 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>10:break
-        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-            if i < num_batches - 1 and (i + 1) % update_freq > 0:
-                # buffer updates according to --update-freq
-                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-                continue
-            else:
-                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-            if loss != None:
-                losses.update(loss)
+        if i>100:pass
+        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+        if i < num_batches - 1 and (i + 1) % update_freq > 0:
+            # buffer updates according to --update-freq
+            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+            continue
+        else:
+            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+        if loss != None:
+            losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From 06f3e88cab4fafb02982dc4363cf25776b91e20c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 02:07:13 +0000
Subject: [PATCH 10/22] update

---
 .../models/transformer.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
index 3d080b4725..f8a7d0a6d9 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
@@ -185,7 +185,7 @@ class TransformerEncoder(nn.Module):
         if self.embed_positions is not None:
             x += self.embed_positions(src_tokens)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout,training=self.training)
+        x = F.dropout(x, p=self.dropout,training=self.training)
 
         # B:batch size ; T: seq length ; C: embedding dim 512
         # B x T x C -> T x B x C
@@ -267,7 +267,7 @@ class TransformerDecoder(IncrementalDecoder):
         if positions is not None:
             x += positions
         # if self.training:
-        x= F.dropout(x, self.seed, p=self.dropout, training=self.training)
+        x= F.dropout(x, p=self.dropout, training=self.training)
 
         # B x T x C -> T x B x C
         x = x.transpose(0, 1)
@@ -320,17 +320,17 @@ class TransformerEncoderLayer(nn.Module):
                                  need_weights=False,
                                  static_kv=False)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
+        x = F.dropout(x, p=self.dropout, training=self.training)
         x = residual + x
         x = self.ln1(x)
 
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.relu_dropout, training=self.training)
+        x = F.dropout(x, p=self.relu_dropout, training=self.training)
         x = self.fc2(x)
         # if self.training:
-        x = F.dropout(x, self.seed, p =self.dropout, training=self.training)
+        x = F.dropout(x, p =self.dropout, training=self.training)
         x = residual + x
         x = self.ln2(x)
         return x
@@ -384,7 +384,7 @@ class TransformerDecoderLayer(nn.Module):
         )
 
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
+        x = F.dropout(x, p=self.dropout, training=self.training)
         x = residual + x
         x = self.self_attn_layer_norm(x)
@@ -403,7 +403,7 @@ class TransformerDecoderLayer(nn.Module):
                 need_weights=(not self.training and self.need_attn),
             )
             # if self.training:
-            x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
+            x = F.dropout(x, p=self.dropout, training=self.training)
             x = residual + x
             x = self.encoder_attn_layer_norm(x)
@@ -411,10 +411,10 @@ class TransformerDecoderLayer(nn.Module):
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.relu_dropout, training=self.training)
+        x = F.dropout(x, p=self.relu_dropout, training=self.training)
         x = self.fc2(x)
         # if self.training:
-        x = F.dropout(x, self.seed, p=self.dropout, training=self.training)
+        x = F.dropout(x, p=self.dropout, training=self.training)
         x = residual + x
         x = self.layer_norm(x)
         return x, attn
-- 
Gitee

From 20448fd14e3229ffa51869731a53f47f18c91c59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 02:17:44 +0000
Subject: [PATCH 11/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index d0e1cb7b2e..00713d6a95 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>100:pass
-        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-        if i < num_batches - 1 and (i + 1) % update_freq > 0:
-            # buffer updates according to --update-freq
-            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-            continue
-        else:
-            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-        if loss != None:
-            losses.update(loss)
+        if i>10:break
+        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+            if i < num_batches - 1 and (i + 1) % update_freq > 0:
+                # buffer updates according to --update-freq
+                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+                continue
+            else:
+                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+            if loss != None:
+                losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From 7136f330dd4d6da943abe28068862800b40f1edc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 02:31:19 +0000
Subject: [PATCH 12/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index 00713d6a95..d0e1cb7b2e 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>10:break
-        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-            if i < num_batches - 1 and (i + 1) % update_freq > 0:
-                # buffer updates according to --update-freq
-                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-                continue
-            else:
-                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-            if loss != None:
-                losses.update(loss)
+        if i>100:pass
+        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+        if i < num_batches - 1 and (i + 1) % update_freq > 0:
+            # buffer updates according to --update-freq
+            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+            continue
+        else:
+            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+        if loss != None:
+            losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee
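Note: Patches 09-12 (and 13-14 below) flip train_1p.py back and forth between two states: a short profiling run that wraps each step in torch.npu.profile and breaks after roughly ten iterations, and the normal full run with the profiler commented out. The pattern being toggled is roughly the sketch below; train_step is a stand-in for trainer.train_step, while the torch.npu.profile call and its arguments are taken verbatim from the diffs (it is the Ascend torch_npu context manager, so this only runs on an NPU build):

    import torch  # on Ascend, torch_npu must also be imported for torch.npu.*

    def train_step(sample):          # stand-in for trainer.train_step(...)
        return sample.sum()

    itr = [torch.randn(8) for _ in range(100)]
    profile_steps = 10               # the patches hard-code this bound

    for i, sample in enumerate(itr):
        if i > profile_steps:
            break                    # capture only the first few steps, then stop
        with torch.npu.profile(profiler_result_path="./results",
                               use_e2e_profiler=True):
            loss = train_step(sample)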
From 041a59a586f6ca597c11eb6063189932b3fc83c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 02:59:19 +0000
Subject: [PATCH 13/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index d0e1cb7b2e..00713d6a95 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>100:pass
-        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-        if i < num_batches - 1 and (i + 1) % update_freq > 0:
-            # buffer updates according to --update-freq
-            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-            continue
-        else:
-            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-        if loss != None:
-            losses.update(loss)
+        if i>10:break
+        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+            if i < num_batches - 1 and (i + 1) % update_freq > 0:
+                # buffer updates according to --update-freq
+                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+                continue
+            else:
+                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+            if loss != None:
+                losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From f30b7fcf6078c329f05118135b7d50cead7e94c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 03:16:51 +0000
Subject: [PATCH 14/22] update

---
 .../train_1p.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index 00713d6a95..d0e1cb7b2e 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -217,16 +217,16 @@ def train(args, trainer, datasets, epoch_itr):
 
     trainer.get_throughput_meter().reset()
     for i, sample in enumerate(itr):
-        if i>10:break
-        with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
-            if i < num_batches - 1 and (i + 1) % update_freq > 0:
-                # buffer updates according to --update-freq
-                loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-                continue
-            else:
-                loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
-            if loss != None:
-                losses.update(loss)
+        if i>100:pass
+        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
+        if i < num_batches - 1 and (i + 1) % update_freq > 0:
+            # buffer updates according to --update-freq
+            loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
+            continue
+        else:
+            loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1))
+        if loss != None:
+            losses.update(loss)
 
         if i >= 10:
             t = time.time()
-- 
Gitee

From 9fc776477f496c187dd28b4c343c3faa161e13d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 03:17:34 +0000
Subject: [PATCH 15/22] update

---
 .../nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh
index 06828617bf..9ccd3f32ff 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh
@@ -10,7 +10,7 @@ data_path=""
 
 # Network name, same as the directory name; modify after model review
 Network="Transformer_ID0105_for_PyTorch"
-
+export BMMV2_ENABLE=1
 # Number of training epochs
 train_epochs=30
 # Training batch_size; modify after model review
-- 
Gitee

From c4edb381e676c97960bbd421c9b9192732b50a74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 03:25:15 +0000
Subject: [PATCH 16/22] update

---
 .../nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh
index 8942bbc29c..b1ef8b10f4 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh
@@ -3,7 +3,7 @@ cur_path=`pwd`
 
 # Collective communication parameters; do not modify
 export RANK_SIZE=1
-
+export BMMV2_ENABLE=1
 # Dataset path; keep empty, do not modify
 data_path=""
 
-- 
Gitee

From b8d18d39e71eccfaa24809031da5ccb79d76ec6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 08:29:52 +0000
Subject: [PATCH 17/22] update

---
 .../Transformer_ID0105_for_PyTorch/models/transformer.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
index f8a7d0a6d9..2671386f66 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/models/transformer.py
@@ -184,7 +184,6 @@ class TransformerEncoder(nn.Module):
         x = self.embed_scale * self.embed_tokens(src_tokens)
         if self.embed_positions is not None:
             x += self.embed_positions(src_tokens)
-        # if self.training:
         x = F.dropout(x, p=self.dropout,training=self.training)
 
         # B:batch size ; T: seq length ; C: embedding dim 512
@@ -266,7 +265,6 @@ class TransformerDecoder(IncrementalDecoder):
         x = self.embed_scale * self.embed_tokens(prev_output_tokens)
         if positions is not None:
             x += positions
-        # if self.training:
         x= F.dropout(x, p=self.dropout, training=self.training)
 
         # B x T x C -> T x B x C
         x = x.transpose(0, 1)
@@ -319,17 +317,14 @@ class TransformerEncoderLayer(nn.Module):
                                  incremental_state=None,
                                  need_weights=False,
                                  static_kv=False)
-        # if self.training:
         x = F.dropout(x, p=self.dropout, training=self.training)
         x = residual + x
         x = self.ln1(x)
 
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
-        # if self.training:
         x = F.dropout(x, p=self.relu_dropout, training=self.training)
         x = self.fc2(x)
-        # if self.training:
         x = F.dropout(x, p =self.dropout, training=self.training)
         x = residual + x
         x = self.ln2(x)
@@ -383,7 +378,6 @@ class TransformerDecoderLayer(nn.Module):
             static_kv=False
         )
 
-        # if self.training:
         x = F.dropout(x, p=self.dropout, training=self.training)
         x = residual + x
         x = self.self_attn_layer_norm(x)
@@ -402,7 +396,6 @@ class TransformerDecoderLayer(nn.Module):
                 mask_future_timesteps=False,
                 need_weights=(not self.training and self.need_attn),
             )
-            # if self.training:
             x = F.dropout(x, p=self.dropout, training=self.training)
             x = residual + x
 
@@ -410,10 +403,8 @@ class TransformerDecoderLayer(nn.Module):
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
-        # if self.training:
         x = F.dropout(x, p=self.relu_dropout, training=self.training)
         x = self.fc2(x)
-        # if self.training:
         x = F.dropout(x, p=self.dropout, training=self.training)
         x = residual + x
         x = self.layer_norm(x)
         return x, attn
-- 
Gitee
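Note: Alongside the dropout cleanup, patches 01, 15, 16, and 20 export BMMV2_ENABLE=1 in every launch script. The scripts themselves never read it; judging by the name, it appears to switch batched matrix multiplication onto the BatchMatMulV2 operator inside the Ascend PyTorch adapter, but that reading is an assumption -- the series does not document the variable. All the scripts rely on is that the training process inherits it, so the adapter can read it from the environment at runtime, e.g.:

    import os

    # Hypothetical consumer-side lookup: how an on/off flag like BMMV2_ENABLE
    # is typically read (the real check lives inside the NPU adapter, not here).
    bmmv2_enabled = os.environ.get("BMMV2_ENABLE", "0") == "1"
    print("BMMV2 kernel path enabled:", bmmv2_enabled)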
From b1cc6b0359883a291fc2bfa67d81bd62dd600f13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 08:31:45 +0000
Subject: [PATCH 18/22] update

---
 .../dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh
index b1ef8b10f4..2b6134790e 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh
@@ -3,6 +3,7 @@ cur_path=`pwd`
 
 # Collective communication parameters; do not modify
 export RANK_SIZE=1
+
 export BMMV2_ENABLE=1
 # Dataset path; keep empty, do not modify
 data_path=""
-- 
Gitee

From 2eea6acee416aa45f11ff0a65ee0d48619bfa526 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 08:32:25 +0000
Subject: [PATCH 19/22] update

---
 .../dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh
index 9ccd3f32ff..dc8f9b79a7 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh
@@ -10,6 +10,7 @@ data_path=""
 
 # Network name, same as the directory name; modify after model review
 Network="Transformer_ID0105_for_PyTorch"
+
 export BMMV2_ENABLE=1
 # Number of training epochs
 train_epochs=30
-- 
Gitee

From 6928741d7845f831ed2fd4ba5ed0ec0408ef6a74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 08:32:59 +0000
Subject: [PATCH 20/22] update

---
 .../Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh
index b10fe5ec3b..f4b26836cd 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh
@@ -7,6 +7,7 @@ export RANK_SIZE=8
 export MASTER_ADDR=localhost
 export MASTER_PORT=29688
 export HCCL_WHITELIST_DISABLE=1
+export BMMV2_ENABLE=1
 
 # Dataset path; keep empty, do not modify
 data_path=""
-- 
Gitee

From 5678dd480fb03f94c746f420f4b96fc8aad65070 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 08:33:31 +0000
Subject: [PATCH 21/22] update

---
 .../Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
index 70300b545e..48e7fb3af7 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
@@ -3,6 +3,7 @@ cur_path=`pwd`
 
 # Collective communication parameters; do not modify
 export RANK_SIZE=1
+
 export BMMV2_ENABLE=1
 # Dataset path; keep empty, do not modify
 data_path=""
-- 
Gitee

From 54ae5bf667b9ca61956e95953f5e847b9e6904e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=89=E5=AE=8F=E6=A2=85?= <591861959@qq.com>
Date: Wed, 30 Mar 2022 08:35:05 +0000
Subject: [PATCH 22/22] update

---
 PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
index d0e1cb7b2e..b1c4229807 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.py
@@ -218,7 +218,6 @@ def train(args, trainer, datasets, epoch_itr):
 
     for i, sample in enumerate(itr):
         if i>100:pass
-        # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True):
         if i < num_batches - 1 and (i + 1) % update_freq > 0:
             # buffer updates according to --update-freq
             loss = trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1))
-- 
Gitee
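Note: With the profiler experiments reverted by patch 22, what remains in train_1p.py is the --update-freq gradient-accumulation loop: every batch runs forward/backward, but parameters are updated only on every update_freq-th batch and on the last one (the leading "if i>100:pass" is a no-op). A self-contained sketch of that accumulation logic, with a toy model and optimizer standing in for the trainer:

    import torch

    model = torch.nn.Linear(4, 1)                      # stand-in for the trainer
    opt = torch.optim.SGD(model.parameters(), lr=0.1)
    batches = [torch.randn(2, 4) for _ in range(8)]
    update_freq, num_batches = 4, len(batches)

    for i, batch in enumerate(batches):
        loss = model(batch).pow(2).mean()
        loss.backward()                                # gradients accumulate
        if i < num_batches - 1 and (i + 1) % update_freq > 0:
            continue                                   # buffered step (update_params=False)
        opt.step()                                     # apply the accumulated update
        opt.zero_grad()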