diff --git a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py index 7b5057c7d3e1b9de64986c32de7fba53f069175e..cec7c5f1da40fd2f74c491f126597d8d2a14526c 100644 --- a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py +++ b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py @@ -236,14 +236,14 @@ def main_worker(gpu, ngpus_per_node, args): model = vgg16() model = model.to(loc) - optimizer = torch.optim.SGD(model.parameters(), args.lr, + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = nn.CrossEntropyLoss().to(loc) if args.amp: - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value) + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value,combine_grad=True) #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) # optionally resume from a checkpoint @@ -293,7 +293,7 @@ def main_worker(gpu, ngpus_per_node, args): train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) + num_workers=128, pin_memory=True, sampler=train_sampler, drop_last=True) val_loader = torch.utils.data.DataLoader( val_dataset, diff --git a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py index b28b64969f98d5635ce816c237baa2d53fb45d7c..68be33f88a4df285d75f13c1a29583cf0e2062c3 100644 --- a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py +++ b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py @@ -55,13 +55,15 @@ class VGG(nn.Module): x = self.fc1(x) x = self.relu(x) if self.training: - x = x.cpu() - x = self.drop(x).npu() + # x = x.cpu() + # x = self.drop(x).npu() + x = self.drop(x) x = self.fc2(x) x = self.relu(x) if self.training: - x = x.cpu() - x = self.drop(x).npu() + # x = x.cpu() + # x = self.drop(x).npu() + x = self.drop(x) x = self.fc3(x) return x