From 8626c08af092789c281ada873409493519eef85a Mon Sep 17 00:00:00 2001
From: zhangyi
Date: Thu, 2 Jun 2022 17:01:24 +0800
Subject: [PATCH] Restructure the DCGAN tutorial, register CV tutorials in index.rst, and fix the zh title
---
 tutorials/application/source_en/cv/dcgan.md   | 79 ++++++++++---------
 tutorials/application/source_en/index.rst     |  4 +
 .../application/source_zh_cn/cv/dcgan.ipynb   |  2 +-
 3 files changed, 46 insertions(+), 39 deletions(-)

diff --git a/tutorials/application/source_en/cv/dcgan.md b/tutorials/application/source_en/cv/dcgan.md
index 72614e0dac..a193aa47d0 100644
--- a/tutorials/application/source_en/cv/dcgan.md
+++ b/tutorials/application/source_en/cv/dcgan.md
@@ -1,7 +1,9 @@
-# Generative Adversarial Network
+# Generating Cartoon Head Portraits via DCGAN

+In the following tutorial, we will use sample code to show how to set up the network and the optimizer, calculate the loss function, and initialize the model weights. The [Anime Avatar Face Image Dataset](https://download.mindspore.cn/dataset/Faces/faces.zip) used here contains 70,171 anime avatar face images, each 96 x 96 pixels.
+
## GAN Basic Principle

Generative Adversarial Network (GAN) is a deep learning model, and has recently become one of the most promising methods for unsupervised learning on complex distributions.

@@ -42,7 +44,7 @@ In the preceding figure, the blue dotted line indicates the discriminator, the b

3. The generator generates data that is closer to the actual data distribution through optimization.
4. The data generated by the generator reaches the same distribution as the real data. In this case, the output of the discriminator is 1/2.

-## DCGAN
+## DCGAN Basic Principle

Deep Convolutional Generative Adversarial Network (DCGAN) is a direct extension of GAN. The difference is that DCGAN uses convolutional and transposed convolutional layers in the discriminator and the generator, respectively.

It was first proposed by Radford et al. in the paper [Unsupervised Representation Learning With Deep Convolutional Generative Adversarial Networks](https://arxiv.org/pdf/1511.06434.pdf).

This tutorial uses the anime face dataset to train a GAN, which is then used to generate anime avatar face images.

-## DCGAN Practice
+## Data Preparation and Processing

-In the following tutorial, we will use sample code to show how to set up the network, optimizer, calculate the loss function, and initialize the model weight. This [Anime Avatar Face Image Dataset](https://download.mindspore.cn/dataset/Faces/faces.zip) contains 70,171 96 x 96 anime avatar face images. First, download the dataset to the specified directory and decompress it. The sample code is as follows:
+First, download the dataset to the specified directory and decompress it. The sample code is as follows:

```python
from mindvision import dataset

@@ -89,10 +91,9 @@ from mindspore import set_context, GRAPH_MODE
set_context(mode=GRAPH_MODE, device_target="GPU")

data_root = "./datasets" # Dataset root directory
-workers = 4 # Number of data loading threads
batch_size = 128 # Batch size
-image_size = 64 # Size of the training image. All images are adjusted to this size.
-nc = 3 # Number of color channels. The value is 3 for color images.
+image_size = 64 # Size of the training image.
+nc = 3 # Number of color channels.
nz = 100 # Length of the implicit vector
ngf = 64 # Size of the feature map in the generator
ndf = 64 # Size of the feature map in the discriminator
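A quick sanity check on these settings: the dataset has 70,171 images, so with `batch_size = 128` one epoch is 549 steps (548 full batches plus one partial batch, since `batch` keeps the remainder by default). This matches the per-epoch step count that appears in the training log later. A plain-Python sketch of the arithmetic:

```python
import math

num_images = 70171  # size of the anime avatar face dataset
batch_size = 128    # matches the configuration above

steps_per_epoch = math.ceil(num_images / batch_size)
print(steps_per_epoch)  # 549, matching the "[10/10][549/549]" entries in the training log
```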
@@ -106,34 +107,33 @@ Define the `create_dataset_imagenet` function to process and augment data.

```python
import numpy as np
import mindspore.dataset as ds
-import mindspore.dataset.vision.c_transforms as vision
+import mindspore.dataset.vision.c_transforms as trans
from mindspore import nn, ops, Tensor
from mindspore import dtype as mstype

-def create_dataset_imagenet(dataset_path, num_parallel_workers=None):
+def create_dataset_imagenet(dataset_path):
    """Data loading"""
-    data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallel_workers, shuffle=True,
+    data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=4, shuffle=True,
                                     decode=True)
    # Data augmentation
    transform_img = [
-        vision.Resize(image_size),
-        vision.CenterCrop(image_size),
-        vision.HWC2CHW(),
+        trans.Resize(image_size),
+        trans.CenterCrop(image_size),
+        trans.HWC2CHW(),
        lambda x: ((x / 255).astype("float32"), np.random.normal(size=(nz, 1, 1)).astype("float32"))
    ]

    # Data mapping
-    data_set = data_set.map(input_columns="image", num_parallel_workers=num_parallel_workers, operations=transform_img,
-                            output_columns=["image", "latent_code"], column_order=["image", "latent_code"])
+    data_set = data_set.map(input_columns="image", num_parallel_workers=4, operations=transform_img,
+                            output_columns=["image", "latent_code"], column_order=["image", "latent_code"])

    # Batch operation
    data_set = data_set.batch(batch_size)
    return data_set

# Obtain the processed dataset.
-data = create_dataset_imagenet(data_root, num_parallel_workers=workers)
+data = create_dataset_imagenet(data_root)

# Obtain the dataset size.
size = data.get_dataset_size()

@@ -150,19 +150,19 @@ data_iter = next(data.create_dict_iterator(output_numpy=True))

# Visualize some training data.
plt.figure(figsize=(10, 3), dpi=140)
for i, image in enumerate(data_iter['image'][:30], 1):
    plt.subplot(3, 10, i)
    plt.axis("off")
    plt.imshow(image.transpose(1, 2, 0))
plt.show()
```

![png](images/output_81_0.png)

-### Setting Up a GAN
+## Setting Up a GAN

After the data is processed, you can set up a GAN. According to the DCGAN paper, all model weights should be randomly initialized from a normal distribution with a `mean` of 0 and a `sigma` of 0.02.

-#### Generator
+### Generator

Generator `G` maps the implicit vector `z` to the data space. Because the data is an image, this process eventually creates an RGB image with the same size as the real image. In practice, this is implemented by a series of `Conv2dTranspose` transposed convolutional layers. Each layer is paired with a `BatchNorm2d` layer and a `ReLU` activation layer. The output data passes through the `tanh` function and returns a value within the data range of `[-1, 1]`.
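Before reading the implementation, it helps to check the size arithmetic. Assuming the standard DCGAN layout (a 4 x 4 feature map decoded from the latent vector, followed by four transposed convolutions with a 4 x 4 kernel, stride 2, and padding 1), each layer doubles the spatial size until it reaches `image_size = 64`. A plain-Python sketch:

```python
def conv_t_out_size(in_size, kernel_size, stride, padding):
    # Output size of a transposed convolution (without output padding):
    # out = (in - 1) * stride - 2 * padding + kernel_size
    return (in_size - 1) * stride - 2 * padding + kernel_size

size = 4  # spatial size of the first feature map decoded from the latent vector
for _ in range(4):
    # Each 4x4-kernel, stride-2, padding-1 layer doubles the spatial size.
    size = conv_t_out_size(size, kernel_size=4, stride=2, padding=1)  # 4 -> 8 -> 16 -> 32 -> 64
print(size)  # 64, matching image_size
```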
@@ -177,18 +177,18 @@ The generator structure in the code is determined by `nz`, `ngf`, and `nc` set i

The code implementation of the generator is as follows:

```python
-from mindspore.common.initializer import Normal
+from mindspore.common import initializer as init

def conv_t(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="pad"):
    """Define the transposed convolutional layer."""
-    weight_init = Normal(mean=0, sigma=0.02)
+    weight_init = init.Normal(mean=0, sigma=0.02)
    return nn.Conv2dTranspose(in_channels, out_channels,
                              kernel_size=kernel_size, stride=stride, padding=padding,
                              weight_init=weight_init, has_bias=False, pad_mode=pad_mode)

def bn(num_features):
    """Define the BatchNorm2d layer."""
-    gamma_init = Normal(mean=1, sigma=0.02)
+    gamma_init = init.Normal(mean=1, sigma=0.02)
    return nn.BatchNorm2d(num_features=num_features, gamma_init=gamma_init)

class Generator(nn.Cell):

@@ -219,7 +219,7 @@ class Generator(nn.Cell):
netG = Generator()
```

-#### Discriminator
+### Discriminator

As described above, discriminator `D` is a binary network model that outputs the probability that an image is real. The input image is processed through a series of `Conv2d`, `BatchNorm2d`, and `LeakyReLU` layers, and the final probability is obtained through the Sigmoid activation function.

@@ -230,7 +230,7 @@ The code implementation of the discriminator is as follows:

```python
def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="pad"):
    """Define the convolutional layer."""
-    weight_init = Normal(mean=0, sigma=0.02)
+    weight_init = init.Normal(mean=0, sigma=0.02)
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=kernel_size, stride=stride, padding=padding,
                     weight_init=weight_init, has_bias=False, pad_mode=pad_mode)

@@ -262,13 +262,20 @@ class Discriminator(nn.Cell):
netD = Discriminator()
```

-### Connecting the Loss Function to the GAN
+## Loss and Optimizer

MindSpore encapsulates the loss function and optimizer into cells. Because of the particular structure of a GAN, its loss is computed from the outputs of both the discriminator and the generator, which makes a GAN different from a common classification network. Therefore, we need to customize the `WithLossCell` class to connect the loss function to the GAN.

+### Loss Function
+
+Once `D` and `G` are defined, we use the binary cross-entropy loss function [BCELoss](https://www.mindspore.cn/docs/zh-CN/master/api_python/nn/mindspore.nn.BCELoss.html) provided by MindSpore to define the losses for `D` and `G`.
+
- Connect the generator and loss function. The code is as follows:

```python
+# Define the loss function.
+loss = nn.BCELoss(reduction='mean')
+
class WithLossCellG(nn.Cell):

    """Connect the generator and loss function."""

@@ -313,19 +320,13 @@ class WithLossCellD(nn.Cell):

        return loss_real + loss_fake
```
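To build intuition for what these loss cells compute, the snippet below checks `BCELoss` on hand-made values (a minimal sketch; the probabilities and labels are made up for illustration and are not part of the tutorial's training code):

```python
import numpy as np
from mindspore import Tensor, nn
from mindspore import dtype as mstype

bce = nn.BCELoss(reduction='mean')

# Two predictions: 0.9 for a sample labeled real (1), 0.1 for a sample labeled fake (0).
pred = Tensor(np.array([0.9, 0.1]), mstype.float32)
label = Tensor(np.array([1.0, 0.0]), mstype.float32)

# BCE = -mean(y*log(p) + (1-y)*log(1-p)) = -mean(log(0.9), log(0.9)), roughly 0.1054.
# Confident, correct guesses give a small loss; confident wrong guesses give a large one.
print(bce(pred, label))
```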
-### Loss Function and Optimizer
-
-After `D` and `G` are defined, use the binary cross entropy loss function [BCELoss](https://www.mindspore.cn/docs/zh-CN/master/api_python/nn/mindspore.nn.BCELoss.html)
-to add a loss function and an optimizer to `D` and `G`.
+### Optimizer

Two separate optimizers are set up here, one for `D` and the other for `G`. Both are Adam optimizers with `lr = 0.0002` and `beta1 = 0.5`.

To trace the learning progress of the generator, a batch of fixed implicit vectors `fixed_noise` that comply with a Gaussian distribution is periodically fed to `G` during training, so that we can watch the images it generates evolve.

```python
-# Define the loss function.
-criterion = nn.BCELoss(reduction='mean')
-
# Create a batch of implicit vectors to observe G.
np.random.seed(1)
fixed_noise = Tensor(np.random.randn(64, nz, 1, 1), dtype=mstype.float32)

@@ -335,7 +336,7 @@ optimizerD = nn.Adam(netD.trainable_params(), learning_rate=lr, beta1=beta1)
optimizerG = nn.Adam(netG.trainable_params(), learning_rate=lr, beta1=beta1)
```

-### Training
+## Training

Training is divided into two parts: discriminator training and generator training.

@@ -347,7 +348,9 @@

As stated in the DCGAN paper, we want to train the generator by minimizing the value of $\log(1 - D(G(z)))$ to produce better fake images.

-In the preceding two processes, the training loss is obtained, and statistics are collected at the end of each epoch. A batch of `fixed_noise` is pushed to the generator to intuitively trace the training progress of `G`. The training process is as follows:
+In the preceding two processes, the training loss is obtained, and statistics are collected at the end of each epoch. A batch of `fixed_noise` is pushed to the generator to intuitively trace the training progress of `G`.
+
+The training process is as follows:

```python
class DCGAN(nn.Cell):

@@ -370,8 +373,8 @@ Instantiate `WithLossCell` and `TrainOneStepCell` of the generator and discrimin

```python
# Instantiate `WithLossCell`.
-netD_with_criterion = WithLossCellD(netD, netG, criterion)
-netG_with_criterion = WithLossCellG(netD, netG, criterion)
+netD_with_criterion = WithLossCellD(netD, netG, loss)
+netG_with_criterion = WithLossCellG(netD, netG, loss)

# Instantiate `TrainOneStepCell`.
myTrainOneStepCellForD = nn.TrainOneStepCell(netD_with_criterion, optimizerD)

@@ -431,9 +434,9 @@ for epoch in range(num_epochs):

    [10/10][501/549] Loss_D: 0.4301 Loss_G: 2.1187
    [10/10][549/549] Loss_D: 0.6756 Loss_G: 1.2940
-```
+```

-### Result
+## Result

Run the following code to plot how the `D` and `G` losses change with the training iterations:
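The plotting cell itself lies outside this hunk. For reference, a minimal sketch of such a plot, assuming per-step losses were collected into `G_losses` and `D_losses` during the training loop (both names, and the dummy values below, are illustrative only):

```python
import matplotlib.pyplot as plt

# Dummy values so the sketch runs; replace with the lists collected during training.
G_losses = [2.8, 2.4, 2.0, 1.7, 1.5, 1.3]
D_losses = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]

plt.figure(figsize=(10, 5))
plt.title("Generator and Discriminator Loss During Training")
plt.plot(G_losses, label="G")
plt.plot(D_losses, label="D")
plt.xlabel("iterations")
plt.ylabel("loss")
plt.legend()
plt.show()
```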
diff --git a/tutorials/application/source_en/index.rst b/tutorials/application/source_en/index.rst
index 26ebc1d53a..046c89173f 100644
--- a/tutorials/application/source_en/index.rst
+++ b/tutorials/application/source_en/index.rst
@@ -11,6 +11,10 @@ Application
   :maxdepth: 1
   :caption: CV

+   cv/resnet50
+   cv/transfer_learning
+   cv/fgsm
+   cv/dcgan

 .. toctree::
    :glob:

diff --git a/tutorials/application/source_zh_cn/cv/dcgan.ipynb b/tutorials/application/source_zh_cn/cv/dcgan.ipynb
index ddc5288b59..0a946ed3df 100644
--- a/tutorials/application/source_zh_cn/cv/dcgan.ipynb
+++ b/tutorials/application/source_zh_cn/cv/dcgan.ipynb
@@ -4,7 +4,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-    "# DCGAN生成漫画女友\n",
+    "# DCGAN生成漫画头像\n",
     "\n",
     "[![下载Notebook](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_notebook.png)](https://obs.dualstack.cn-north-4.myhuaweicloud.com/mindspore-website/notebook/master/tutorials/application/zh_cn/cv/mindspore_dcgan.ipynb) [![下载样例代码](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_download_code.png)](https://obs.dualstack.cn-north-4.myhuaweicloud.com/mindspore-website/notebook/master/tutorials/application/zh_cn/cv/mindspore_dcgan.py) [![查看源文件](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_source.png)](https://gitee.com/mindspore/docs/blob/master/tutorials/application/source_zh_cn/cv/dcgan.ipynb)\n",
     "\n",
-- 
Gitee