From 47cb16f5b193d7ea81bf3f69d8cd7fb496353609 Mon Sep 17 00:00:00 2001
From: jiangzhiwen
Date: Wed, 22 Jul 2020 10:15:12 +0800
Subject: [PATCH] optimize GetDatasize

---
 .../data_loading_enhance/data_loading_enhancement.ipynb      | 2 +-
 .../mindinsight_model_lineage_and_data_lineage.ipynb         | 4 ++--
 tutorials/notebook/nlp_application.ipynb                     | 4 ++--
 .../source_en/advanced_use/computer_vision_application.md    | 2 +-
 tutorials/source_en/advanced_use/differential_privacy.md     | 3 +--
 tutorials/source_en/advanced_use/distributed_training.md     | 2 +-
 tutorials/source_en/advanced_use/nlp_application.md          | 2 +-
 tutorials/source_en/quick_start/quick_start.md               | 2 +-
 .../use/data_preparation/data_processing_and_augmentation.md | 2 +-
 .../source_zh_cn/advanced_use/computer_vision_application.md | 2 +-
 tutorials/source_zh_cn/advanced_use/differential_privacy.md  | 3 +--
 tutorials/source_zh_cn/advanced_use/distributed_training.md  | 2 +-
 tutorials/source_zh_cn/advanced_use/nlp_application.md       | 2 +-
 tutorials/source_zh_cn/quick_start/quick_start.md            | 2 +-
 .../use/data_preparation/data_processing_and_augmentation.md | 2 +-
 .../distributed_training/resnet50_distributed_training.py    | 2 +-
 tutorials/tutorial_code/lenet.py                             | 2 +-
 tutorials/tutorial_code/resnet/cifar_resnet50.py             | 4 ++--
 tutorials/tutorial_code/sample_for_cloud/resnet50_train.py   | 2 +-
 19 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb b/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb
index 963496357f..fb56262084 100644
--- a/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb
+++ b/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb
@@ -119,7 +119,7 @@ "source": [
     "- ### repeat\n",
     "\n",
-    "在有限的数据集内,为了优化网络,通常会将一个数据集训练多次。加倍数据集,通常用在多个`epoch`训练中,通过`repeat`来加倍数据量。\n",
+    "在有限的数据集内,为了优化网络,通常会将一个数据集训练多次。加倍数据集,通过`repeat`来加倍数据量。\n",
     "\n",
     "我们可以定义`ds2`数据集,调用`repeat`来加倍数据量。其中,将倍数设为2,故`ds3`数据量为原始数据集`ds2`的2倍。"
    ]
   }
diff --git a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
index b1d3a2a1d1..29100b841a 100644
--- a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
+++ b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
@@ -425,7 +425,7 @@
     "    mnist_path = \"./MNIST_Data\"\n",
     "    \n",
     "    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')\n",
-    "    repeat_size = 1\n",
+    "    repeat_size = 1\n",
     "    # create the network\n",
     "    network = LeNet5()\n",
     "\n",
@@ -702,4 +702,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/notebook/nlp_application.ipynb b/tutorials/notebook/nlp_application.ipynb
index d09dd09ff4..0d9c01f007 100644
--- a/tutorials/notebook/nlp_application.ipynb
+++ b/tutorials/notebook/nlp_application.ipynb
@@ -567,7 +567,7 @@
     "\n",
     "    return data_set\n",
     "\n",
-    "ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)"
+    "ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size)"
    ]
   },
   {
@@ -5143,4 +5143,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/source_en/advanced_use/computer_vision_application.md b/tutorials/source_en/advanced_use/computer_vision_application.md
index 4da8db42e7..0aa811a7aa 100644
--- a/tutorials/source_en/advanced_use/computer_vision_application.md
+++ b/tutorials/source_en/advanced_use/computer_vision_application.md
@@ -203,7 +203,7 @@ The trained model file (such as `resnet.ckpt`) can be used to predict the class
 ```python
 param_dict = load_checkpoint(args_opt.checkpoint_path)
 load_param_into_net(net, param_dict)
-eval_dataset = create_dataset(1, training=False)
+eval_dataset = create_dataset(training=False)
 res = model.eval(eval_dataset)
 print("result: ", res)
 ```
diff --git a/tutorials/source_en/advanced_use/differential_privacy.md b/tutorials/source_en/advanced_use/differential_privacy.md
index c85f4c6b3c..836b52b16c 100644
--- a/tutorials/source_en/advanced_use/differential_privacy.md
+++ b/tutorials/source_en/advanced_use/differential_privacy.md
@@ -234,8 +234,7 @@ ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
 
 # get training dataset
 ds_train = generate_mnist_dataset(os.path.join(cfg.data_path, "train"),
-                                  cfg.batch_size,
-                                  cfg.epoch_size)
+                                  cfg.batch_size)
 ```
 
 ### Introducing the Differential Privacy
diff --git a/tutorials/source_en/advanced_use/distributed_training.md b/tutorials/source_en/advanced_use/distributed_training.md
index 119c82e952..d98bdf3e31 100644
--- a/tutorials/source_en/advanced_use/distributed_training.md
+++ b/tutorials/source_en/advanced_use/distributed_training.md
@@ -247,7 +247,7 @@ context.set_context(device_id=device_id) # set device_id
 def test_train_cifar(epoch_size=10):
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
     loss_cb = LossMonitor()
-    dataset = create_dataset(data_path, epoch_size)
+    dataset = create_dataset(data_path)
     batch_size = 32
     num_classes = 10
     net = resnet50(batch_size, num_classes)
diff --git a/tutorials/source_en/advanced_use/nlp_application.md b/tutorials/source_en/advanced_use/nlp_application.md
index 02cdea4c2c..bc531f4963 100644
--- a/tutorials/source_en/advanced_use/nlp_application.md
+++ b/tutorials/source_en/advanced_use/nlp_application.md
@@ -204,7 +204,7 @@ Load the corresponding dataset, configure the CheckPoint generation information,
 model = Model(network, loss, opt, {'acc': Accuracy()})
 
 print("============== Starting Training ==============")
-ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
+ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size)
 config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                              keep_checkpoint_max=cfg.keep_checkpoint_max)
 ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
diff --git a/tutorials/source_en/quick_start/quick_start.md b/tutorials/source_en/quick_start/quick_start.md
index 500c9614e3..7599bd4b92 100644
--- a/tutorials/source_en/quick_start/quick_start.md
+++ b/tutorials/source_en/quick_start/quick_start.md
@@ -355,7 +355,7 @@ if __name__ == "__main__":
 
     epoch_size = 1
     mnist_path = "./MNIST_Data"
-    repeat_size = epoch_size
+    repeat_size = 1
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
     train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, dataset_sink_mode)
     ...
diff --git a/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md b/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md
index c87e486379..b3fe180a5b 100644
--- a/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md
+++ b/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md
@@ -88,7 +88,7 @@ In limited datasets, to optimize the network, a dataset is usually trained for m
 
 > In machine learning, an epoch refers to one cycle through the full training dataset.
 
-During multiple epochs, `repeat` can be used to increase the data size. The definition of `repeat` is as follows:
+During training, `repeat` can be used to increase the data size. The definition of `repeat` is as follows:
 ```python
 def repeat(self, count=None):
 ```
diff --git a/tutorials/source_zh_cn/advanced_use/computer_vision_application.md b/tutorials/source_zh_cn/advanced_use/computer_vision_application.md
index 41a1e2613b..d8b85a56fe 100644
--- a/tutorials/source_zh_cn/advanced_use/computer_vision_application.md
+++ b/tutorials/source_zh_cn/advanced_use/computer_vision_application.md
@@ -205,7 +205,7 @@ model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
 ```python
 param_dict = load_checkpoint(args_opt.checkpoint_path)
 load_param_into_net(net, param_dict)
-eval_dataset = create_dataset(1, training=False)
+eval_dataset = create_dataset(training=False)
 res = model.eval(eval_dataset)
 print("result: ", res)
 ```
diff --git a/tutorials/source_zh_cn/advanced_use/differential_privacy.md b/tutorials/source_zh_cn/advanced_use/differential_privacy.md
index 74998d9928..2ffd694462 100644
--- a/tutorials/source_zh_cn/advanced_use/differential_privacy.md
+++ b/tutorials/source_zh_cn/advanced_use/differential_privacy.md
@@ -220,8 +220,7 @@ ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
 
 # get training dataset
 ds_train = generate_mnist_dataset(os.path.join(cfg.data_path, "train"),
-                                  cfg.batch_size,
-                                  cfg.epoch_size)
+                                  cfg.batch_size)
 ```
 
 ### 引入差分隐私
diff --git a/tutorials/source_zh_cn/advanced_use/distributed_training.md b/tutorials/source_zh_cn/advanced_use/distributed_training.md
index a0636cc666..423c979c4c 100644
--- a/tutorials/source_zh_cn/advanced_use/distributed_training.md
+++ b/tutorials/source_zh_cn/advanced_use/distributed_training.md
@@ -248,7 +248,7 @@ context.set_context(device_id=device_id) # set device_id
 def test_train_cifar(epoch_size=10):
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
     loss_cb = LossMonitor()
-    dataset = create_dataset(data_path, epoch_size)
+    dataset = create_dataset(data_path)
     batch_size = 32
     num_classes = 10
     net = resnet50(batch_size, num_classes)
diff --git a/tutorials/source_zh_cn/advanced_use/nlp_application.md b/tutorials/source_zh_cn/advanced_use/nlp_application.md
index ae9f9bb7c1..540f4e1909 100644
--- a/tutorials/source_zh_cn/advanced_use/nlp_application.md
+++ b/tutorials/source_zh_cn/advanced_use/nlp_application.md
@@ -204,7 +204,7 @@ loss_cb = LossMonitor()
 model = Model(network, loss, opt, {'acc': Accuracy()})
 
 print("============== Starting Training ==============")
-ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
+ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size)
 config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                              keep_checkpoint_max=cfg.keep_checkpoint_max)
 ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
diff --git a/tutorials/source_zh_cn/quick_start/quick_start.md b/tutorials/source_zh_cn/quick_start/quick_start.md
index 19a7c8e7a4..365c41ea81 100644
--- a/tutorials/source_zh_cn/quick_start/quick_start.md
+++ b/tutorials/source_zh_cn/quick_start/quick_start.md
@@ -357,7 +357,7 @@ if __name__ == "__main__":
 
     epoch_size = 1
    mnist_path = "./MNIST_Data"
-    repeat_size = epoch_size
+    repeat_size = 1
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
     train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, dataset_sink_mode)
     ...
diff --git a/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md b/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md
index b350eac747..fad8413cfd 100644
--- a/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md
+++ b/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md
@@ -89,7 +89,7 @@ ds1 = ds1.repeat(10)
 
 > 在机器学习中,每训练完一个完整的数据集,我们称为训练完了一个epoch。
 
-加倍数据集,通常用在多个epoch(迭代)训练中,通过`repeat`来加倍数据量。`repeat`定义如下:
+加倍数据集,通常用在训练中,通过`repeat`来加倍数据量。`repeat`定义如下:
 ```python
 def repeat(self, count=None):
 ```
diff --git a/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py b/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py
index 2d7db4e52a..ec152dc17f 100644
--- a/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py
+++ b/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py
@@ -120,7 +120,7 @@ def test_train_cifar(epoch_size=10):
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
     loss_cb = LossMonitor()
     data_path = os.getenv('DATA_PATH')
-    dataset = create_dataset(data_path, epoch_size)
+    dataset = create_dataset(data_path)
     batch_size = 32
     num_classes = 10
     net = resnet50(batch_size, num_classes)
diff --git a/tutorials/tutorial_code/lenet.py b/tutorials/tutorial_code/lenet.py
index 441f423360..5f5dfffb22 100644
--- a/tutorials/tutorial_code/lenet.py
+++ b/tutorials/tutorial_code/lenet.py
@@ -206,7 +206,7 @@ if __name__ == "__main__":
     mnist_path = "./MNIST_Data"
     # define the loss function
     net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
-    repeat_size = epoch_size
+    repeat_size = 1
     # create the network
     network = LeNet5()
     # define the optimizer
diff --git a/tutorials/tutorial_code/resnet/cifar_resnet50.py b/tutorials/tutorial_code/resnet/cifar_resnet50.py
index c77059041c..94cca8b461 100644
--- a/tutorials/tutorial_code/resnet/cifar_resnet50.py
+++ b/tutorials/tutorial_code/resnet/cifar_resnet50.py
@@ -118,7 +118,7 @@ if __name__ == '__main__':
 
     # as for train, users could use model.train
     if args_opt.do_train:
-        dataset = create_dataset(epoch_size)
+        dataset = create_dataset()
         batch_num = dataset.get_dataset_size()
         config_ck = CheckpointConfig(save_checkpoint_steps=batch_num, keep_checkpoint_max=35)
         ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory="./", config=config_ck)
@@ -130,6 +130,6 @@ if __name__ == '__main__':
     if args_opt.checkpoint_path:
         param_dict = load_checkpoint(args_opt.checkpoint_path)
         load_param_into_net(net, param_dict)
-        eval_dataset = create_dataset(1, training=False)
+        eval_dataset = create_dataset(training=False)
         res = model.eval(eval_dataset)
         print("result: ", res)
diff --git a/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py b/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py
index 5d5d8b8506..2ec27f4862 100644
--- a/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py
+++ b/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py
@@ -130,7 +130,7 @@ def resnet50_train(args_opt):
     # create dataset
     print('Create train and evaluate dataset.')
     train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
-                                   repeat_num=epoch_size, batch_size=batch_size)
+                                   repeat_num=1, batch_size=batch_size)
     eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                   repeat_num=1, batch_size=batch_size)
     train_step_size = train_dataset.get_dataset_size()
-- 
Gitee
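The common thread in these hunks: dataset creation no longer takes the epoch count as a `repeat` multiplier. `Model.train(epoch_size, ...)` already drives the epoch loop, and repeating the pipeline `epoch_size` times would make `get_dataset_size()` report steps across all epochs rather than one. A minimal sketch of the resulting convention, assuming a simplified MNIST pipeline (the helper below is illustrative, not the exact tutorial code):

```python
import mindspore.dataset as ds

def create_dataset(data_path, batch_size=32, repeat_size=1):
    """Build the data pipeline; the repeat count stays decoupled from epochs."""
    mnist_ds = ds.MnistDataset(data_path)            # assumes MNIST data at data_path
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)          # repeat once, not epoch_size times
    return mnist_ds

# Before this patch: ds_train = create_dataset(mnist_path, repeat_size=epoch_size)
# After this patch:  ds_train = create_dataset(mnist_path)   # repeat_size defaults to 1
# model.train(epoch_size, ds_train, ...) then iterates the epochs itself, so
# ds_train.get_dataset_size() reports the step count of a single epoch.
```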