From 2865d6593b70e51ea8923a2d63fbbf5d6c5d8aa8 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:46:40 +0000 Subject: [PATCH 01/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/tr?= =?UTF-8?q?ain=5Fsvhn.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../train_svhn.py | 65 ------------------- 1 file changed, 65 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py deleted file mode 100644 index b10d7a5e1..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2018, Curious AI Ltd. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Train ConvNet Mean Teacher on SVHN training set and evaluate against a validation set - -This runner converges quickly to a fairly good accuracy. -On the other hand, the runner experiments/svhn_final_eval.py -contains the hyperparameters used in the paper, and converges -much more slowly but possibly to a slightly better accuracy. -""" -from npu_bridge.npu_init import * - -import logging -from datetime import datetime - -from experiments.run_context import RunContext -from datasets import SVHN -from mean_teacher.model import Model -from mean_teacher import minibatching - - -logging.basicConfig(level=logging.INFO) -LOG = logging.getLogger('main') - - -def run(data_seed=0): - n_labeled = 500 - n_extra_unlabeled = 0 - - model = Model(RunContext(__file__, 0)) - model['rampdown_length'] = 0 - model['rampup_length'] = 5000 - model['training_length'] = 40000 - model['max_consistency_cost'] = 50.0 - - tensorboard_dir = model.save_tensorboard_graph() - LOG.info("Saved tensorboard graph to %r", tensorboard_dir) - - svhn = SVHN(data_seed, n_labeled, n_extra_unlabeled) - training_batches = minibatching.training_batches(svhn.training, n_labeled_per_batch=50) - evaluation_batches_fn = minibatching.evaluation_epoch_generator(svhn.evaluation) - - model.train(training_batches, evaluation_batches_fn) - - -if __name__ == "__main__": - run() - -- Gitee From e8c30a31b33ffb8290eedb5b89ece6a4fdc9ff16 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:46:48 +0000 Subject: [PATCH 02/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/ci?= =?UTF-8?q?far10=5Ffinal=5Feval.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cifar10_final_eval.py | 112 ------------------ 1 file changed, 112 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py diff --git 
a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py deleted file mode 100644 index 7b602d715..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2018, Curious AI Ltd. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""CIFAR-10 final evaluation""" -from npu_bridge.npu_init import * - -import logging -import sys - -from experiments.run_context import RunContext -import tensorflow as tf - -from datasets import Cifar10ZCA -from mean_teacher.model import Model -from mean_teacher import minibatching - - -LOG = logging.getLogger('main') - - -def parameters(): - test_phase = True - for n_labeled in [1000, 2000, 4000, 'all']: - for model_type in ['mean_teacher', 'pi']: - if n_labeled == 'all': - n_runs = 4 - else: - n_runs = 10 - for data_seed in range(2000, 2000 + n_runs): - yield { - 'test_phase': test_phase, - 'model_type': model_type, - 'n_labeled': n_labeled, - 'data_seed': data_seed - } - - -def model_hyperparameters(model_type, n_labeled): - assert model_type in ['mean_teacher', 'pi'] - if n_labeled == 'all': - return { - 'n_labeled_per_batch': 100, - 'max_consistency_cost': 100.0, - 'apply_consistency_to_labeled': True, - 'ema_consistency': model_type == 'mean_teacher' - } - elif isinstance(n_labeled, int): - return { - 'n_labeled_per_batch': 'vary', - 'max_consistency_cost': 100.0 * n_labeled / 50000, - 'apply_consistency_to_labeled': True, - 'ema_consistency': model_type == 'mean_teacher' - } - else: - msg = "Unexpected combination: {model_type}, {n_labeled}" - assert False, msg.format(locals()) - - -def run(test_phase, n_labeled, data_seed, model_type): - minibatch_size = 100 - hyperparams = model_hyperparameters(model_type, n_labeled) - - tf.reset_default_graph() - model = Model(RunContext(__file__, data_seed)) - - cifar = Cifar10ZCA(n_labeled=n_labeled, - data_seed=data_seed, - test_phase=test_phase) - - model['flip_horizontally'] = True - model['ema_consistency'] = hyperparams['ema_consistency'] - model['max_consistency_cost'] = hyperparams['max_consistency_cost'] - model['apply_consistency_to_labeled'] = hyperparams['apply_consistency_to_labeled'] - model['adam_beta_2_during_rampup'] = 0.999 - model['ema_decay_during_rampup'] = 0.999 - model['normalize_input'] = False 
# Keep ZCA information - model['rampdown_length'] = 25000 - model['training_length'] = 150000 - - training_batches = minibatching.training_batches(cifar.training, - minibatch_size, - hyperparams['n_labeled_per_batch']) - evaluation_batches_fn = minibatching.evaluation_epoch_generator(cifar.evaluation, - minibatch_size) - - tensorboard_dir = model.save_tensorboard_graph() - LOG.info("Saved tensorboard graph to %r", tensorboard_dir) - - model.train(training_batches, evaluation_batches_fn) - - -if __name__ == "__main__": - for run_params in parameters(): - run(**run_params) - -- Gitee From 87d9fd87e71ef58ca0c41ba8a1d8046ac7ca6272 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:46:53 +0000 Subject: [PATCH 03/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/RE?= =?UTF-8?q?ADME.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../README.md | 45 ------------------- 1 file changed, 45 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md deleted file mode 100644 index 949033f71..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# 概述 - mean-teacher是一种用于图像分类的半监督学习方法,能够在拥有少量有标签数据的情况下训练出分类准确率很高的网络模型。 - -- 论文链接: [Weight-averaged consistency targets improve semi-supervised deep learning results](https://arxiv.org/abs/1703.01780) - -- 官方代码仓: [链接](https://github.com/CuriousAI/mean-teacher/) - -- 精度性能比较: - -| | 论文 | GPU | Ascend | -| ------ | ------ | ------ | ------ | -| error | 12.31% | 13.50% | 14.20% | -| 性能(s/steps) | | | | -# 环境 - - python 3.7.5 - - Tensorflow 1.15 - - Ascend910 - -# 训练 -## 数据集 - 使用./prepare_data.sh脚本预处理数据集 -##训练超参见train.py参数列表 -## 单卡训练命令 -```commandline -sh 
./test/train_full_1p.sh -``` - -# 功能测试 -少量step(单epoch)运行 -```commandline -sh ./test/train_performance_1p.sh -``` - -# 模型固化 - -# 部分脚本和示例代码 -```text -├── README.md //说明文档 -├── requirements.txt //依赖 -├──test //训练脚本目录 -│ ├──train_performance_1p.sh -│ ├──train_full_1p.sh -├──train_cifar10.py //训练脚本 -``` - -- Gitee From 59b32ef59838030e46cbaf27632a70addcb424dd Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:46:59 +0000 Subject: [PATCH 04/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/pr?= =?UTF-8?q?epare=5Fdata.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../prepare_data.sh | 46 ------------------- 1 file changed, 46 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh deleted file mode 100644 index 96d256e9f..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2018, Curious AI Ltd. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -echo "Downloading SVHN" -mkdir -p data/images/svhn -( - cd data/images/svhn - curl -O 'http://ufldl.stanford.edu/housenumbers/{train,test,extra}_32x32.mat' -) - -echo -echo "Downloading CIFAR-10" -mkdir -p data/images/cifar/cifar10 -( - cd data/images/cifar/cifar10 - curl -O 'https://www.cs.toronto.edu/~kriz/cifar-10-matlab.tar.gz' - tar xvzf cifar-10-matlab.tar.gz - mv cifar-10-batches-mat/* . - rmdir cifar-10-batches-mat -) - -echo -echo "Preprocessing CIFAR-10" -python datasets/preprocess_cifar10.py - -echo -echo "All done!" -- Gitee From f4b9f1a1338040931df4a64a2c81f2cad5f2acd7 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:47:06 +0000 Subject: [PATCH 05/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/tr?= =?UTF-8?q?ain=5Fcifar10.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../train_cifar10.py | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py deleted file mode 100644 index f81d12444..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2018, Curious AI Ltd. All rights reserved. 
-# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Train ConvNet Mean Teacher on CIFAR-10 training set and evaluate against a validation set - -This runner converges quickly to a fairly good accuracy. -On the other hand, the runner experiments/cifar10_final_eval.py -contains the hyperparameters used in the paper, and converges -much more slowly but possibly to a slightly better accuracy. 
-""" -from npu_bridge.npu_init import * -import argparse -import logging - -from experiments.run_context import RunContext -from datasets import Cifar10ZCA -from mean_teacher.model import Model -from mean_teacher import minibatching - -logging.basicConfig(level=logging.INFO) -LOG = logging.getLogger('main') -parser = argparse.ArgumentParser() -parser.add_argument('--data_path',type=str, default='data',help='The path of dataset') -parser.add_argument('--n_labeled',type=int, default=4000,help='The num of labeled images') -parser.add_argument('--training_length',type=int, default=40000,help='The steps o training') - -#running function -def run(data_seed, args): - n_labeled = args.n_labeled - data_path = args.data_path - model = Model(RunContext(__file__, 0)) - model['flip_horizontally'] = True - model['normalize_input'] = False # Keep ZCA information - model['rampdown_length'] = 0 - model['rampup_length'] = 5000 - model['training_length'] = args.training_length - model['max_consistency_cost'] = 50.0 - - tensorboard_dir = model.save_tensorboard_graph() - LOG.info("Saved tensorboard graph to %r", tensorboard_dir) - - cifar = Cifar10ZCA(data_seed, n_labeled, data_path) - training_batches = minibatching.training_batches(cifar.training, n_labeled_per_batch=50) - evaluation_batches_fn = minibatching.evaluation_epoch_generator(cifar.evaluation) - - model.train(training_batches, evaluation_batches_fn) - -if __name__ == "__main__": - args = parser.parse_args() - run(0,args) - -- Gitee From f01867e691f8f1e819efe81091fc6264201d7516 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:47:14 +0000 Subject: [PATCH 06/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/?= =?UTF-8?q?=5F=5Finit=5F=5F.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py | 13 ------------- 1 file changed, 13 
deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py deleted file mode 100644 index 89552b1d3..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
\ No newline at end of file -- Gitee From 4fbf8ce6d1206f2d4a49420f159196fca85d6244 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:47:19 +0000 Subject: [PATCH 07/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/mo?= =?UTF-8?q?delzoo=5Flevel.txt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt deleted file mode 100644 index 3aaa91c1d..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt +++ /dev/null @@ -1,6 +0,0 @@ -GPUStatus:OK -NPUMigrationStatus:POK -FuncStatus:OK -PrecisionStatus:POK -AutoTune:NOK -PerfStatus:POK \ No newline at end of file -- Gitee From 86e53ff5e7084f591638793d9d9823a9748d415e Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:47:55 +0000 Subject: [PATCH 08/14] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../README.md | 54 +++++++++ .../__init__.py | 13 ++ .../cifar10_final_eval.py | 112 ++++++++++++++++++ .../freeze_graph.py | 71 +++++++++++ .../modelarts_entry_acc.py | 63 ++++++++++ .../modelarts_entry_perf.py | 63 ++++++++++ .../modelzoo_level.txt | 3 + .../prepare_data.sh | 46 +++++++ .../train_cifar10.py | 70 +++++++++++ .../train_svhn.py | 65 ++++++++++ 10 files changed, 560 insertions(+) create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py create mode 100644 
TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/freeze_graph.py create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_acc.py create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_perf.py create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md new file mode 100644 index 000000000..b4d1b16bb --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/README.md @@ -0,0 +1,54 @@ +# 概述 + mean-teacher是一种用于图像分类的半监督学习方法,能够在拥有少量有标签数据的情况下训练出分类准确率很高的网络模型。 + +- 论文链接: [Weight-averaged consistency targets improve semi-supervised deep learning results](https://arxiv.org/abs/1703.01780) + +- 官方代码仓: [链接](https://github.com/CuriousAI/mean-teacher/) + +- 精度性能比较: + +| | 论文 | GPU | Ascend | +| ------ | ------ | -- | ------ | +| error | 12.3% | 13.5% | 14.6% | +| 性能(s/steps) | | 1.17 | 0.30 | +# 环境 + - python 3.7.5 + - Tensorflow 1.15 + - Ascend910 + +# 训练 +## 数据集 + 使用./prepare_data.sh脚本预处理数据集 +## 训练超参见train_cifar10.py参数列表 +## 单卡训练命令 +首先在脚本test/train_full_1p.sh中,配置train_steps、data_path等参数,请用户根据实际路径配置data_path,或者在启动训练的命令行中以参数形式下发 + +-启动训练 +```commandline +bash train_full_1p.sh --data_path=../data +``` + +# 功能测试 +少量step运行 +```commandline +bash ./test/train_performance_1p.sh +``` + +# 模型固化 +准备checkpoint,默认为 ./ckpt/checkpoint-40000 +- 执行脚本,结果将保存在 +```commandline +python3 freeze_graph.py +``` +# 部分脚本和示例代码 +```text +├── README.md //说明文档 +├── 
requirements.txt //依赖 +├──test //训练脚本目录 +│ ├──train_performance_1p.sh +│ ├──train_full_1p.sh +├──train_cifar10.py //训练脚本 +|——freeze_graph.py //固化脚本 +``` +# 输出 +模型存储路径为test/output/ASCEND_DEVICE_ID,包括训练的log以及checkpoints文件。loss信息在文件test/output/{ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log中。 模型固化输出为pb_model/milking_cowmask.pb diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py new file mode 100644 index 000000000..a5f8598aa --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py new file mode 100644 index 000000000..306539822 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/cifar10_final_eval.py @@ -0,0 +1,112 @@ +# Copyright (c) 2018, Curious AI Ltd. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""CIFAR-10 final evaluation""" +from npu_bridge.npu_init import * + +import logging +import sys + +from experiments.run_context import RunContext +import tensorflow as tf + +from datasets import Cifar10ZCA +from mean_teacher.model import Model +from mean_teacher import minibatching + + +LOG = logging.getLogger('main') + + +def parameters(): + test_phase = True + for n_labeled in [1000, 2000, 4000, 'all']: + for model_type in ['mean_teacher', 'pi']: + if n_labeled == 'all': + n_runs = 4 + else: + n_runs = 10 + for data_seed in range(2000, 2000 + n_runs): + yield { + 'test_phase': test_phase, + 'model_type': model_type, + 'n_labeled': n_labeled, + 'data_seed': data_seed + } + + +def model_hyperparameters(model_type, n_labeled): + assert model_type in ['mean_teacher', 'pi'] + if n_labeled == 'all': + return { + 'n_labeled_per_batch': 100, + 'max_consistency_cost': 100.0, + 'apply_consistency_to_labeled': True, + 'ema_consistency': model_type == 'mean_teacher' + } + elif isinstance(n_labeled, int): + return { + 'n_labeled_per_batch': 'vary', + 'max_consistency_cost': 100.0 * n_labeled / 50000, + 'apply_consistency_to_labeled': True, + 'ema_consistency': model_type == 'mean_teacher' + } + else: + msg = "Unexpected combination: {model_type}, {n_labeled}" + assert False, msg.format(locals()) + + +def 
run(test_phase, n_labeled, data_seed, model_type): + minibatch_size = 100 + hyperparams = model_hyperparameters(model_type, n_labeled) + + tf.reset_default_graph() + model = Model(RunContext(__file__, data_seed, './output')) + + cifar = Cifar10ZCA(n_labeled=n_labeled, + data_seed=data_seed, + test_phase=test_phase) + + model['flip_horizontally'] = True + model['ema_consistency'] = hyperparams['ema_consistency'] + model['max_consistency_cost'] = hyperparams['max_consistency_cost'] + model['apply_consistency_to_labeled'] = hyperparams['apply_consistency_to_labeled'] + model['adam_beta_2_during_rampup'] = 0.999 + model['ema_decay_during_rampup'] = 0.999 + model['normalize_input'] = False # Keep ZCA information + model['rampdown_length'] = 25000 + model['training_length'] = 150000 + + training_batches = minibatching.training_batches(cifar.training, + minibatch_size, + hyperparams['n_labeled_per_batch']) + evaluation_batches_fn = minibatching.evaluation_epoch_generator(cifar.evaluation, + minibatch_size) + + tensorboard_dir = model.save_tensorboard_graph() + LOG.info("Saved tensorboard graph to %r", tensorboard_dir) + + model.train(training_batches, evaluation_batches_fn) + + +if __name__ == "__main__": + for run_params in parameters(): + run(**run_params) + diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/freeze_graph.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/freeze_graph.py new file mode 100644 index 000000000..7fc00ebc2 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/freeze_graph.py @@ -0,0 +1,71 @@ +# coding=utf-8 +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from tensorflow.python.tools import freeze_graph +import argparse +import logging +import tensorflow as tf +from experiments.run_context import RunContext +from mean_teacher.model import Model + +logging.basicConfig(level=logging.INFO) +LOG = logging.getLogger('main') +parser = argparse.ArgumentParser() +parser.add_argument('--ckpt_path',type=str, default='./ckpt/checkpoint-40000',help='The path of checkpoint') + + +#running function +def run(args): + ckpt_path = args.ckpt_path + model = Model(RunContext(__file__, 0, './output')) + + LOG.info("Saved tensorboard graph to ./pb_model") + + + logits = model.class_logits_ema + output = tf.argmax(logits, -1, output_type=tf.int32, name="output") #output node will be used to inference + with tf.Session() as sess: + tf.train.write_graph(sess.graph_def, './pb_model', 'output_empty.pb') # save pb file with output node + freeze_graph.freeze_graph( + input_graph='./pb_model/output_empty.pb', # the pb file with output node + input_saver='', + input_binary=False, + input_checkpoint=ckpt_path, # input checkpoint file path + output_node_names='output', # the name of output node in pb file + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + output_graph='./pb_model/mean-teacher.pb', # path of output graph + clear_devices=False, + initializer_nodes='') + logging.info('done') +if __name__ == "__main__": + args = parser.parse_args() + run(args) + + diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_acc.py new file mode 100644 index 000000000..13077b10e --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_acc.py @@ -0,0 +1,63 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import sys + +# 解析输入参数data_url +parser = argparse.ArgumentParser() +parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0") +parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/") +config = parser.parse_args() + +print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0])) +code_dir = sys.path[0] +os.chdir(code_dir) +print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd())) + +print("[CANN-Modelzoo] before train - list my run files:") +os.system("ls -al /usr/local/Ascend/ascend-toolkit/") + +print("[CANN-Modelzoo] before train - list my dataset files:") +os.system("ls -al %s" % config.data_url) + +print("[CANN-Modelzoo] start run train shell") +# 设置sh文件格式为linux可执行 +os.system("dos2unix ./test/*") + +# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定 +# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS +os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url)) + +print("[CANN-Modelzoo] finish run train shell") + +# 将当前执行目录所有文件拷贝到obs的output进行备份 +print("[CANN-Modelzoo] after train - list my output files:") +os.system("cp -r %s %s " % (code_dir, config.train_url)) +os.system("ls -al %s" % config.train_url) diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_perf.py new file mode 100644 index 000000000..14384e227 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelarts_entry_perf.py @@ -0,0 +1,63 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import sys + +# Parse the input arguments (data_url, train_url) +parser = argparse.ArgumentParser() +parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0") +parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/") +config = parser.parse_args() + +print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0])) +code_dir = sys.path[0] +os.chdir(code_dir) +print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd())) + +print("[CANN-Modelzoo] before train - list my run files:") +os.system("ls -al /usr/local/Ascend/ascend-toolkit/") + +print("[CANN-Modelzoo] before train - list my dataset files:") +os.system("ls -al %s" % config.data_url) + +print("[CANN-Modelzoo] start run train shell") +# Convert the shell scripts to Unix line endings so they can run on Linux +os.system("dos2unix ./test/*") + +# Run train_full_1p.sh or train_performance_1p.sh; the user must choose which one +# Difference between full and performance: performance runs only a few steps (kept under 15 minutes) and mainly measures FPS +os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url)) + +print("[CANN-Modelzoo] finish run train shell") + +# Copy everything in the current working directory to the OBS output path as a backup +print("[CANN-Modelzoo] after train - list my output files:") +os.system("cp -r %s %s " % (code_dir, config.train_url)) +os.system("ls -al %s" % config.train_url) diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt new file mode 100644 index 000000000..7eeb8d729 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/modelzoo_level.txt @@ -0,0 +1,3 @@ +FuncStatus:OK +PerfStatus:OK +PrecisionStatus:OK diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh new file mode 100644 index 000000000..c2d9c1ade --- /dev/null +++ 
b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/prepare_data.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018, Curious AI Ltd. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +echo "Downloading SVHN" +mkdir -p data/images/svhn +( + cd data/images/svhn + curl -O 'http://ufldl.stanford.edu/housenumbers/{train,test,extra}_32x32.mat' # quoted so that curl, not the shell, expands {train,test,extra} into three downloads +) + +echo +echo "Downloading CIFAR-10" +mkdir -p data/images/cifar/cifar10 +( + cd data/images/cifar/cifar10 + curl -O 'https://www.cs.toronto.edu/~kriz/cifar-10-matlab.tar.gz' + tar xvzf cifar-10-matlab.tar.gz + mv cifar-10-batches-mat/* . # flatten: move the extracted batch files up into cifar10/ + rmdir cifar-10-batches-mat +) + +echo +echo "Preprocessing CIFAR-10" +python3 datasets/preprocess_cifar10.py + +echo +echo "All done!" 
diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py new file mode 100644 index 000000000..58bea02f6 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_cifar10.py @@ -0,0 +1,70 @@ +# Copyright (c) 2018, Curious AI Ltd. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Train ConvNet Mean Teacher on CIFAR-10 training set and evaluate against a validation set + +This runner converges quickly to a fairly good accuracy. +On the other hand, the runner experiments/cifar10_final_eval.py +contains the hyperparameters used in the paper, and converges +much more slowly but possibly to a slightly better accuracy. 
+""" +from npu_bridge.npu_init import * +import argparse +import logging + +from experiments.run_context import RunContext +from datasets import Cifar10ZCA +from mean_teacher.model import Model +from mean_teacher import minibatching + +logging.basicConfig(level=logging.INFO) +LOG = logging.getLogger('main') +parser = argparse.ArgumentParser() +parser.add_argument('--data_path',type=str, default='data',help='The path of dataset') +parser.add_argument('--n_labeled',type=int, default=4000,help='The num of labeled images') +parser.add_argument('--training_length',type=int, default=40000,help='The number of training steps') +parser.add_argument('--output_path',type=str, default='output',help='The path of output') + +# Configure the Mean Teacher model from args, then train and evaluate it on Cifar10ZCA (data_seed is forwarded to the dataset) +def run(data_seed, args): + n_labeled = args.n_labeled + data_path = args.data_path + output_path = args.output_path + model = Model(RunContext(__file__, 0, output_path)) + model['flip_horizontally'] = True + model['normalize_input'] = False # Keep ZCA information + model['rampdown_length'] = 0 + model['rampup_length'] = 5000 + model['training_length'] = args.training_length + model['max_consistency_cost'] = 50.0 + + tensorboard_dir = model.save_tensorboard_graph() + LOG.info("Saved tensorboard graph to %r", tensorboard_dir) + + cifar = Cifar10ZCA(data_seed, n_labeled, data_path) + training_batches = minibatching.training_batches(cifar.training, n_labeled_per_batch=50) + evaluation_batches_fn = minibatching.evaluation_epoch_generator(cifar.evaluation) + + model.train(training_batches, evaluation_batches_fn) + +if __name__ == "__main__": + args = parser.parse_args() + run(0,args) + diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py new file mode 100644 index 000000000..812315e32 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/train_svhn.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018, Curious AI Ltd. All rights reserved. 
+# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Train ConvNet Mean Teacher on SVHN training set and evaluate against a validation set + +This runner converges quickly to a fairly good accuracy. +On the other hand, the runner experiments/svhn_final_eval.py +contains the hyperparameters used in the paper, and converges +much more slowly but possibly to a slightly better accuracy. 
+""" +from npu_bridge.npu_init import * + +import logging +from datetime import datetime + +from experiments.run_context import RunContext +from datasets import SVHN +from mean_teacher.model import Model +from mean_teacher import minibatching + + +logging.basicConfig(level=logging.INFO) +LOG = logging.getLogger('main') + + +def run(data_seed=0): + n_labeled = 500 + n_extra_unlabeled = 0 + + model = Model(RunContext(__file__, 0, './output')) + model['rampdown_length'] = 0 + model['rampup_length'] = 5000 + model['training_length'] = 40000 + model['max_consistency_cost'] = 50.0 + + tensorboard_dir = model.save_tensorboard_graph() + LOG.info("Saved tensorboard graph to %r", tensorboard_dir) + + svhn = SVHN(data_seed, n_labeled, n_extra_unlabeled) + training_batches = minibatching.training_batches(svhn.training, n_labeled_per_batch=50) + evaluation_batches_fn = minibatching.evaluation_epoch_generator(svhn.evaluation) + + model.train(training_batches, evaluation_batches_fn) + + +if __name__ == "__main__": + run() + -- Gitee From e8fd19916ae1403437b1fae7cb3f93829d0d4873 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:48:29 +0000 Subject: [PATCH 09/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/te?= =?UTF-8?q?st/train=5Fperformance=5F1p.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_performance_1p.sh | 161 ------------------ 1 file changed, 161 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh deleted file mode 100644 index 6fdadae8b..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh +++ /dev/null @@ -1,161 
+0,0 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd`/../ - -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path='' -ckpt_path='' - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 -#export ASCEND_DEVICE_ID=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MEAN-TEACHER_ID0789_for_TensorFlow" -#训练epoch -train_epochs= -#训练batch_size -batch_size=256 -#训练step -train_steps=100 -#学习率 -learning_rate= - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - - elif [[ $para == --ckpt_path* ]];then - ckpt_path=`echo ${para#*=}` - fi -done - 
-#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 - -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt - fi - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - - python3 train_cifar10.py \ - --data_path=${data_path} \ - --training_length=${train_steps} > ${cur_path}test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 - - -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -TrainingTime=`grep "Perf: " $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $11}'` - - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${TrainingTime}'}'` - -#获取模型精度,该网络为错误率 -train_accuracy=`grep "train/error" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $5}'|sed 's/,//g'|sed 's/%//g'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep 'train/class_cost' $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $7}'|sed 's/,//g'|sed 's/%//g' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - 
-#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From 3616ee6888a3ba9ccee44f23b4f77753a6071c33 Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:48:48 +0000 Subject: [PATCH 10/14] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_performance_1p.sh | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh new file mode 100644 index 000000000..fbbb4a269 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_performance_1p.sh @@ -0,0 +1,187 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 
100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# Directory containing this shell script +cur_path=`echo $(cd $(dirname $0);pwd)` + +# Whether this script is the performance variant (1 if its name contains "performance") +perf_flag=`echo $0 | grep performance | wc -l` + +# Name of the network being run (the parent directory of this script's directory) +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# Initialize path parameters +data_path="" +output_path="" + +# Help message; no need to modify +if [[ $1 == --help || $1 == -h ]];then + echo "usage:./train_performance_1p.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --train_steps # max_step for training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# Parse arguments; no need to modify +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --train_steps* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# Check that data_path was provided; no need to modify +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# Fall back to the default output_path when none was provided; no need to modify +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# Set the console log file name; keep this, the file name is ${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running with modelarts..." 
+ print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# Change to the code directory (parent of this script's directory) +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# Record the training start time; no need to modify +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#======== Training command: adapt it to your network ======== +#========================================================= +#========================================================= +# Basic parameters; review and adjust for the model +# The training dataset is under ${data_path}; use this variable directly +# The training output directory is ${output_path}; use this variable directly +# Other basic parameters may be added freely, but keep batch_size and set it to the correct value +train_epochs=1 +train_steps=${train_steps:-100} +batch_size=100 + +if [ x"${modelarts_flag}" != x ]; +then + python3.7 ./train_cifar10.py --data_path=${data_path} --output_path=${output_path} --training_length=${train_steps} +else + python3.7 ./train_cifar10.py --data_path=${data_path} --output_path=${output_path} --training_length=${train_steps} 1>${print_log} 2>&1 +fi + +# Performance metrics: step time is read from the last "Perf: " log line +StepTime=`grep "Perf: " ${print_log} |awk 'END {print $11}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# Accuracy metrics: read from the last "Evaluate" log line +train_accuracy=`grep "Evaluate" ${print_log} |awk 'END {print $5}'|sed 's/,//g'|sed 's/%//g'` + +# Extract every printed loss value +grep "train/class_cost/1:" ${print_log} | awk '{print $7}'|sed 's/,//g'|sed 's/%//g' > 
./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + + +########################################################### +######### Do not modify anything below this point ######### +######### Do not modify anything below this point ######### +######### Do not modify anything below this point ######### +########################################################### + +# Check whether this run actually used the Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + +# Compute the final case name; keep this, the case file name is ${CaseName} +get_casename + +# Rename the loss file +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# End-to-end training time +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# Print performance: FPS / time per step / end-to-end time +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# Print the training accuracy +echo "Final Train Accuracy : $train_accuracy" + +# Loss value of the last iteration; no need to modify +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +# Write the key information to ${CaseName}.log; no need to modify +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From 4cd8094238ca2b5e1c00d3b8de39a1a606c99bce Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:51:03 +0000 Subject: [PATCH 11/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20Te?= =?UTF-8?q?nsorFlow/contrib/cv/MEAN-TEACHER=5FID0789=5Ffor=5FTensorFlow/te?= =?UTF-8?q?st/train=5Ffull=5F1p.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_full_1p.sh | 160 ------------------ 1 file changed, 160 deletions(-) delete mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_full_1p.sh diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_full_1p.sh deleted file mode 100644 index 59315a880..000000000 --- a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/test/train_full_1p.sh +++ /dev/null @@ -1,160 +0,0 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd`/../ - -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path='' -ckpt_path='' - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 -#export ASCEND_DEVICE_ID=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MEAN-TEACHER_ID0789_for_TensorFlow" -#训练epoch -train_epochs= -#训练batch_size -batch_size=256 -#训练step -train_steps= -#学习率 -learning_rate= - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False 
-data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - - elif [[ $para == --ckpt_path* ]];then - ckpt_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 - -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} - mkdir -p 
${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt - fi - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - - python3 train_cifar10.py \ - --data_path=${data_path} > ${cur_path}test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 - - -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -TrainingTime=`grep "Perf: " $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $11}'` - - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${TrainingTime}'}'` - -#获取模型精度,该网络为错误率 -train_accuracy=`grep "train/error" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $5}'|sed 's/,//g'|sed 's/%//g'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep 'train/class_cost' $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $7}'|sed 's/,//g'|sed 's/%//g' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log 
#!/bin/bash
#
# Full single-device (1p) training run of the Mean Teacher CIFAR-10 model on
# an Ascend NPU. Launches train_cifar10.py, then scrapes the training log for
# step time, accuracy and loss and writes a summary to
# test/output/<ASCEND_DEVICE_ID>/<CaseName>.log.

##########################################################
######### Lines 3 to 100: template section, please do NOT change the flow
######### Lines 3 to 100: template section, please do NOT change the flow
######### Lines 3 to 100: template section, please do NOT change the flow
##########################################################
# Directory that contains this shell script (the "test" directory).
cur_path=$(cd "$(dirname "$0")" && pwd)

# 1 when the script name contains "performance", otherwise 0.
perf_flag=$(echo "$0" | grep performance | wc -l)

# Name of the network: the directory one level above this script.
Network=$(echo "${cur_path}" | awk -F"/" '{print $(NF-1)}')

export RANK_SIZE=1
export RANK_ID=0
export JOB_ID=10087

# Path arguments, filled in from the command line below.
data_path=""
output_path=""

# Help message.
if [[ $1 == --help || $1 == -h ]];then
    # The original wrote `echo"usage:..."` (no space), which bash treats as an
    # unknown command name.
    echo "usage:./train_performance_1P.sh "
    echo " "
    echo "parameter explain:
    --data_path              # dataset of training
    --output_path            # output of training
    --train_steps            # max_step for training
    --train_epochs           # max_epoch for training
    --batch_size             # batch size
    -h/--help                show help message
    "
    exit 1
fi

# Argument parsing. "$@" keeps each argument intact even if it has spaces.
for para in "$@"
do
    if [[ $para == --data_path* ]];then
        data_path=${para#*=}
    elif [[ $para == --output_path* ]];then
        output_path=${para#*=}
    elif [[ $para == --train_steps* ]];then
        train_steps=${para#*=}
    elif [[ $para == --train_epochs* ]];then
        train_epochs=${para#*=}
    elif [[ $para == --batch_size* ]];then
        batch_size=${para#*=}
    fi
done

# data_path is mandatory.
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi

# output_path falls back to the per-device output directory.
if [[ $output_path == "" ]];then
    output_path="./test/output/${ASCEND_DEVICE_ID}"
fi

# Name of the console log file; keep it available as ${print_log}.
print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
modelarts_flag=${MODELARTS_MODEL_PATH}
if [ x"${modelarts_flag}" != x ];
then
    echo "running without etp..."
    print_log_name=$(ls /home/ma-user/modelarts/log/ | grep proc-rank)
    print_log="/home/ma-user/modelarts/log/${print_log_name}"
fi
echo "### get your log here : ${print_log}"

CaseName=""
# Build the case name from network, batch size, rank size and run type.
function get_casename()
{
    if [ x"${perf_flag}" = x1 ];
    then
        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
    else
        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
    fi
}

# Switch to the code directory. Abort if that fails: the rm -rf below uses a
# relative path and must not run from an unexpected working directory.
cd "${cur_path}/../" || exit 1
rm -rf "./test/output/${ASCEND_DEVICE_ID}"
mkdir -p "./test/output/${ASCEND_DEVICE_ID}"

# Record the training start time.
start_time=$(date +%s)
##########################################################
######### Lines 3 to 100: template section, please do NOT change the flow
######### Lines 3 to 100: template section, please do NOT change the flow
######### Lines 3 to 100: template section, please do NOT change the flow
##########################################################

#=========================================================
#=========================================================
#======== Training command; adapt to your network ========
#=========================================================
#=========================================================
# Basic parameters; review them for your model.
# The training dataset is under ${data_path}.
# The training output directory is ${output_path}.
# Other parameters may be added, but batch_size must be kept and set correctly.
# batch_size is not forwarded to train_cifar10.py, so it stays fixed here to
# keep the FPS figure and the case name in line with the value noted above.
batch_size=100
# Honour --train_steps when given; 40000 is the full-run default.
train_steps=${train_steps:-40000}
if [ x"${modelarts_flag}" != x ];
then
    python3.7 ./train_cifar10.py --data_path="${data_path}" --output_path="${output_path}" --training_length="${train_steps}"
else
    python3.7 ./train_cifar10.py --data_path="${data_path}" --output_path="${output_path}" --training_length="${train_steps}" 1>"${print_log}" 2>&1
fi

# Performance figures: step time is field 11 of the last "Perf: " log line.
StepTime=$(grep "Perf: " "${print_log}" | awk 'END {print $11}')
# Pass values with -v instead of splicing them into the awk program, and skip
# the division when no step time was found (FPS is then left empty).
FPS=$(awk -v bs="${batch_size}" -v st="${StepTime}" 'BEGIN{ if (st + 0 > 0) printf "%.2f\n", bs / st }')

# Accuracy: field 5 of the last "Evaluate" log line, without "," and "%".
train_accuracy=$(grep "Evaluate" "${print_log}" | awk 'END {print $5}' | sed 's/,//g' | sed 's/%//g')
# Extract every printed loss value.
grep "train/class_cost/1:" "${print_log}" | awk '{print $7}' | sed 's/,//g' | sed 's/%//g' > "./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt"

###########################################################
######### Template section below, please do NOT change the flow
######### Template section below, please do NOT change the flow
######### Template section below, please do NOT change the flow
###########################################################

# Check whether this run actually used the Ascend NPU.
use_npu_flag=$(grep "The model has been compiled on the Ascend AI processor" "${print_log}" | wc -l)
if [ x"${use_npu_flag}" == x0 ];
then
    echo "------------------ ERROR NOTICE START ------------------"
    echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration."
    echo "------------------ ERROR NOTICE END------------------"
else
    echo "------------------ INFO NOTICE START------------------"
    echo "INFO, your task have used Ascend NPU, please check your result."
    echo "------------------ INFO NOTICE END------------------"
fi

# Compute the final case name; result files are named after ${CaseName}.
get_casename

# Rename the loss file after the case name.
if [ -f "./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt" ];
then
    mv "./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt" "./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt"
fi

# End-to-end training duration.
end_time=$(date +%s)
e2e_time=$(( end_time - start_time ))

echo "------------------ Final result ------------------"
# Print FPS, time per step and end-to-end duration.
echo "Final Performance images/sec : $FPS"
echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"

# Print training accuracy.
echo "Final Train Accuracy : $train_accuracy"

# ${cur_path} is the "test" directory, so ${cur_path}/output/... is the same
# directory as ./test/output/... used above.
result_dir="${cur_path}/output/${ASCEND_DEVICE_ID}"

# Loss value of the last iteration.
ActualLoss=$(awk 'END {print $NF}' "${result_dir}/${CaseName}_loss.txt")

# Write the key figures to ${CaseName}.log.
echo "Network = ${Network}" > "${result_dir}/${CaseName}.log"
echo "RankSize = ${RANK_SIZE}" >> "${result_dir}/${CaseName}.log"
echo "BatchSize = ${batch_size}" >> "${result_dir}/${CaseName}.log"
echo "DeviceType = $(uname -m)" >> "${result_dir}/${CaseName}.log"
echo "CaseName = ${CaseName}" >> "${result_dir}/${CaseName}.log"
echo "ActualFPS = ${FPS}" >> "${result_dir}/${CaseName}.log"
echo "TrainingTime = ${StepTime}" >> "${result_dir}/${CaseName}.log"
echo "ActualLoss = ${ActualLoss}" >> "${result_dir}/${CaseName}.log"
echo "E2ETrainingTime = ${e2e_time}" >> "${result_dir}/${CaseName}.log"
# The original wrote this line to ${cur_path}/test/output/..., a directory
# that does not exist, so TrainAccuracy never reached the summary log.
echo "TrainAccuracy = ${train_accuracy}" >> "${result_dir}/${CaseName}.log"
-# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from npu_bridge.npu_init import * -from datetime import datetime -from collections import defaultdict -import threading -import time -import logging -import os - -from pandas import DataFrame -from collections import defaultdict - - -class TrainLog: - """Saves training logs in Pandas msgpacks""" - - INCREMENTAL_UPDATE_TIME = 300 - - def __init__(self, directory, name): - self.log_file_path = "{}/{}.msgpack".format(directory, name) - self._log = defaultdict(dict) - self._log_lock = threading.RLock() - self._last_update_time = time.time() - self.INCREMENTAL_UPDATE_TIME - - def record_single(self, step, column, value): - self._record(step, {column: value}) - - def record(self, step, col_val_dict): - self._record(step, col_val_dict) - - #def save(self): - #df = self._as_dataframe() - #df.to_msgpack(self.log_file_path, compress='zlib') - - def _record(self, step, col_val_dict): - with self._log_lock: - self._log[step].update(col_val_dict) - if time.time() - self._last_update_time >= self.INCREMENTAL_UPDATE_TIME: - self._last_update_time = time.time() - #self.save() - - def _as_dataframe(self): - with self._log_lock: - return DataFrame.from_dict(self._log, orient='index') - - -class RunContext: - """Creates directories and files for the run""" - - def __init__(self, runner_file, run_idx): - 
logging.basicConfig(level=logging.INFO, format='%(message)s') - runner_name = os.path.basename(runner_file).split(".")[0] - self.result_dir = "{root}/{runner_name}/{date:%Y-%m-%d_%H:%M:%S}/{run_idx}".format( - root='results', - runner_name=runner_name, - date=datetime.now(), - run_idx=run_idx - ) - self.transient_dir = self.result_dir + "/transient" - os.makedirs(self.result_dir) - os.makedirs(self.transient_dir) - - def create_train_log(self, name): - return TrainLog(self.result_dir, name) - -- Gitee From 1a66134ffb6f1a386a4df50f760e19c080f9adcd Mon Sep 17 00:00:00 2001 From: lizhenyun Date: Fri, 25 Mar 2022 07:51:53 +0000 Subject: [PATCH 14/14] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../experiments/run_context.py | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/experiments/run_context.py diff --git a/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/experiments/run_context.py b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/experiments/run_context.py new file mode 100644 index 000000000..f85760560 --- /dev/null +++ b/TensorFlow/contrib/cv/MEAN-TEACHER_ID0789_for_TensorFlow/experiments/run_context.py @@ -0,0 +1,86 @@ +# Copyright (c) 2018, Curious AI Ltd. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from npu_bridge.npu_init import *
from datetime import datetime
from collections import defaultdict
import threading
import time
import logging
import os

#from pandas import DataFrame
from collections import defaultdict


class TrainLog:
    """Collect per-step training metrics in memory.

    Values are stored as ``{step: {column: value}}`` and guarded by a
    re-entrant lock. Writing the log to disk is currently disabled: the
    pandas/msgpack ``save`` path was commented out, so ``log_file_path`` is
    only recorded and nothing is written by this class.
    """

    # Seconds between incremental saves (only the timestamp is refreshed
    # while saving is disabled).
    INCREMENTAL_UPDATE_TIME = 300

    def __init__(self, directory, name):
        """Create an empty log whose file path is ``<directory>/<name>.msgpack``."""
        self.log_file_path = "{}/{}.msgpack".format(directory, name)
        self._log = defaultdict(dict)
        self._log_lock = threading.RLock()
        # Back-date the timestamp so the first record counts as "due".
        self._last_update_time = time.time() - self.INCREMENTAL_UPDATE_TIME

    def record_single(self, step, column, value):
        """Record one ``column`` value for ``step``."""
        self._record(step, {column: value})

    def record(self, step, col_val_dict):
        """Record every column/value pair of ``col_val_dict`` for ``step``."""
        self._record(step, col_val_dict)

    def _record(self, step, col_val_dict):
        """Merge ``col_val_dict`` into the entry for ``step`` under the lock."""
        with self._log_lock:
            self._log[step].update(col_val_dict)
            if time.time() - self._last_update_time >= self.INCREMENTAL_UPDATE_TIME:
                # This is where the periodic save used to happen; only the
                # timestamp bookkeeping remains.
                self._last_update_time = time.time()


class RunContext:
    """Creates directories and files for the run"""

    def __init__(self, runner_file, run_idx, result_pah):
        """Set up logging and make sure the result directories exist.

        Args:
            runner_file: Path of the runner script. Not used any more (it
                used to name the result directory); kept for callers.
            run_idx: Index of the run. Not used any more; kept for callers.
            result_pah: Directory that receives the results. The spelling
                is kept so existing keyword callers continue to work.
        """
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        self.result_dir = result_pah
        self.transient_dir = self.result_dir + "/transient"
        # exist_ok=True replaces the check-then-create test on result_dir and
        # guarantees the transient directory too, so re-running into an
        # existing output directory neither raises FileExistsError nor leaves
        # "transient" missing.
        os.makedirs(self.result_dir, exist_ok=True)
        os.makedirs(self.transient_dir, exist_ok=True)

    def create_train_log(self, name):
        """Return a new TrainLog named ``name`` inside the result directory."""
        return TrainLog(self.result_dir, name)