From 950eea37468553670bd3d1faf29e5c0061b9e618 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:10:13 +0800 Subject: [PATCH 1/3] =?UTF-8?q?Pix2Pix=5FID1467=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Pix2Pix_ID1467_for_TensorFlow/demo_rt.py | 127 ++++++++++++++ .../test/train_RT2_performance_1p.sh | 141 +++++++++++++++ .../test/train_RT2_performance_8p.sh | 166 ++++++++++++++++++ 3 files changed, 434 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/demo_rt.py create mode 100644 TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_8p.sh diff --git a/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/demo_rt.py b/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/demo_rt.py new file mode 100644 index 000000000..8e1a79ae9 --- /dev/null +++ b/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/demo_rt.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Created on Sun Feb 24 22:32:37 2019
+
+@author: wmy
+"""
+#***** NPU modify begin*****
+from npu_bridge.npu_init import *
+import tensorflow as tf
+import tensorflow.python.keras as keras
+from tensorflow.python.keras import backend as K
+#***** NPU modify end*****
+
+import scipy
+from keras.datasets import mnist
+from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate
+from keras.layers import BatchNormalization, Activation, ZeroPadding2D
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers.convolutional import UpSampling2D, Conv2D
+from keras.models import Sequential, Model
+from keras.optimizers import Adam
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import sys
+from model import Pix2Pix
+from PIL import Image
+import argparse
+import tensorflow as tf
+
+flags = tf.flags
+FLAGS = flags.FLAGS
+
+def imread(path):  # read the image file at `path` as a float64 RGB array
+    return scipy.misc.imread(path, mode='RGB').astype(np.float64)  # np.float (alias of float) was removed in NumPy 1.24
+
+def predict_single_image(pix2pix, image_path, save_path, weights_path):  # load generator weights, translate one image, save it
+    pix2pix.generator.load_weights(weights_path + '/generator_weights.h5')
+    image_B = imread(image_path)
+    image_B = scipy.misc.imresize(image_B, (pix2pix.nW, pix2pix.nH))  # NOTE: scipy.misc.imread/imresize only exist in old SciPy releases
+    images_B = []
+    images_B.append(image_B)
+    images_B = np.array(images_B)/127.5 - 1.  # batch of one image, scaled from [0, 255] to [-1, 1]
+    generates_A = pix2pix.generator.predict(images_B)
+    generate_A = generates_A[0]
+    generate_A = np.uint8((np.array(generate_A) * 0.5 + 0.5) * 255)  # inverse of the input scaling: [-1, 1] -> [0, 255]
+    generate_A = Image.fromarray(generate_A)
+    generated_image = Image.new('RGB', (pix2pix.nW, pix2pix.nH))
+    generated_image.paste(generate_A, (0, 0, pix2pix.nW, pix2pix.nH))
+    generated_image.save(save_path, quality=95)
+    # The result is written to save_path; nothing is returned.
+
+def convert_to_gray_single_image(image_path, save_path, resize_height=256, resize_weidth=256):  # resize, grayscale, save as 3-channel RGB
+    img = Image.open(image_path)
+    img_color = img.resize((resize_weidth, resize_height), Image.LANCZOS)  # LANCZOS is the same filter as the ANTIALIAS alias removed in Pillow 10
+    img_gray = img_color.convert('L')
+    img_gray = img_gray.convert('RGB')
+    img_gray.save(save_path, quality=95)
+
+# ***** command-line flags: training hyper-parameters and the weight output path *****
+flags.DEFINE_integer("epochs", 1200, "train epochs")
+flags.DEFINE_string("new_weights_path", "./test/output/new_weights", "new weights file path")
+flags.DEFINE_integer("batch_size", 4, "train batchsize")
+flags.DEFINE_string("precision_mode", "allow_fp32_to_fp16", "train precision mode")
+flags.DEFINE_integer("sample_interval", 10, "sample_interval")
+flags.DEFINE_boolean("load_pretrained", False, "load_pretrained")
+
+#***** NPU modify begin*****
+sess_config = tf.ConfigProto()
+custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add()
+custom_op.name = "NpuOptimizer"
+# custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(FLAGS.precision_mode)
+custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")  # NOTE(review): hard-coded, so the --precision_mode flag is ignored
+
+# Dynamic input (disabled)
+# custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
+# custom_op.parameter_map["dynamic_input"].b = True
+# custom_op.parameter_map["use_off_line"].b = True
+
+sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+#sess = tf.Session(config=sess_config)
+#K.set_session(sess)
+#***** NPU modify end*****
+
+gan = Pix2Pix()
+print("weights_path:{},epochs:{},batchsize:{}".format(FLAGS.new_weights_path,FLAGS.epochs,FLAGS.batch_size)) +with tf.Session(config=sess_config) as sess: + K.set_session(sess) + gan.train(weights_path=FLAGS.new_weights_path, epochs=FLAGS.epochs, batch_size=FLAGS.batch_size, sample_interval=FLAGS.sample_interval, load_pretrained=FLAGS.load_pretrained) + predict_single_image(gan, './images/test_1.jpg', './images/generate_test_1.jpg', FLAGS.new_weights_path) + +#gan.train(epochs=1200, batch_size=4, sample_interval=10, load_pretrained=True) +#gan.train(weights_path=FLAGS.weights_path, epochs=5, batch_size=3, sample_interval=10, load_pretrained=True) +#print("weights_path:{},epochs:{},batchsize:{}".format(FLAGS.weights_path,FLAGS.epochs,FLAGS.batch_size)) +#gan.train(weights_path=FLAGS.weights_path, epochs=int(FLAGS.epochs), batch_size=int(FLAGS.batch_size), sample_interval=10, load_pretrained=True) + +# predict_single_image(gan, './images/test_1.jpg', './images/generate_test_1.jpg') +# sess.close() diff --git a/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..a9622835f --- /dev/null +++ b/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,141 @@ +#!/bin/bash +cur_path=`pwd`/../ + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL=3 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#基础参数,需要模型审视修改 +#Batch Size +batch_size=4 +#网络名称,同目录名称 +Network="Pix2Pix_ID1467_for_TensorFlow" +#Device数量,单卡默认为1 +export RANK_SIZE=1 +#训练epoch,可选 +train_epochs=5 +#训练step +train_steps= +#学习率 +learning_rate= + +#维测参数,precision_mode需要模型审视修改 +# precision_mode="allow_mix_precision" +#参数配置 +data_path="./datasets" +# ckpt_path="./weights" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=./datasets 
--ckpt_path=./weights" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path \" must be config" + exit 1 +fi +##############执行训练########## +cd $cur_path +sed -i "s|./datasets/|${data_path}/|g" prepare.py +sed -i "s|./datasets/|${data_path}/|g" utils.py +#sed -i "s|./weights/|${ckpt_path}/|g" model.py +wait + +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + +if [ -d $cur_path/test/output/new_weights ];then + rm -rf $cur_path/test/output/new_weights/* +else + mkdir -p $cur_path/test/output/new_weights +fi +wait + +start=$(date +%s) +echo "data_path=${data_path};epochs=${train_epochs};batch_size=${batch_size}" +nohup python3 demo_rt.py \ + --epochs=${train_epochs} \ + --batch_size=${batch_size} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +end=$(date +%s) +e2e_time=$(( $end - $start )) + +#echo "Final Performance ms/step : $average_perf" +echo "Final Training Duration sec : $e2e_time" + +#参数回改 +sed -i "s|${data_path}/|./datasets/|g" prepare.py +sed -i "s|${data_path}/|./datasets/|g" utils.py +#sed -i "s|${ckpt_path}/|./weights/|g" model.py + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +# TrainingTime=`grep "time:" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $14}' |cut -d ']' -f -1` +step_sec=(`grep "Epoch*" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk 
'END{print $15}' |cut -d 'e' -f 2 |sed 's/: //;s/]//g'`) +wait +# FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${step_sec}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +# train_accuracy=`grep "acc:" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $9}'|cut -d ']' -f 1` +train_accuracy=(`grep "Epoch*" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $9}'|cut -d% -f1 |sed -r 's/.*(.{3})/\1/'`%) +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" + + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +# TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` +TrainingTime=${step_sec} +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +# grep "G loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $12}'|cut -d ']' -f 1 >> $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep "Epoch*" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk '{print $12}' |cut -d: -f 4 |sed 's/].*//g' >> $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +# ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +gloss_result=(`grep "Epoch*" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk 'END{print $15}' |cut -d: -f 4 |sed 's/].*//g'`) +ActualLoss=${gloss_result} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> 
$cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_8p.sh new file mode 100644 index 000000000..26252c759 --- /dev/null +++ b/TensorFlow/built-in/cv/image_synthesis/Pix2Pix_ID1467_for_TensorFlow/test/train_RT2_performance_8p.sh @@ -0,0 +1,166 @@ +#!/bin/bash +cur_path=`pwd`/../ + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP=3 +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export JOB_ID=10086 +#export ASCEND_DEVICE_ID=0 +export RANK_ID=0 +RANK_ID_START=0 +export RANK_SIZE=8 +export RANK_TABLE_FILE="./test/8p.json" + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#mkdir $cur_path/test/output/new_weights +#cp -r /npu/traindata/Pix2Pix_weights/* $cur_path/test/output/new_weights +#基础参数,需要模型审视修改 +#Batch Size +batch_size=4 +#网络名称,同目录名称 +Network="Pix2Pix_ID1467_for_TensorFlow" +#Device数量,单卡默认为1 +RANK_SIZE=8 +#训练epoch,可选 +train_epochs=5 +#训练step +train_steps= +#学习率 +learning_rate= + +#参数配置 +#data_path="/npu/traindata/Pix2Pix_datas" +#ckpt_path="./weights" +#weights_path="/npu/traindata/Pix2Pix_weights" +data_path="./datasets" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_8p.sh --data_path=./datasets --ckpt_path=./weights" + exit 1 +fi + +for para in $* +do + if [[ $para == 
--data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --bind_core* ]]; then
+        bind_core=`echo ${para#*=}`
+        name_bind="_bindcore"
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+    fi
+done
+
+
+############## Run training ##########
+cd $cur_path
+sed -i "s|./datasets/|${data_path}/|g" prepare.py
+sed -i "s|./datasets/|${data_path}/|g" utils.py
+#sed -i "s|./weights/|${ckpt_path}/|g" model.py
+#sed -i "90s|1200|$train_epochs|g" demo.py
+wait
+
+#if [ -d $cur_path/test/output ];then
+#    rm -rf $cur_path/test/output/*
+#    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+#else
+#    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+#fi
+#wait
+
+start=$(date +%s)
+cd $cur_path
+for i in 0 1 2 3 4 5 6 7
+do
+    export RANK_ID=$i
+    export ASCEND_DEVICE_ID=$i
+    # (removed "$ASCEND_DEVICE_ID=$i": it expanded to the command "0=0" and failed; the export above already sets the variable)
+    echo "Device ID : $ASCEND_DEVICE_ID"
+    if [ -d $cur_path/test/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/test/output/$ASCEND_DEVICE_ID
+        mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+    else
+        mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+    fi
+    echo $ASCEND_DEVICE_ID
+    corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+    let a=RANK_ID*${corenum}/8
+    let b=RANK_ID+1
+    let c=b*${corenum}/8-1
+    if [ "x${bind_core}" != x ];then
+        bind_core="taskset -c $a-$c"
+    fi
+    #nohup ${bind_core} python3 demo.py --weights_path=${ckpt_path} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+    nohup ${bind_core} python3 demo_rt.py \
+        --new_weights_path=$cur_path/test/output/$ASCEND_DEVICE_ID \
+        --epochs=${train_epochs} \
+        --batch_size=${batch_size} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+done
+wait  # block until all background ranks started by the loop have finished
+
+end=$(date +%s)
+e2e_time=$(( $end - $start ))
+
+#echo "Final Performance ms/step : $average_perf"
+echo "Final Training Duration sec : $e2e_time"
+
+# Restore the files modified above (only prepare.py and utils.py were changed; the model.py/demo.py edits are disabled)
+sed -i "s|${data_path}/|./datasets/|g" prepare.py
+sed -i "s|${data_path}/|./datasets/|g" utils.py
+#sed -i "s|${ckpt_path}/|./weights/|g" model.py
+#sed -i "90s|$train_epochs|1200|g" demo.py
+wait
+# Print results; no need to modify
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and adjust per model
+#TrainingTime=`grep "time:" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $15}' |cut -d ']' -f -1`
+#wait
+#FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'`
+# Print; no need to modify
+#echo "Final Performance images/sec : $FPS"
+
+# TrainingTime=`grep "time:" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $14}' |cut -d ']' -f -1`
+step_sec=(`grep "Epoch*" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk 'END{print $15}' |cut -d 'e' -f 2 |sed 's/: //;s/]//g'`)
+wait
+# FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'`
+global_batch=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'}'`
+FPS=`awk 'BEGIN{printf "%.2f\n",'${global_batch}'/'${step_sec}'}'`
+# Print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep "acc:" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $9}'|cut -d ']' -f 1`
+# Print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+
+
+# Performance monitoring result summary
+# Training case information; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+## Collect performance data; no need to modify
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*8000/'${FPS}'}'`
+
+# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "G loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $12}'|cut -d ']' -f 1 >> $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no need to modify
+ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize
= ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 683ac3dd30a1f9d381bc6fefa7b24c2eee5e9c94 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:16:04 +0800 Subject: [PATCH 2/3] =?UTF-8?q?StarGAN=5Fv2=5FID1188=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_RT2_performance_1p.sh | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 TensorFlow/contrib/cv/StarGAN_v2_ID1188_for_TensorFlow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/contrib/cv/StarGAN_v2_ID1188_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/contrib/cv/StarGAN_v2_ID1188_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..65849dccb --- /dev/null +++ b/TensorFlow/contrib/cv/StarGAN_v2_ID1188_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,171 @@ +#!/bin/bash +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 + +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +# 数据集路径,保持为空,不需要修改 +data_path="" + + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="StarGAN_v2_ID1188_for_TensorFlow" +# 训练的batch_size +batch_size=4 +# 控制训练时长的参数,视各模型修改---少量epoch 
+epochs=4
+# Case name: the short-epoch train_performance_1p.sh passes "perf", the full-run train_full_1p.sh passes "acc"
+# file_name as your file name
+
+
+# Debug/maintenance parameters; review precision_mode per model
+precision_mode="allow_mix_precision"
+# Fixed parameters; no need to modify below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; keep the script name below in sync with this file's name
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+ --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ -h/--help show help message
+ "
+    exit 1
+fi
+
+# Argument parsing; "$@" keeps each argument intact even if it contains spaces
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --autotune* ]];then
+        autotune=`echo ${para#*=}`  # NOTE(review): install_path is not assigned in the visible part of this script - confirm the caller exports it
+        mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+        mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+        autotune_dump_path=${cur_path}/output/autotune_dump
+        mkdir -p ${autotune_dump_path}/GA
+        mkdir -p ${autotune_dump_path}/RL
+        cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+        cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+# Verify that data_path was passed; no need to modify
+if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) +cd $cur_path/../ + +# 该网络训练脚本需要的文件夹定义 需要修改 + +#进入训练脚本目录,需要模型审视修改 +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + python3 $cur_path/../main.py \ + --data_path ${data_path}/dataset \ + --dataset afhq-raw \ + --checkpoint_dir ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --batch_size ${batch_size} \ + --iteration ${epochs} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep 'time' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk END'{print $6}'` +FPS=`grep 'fps' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk END'{print $12}'` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${RANK_SIZE}'*'${single_fps}'}'` +#打印,不需要修改 +echo "Final Performance TrainingTime : $TrainingTime" +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +# 如果精度存在 检索出精度 +# train_accuracy=`grep 'log loss' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $8}'` +# 如果精度不存在 输出None +train_accuracy="None" +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 
+ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'd_loss' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $8}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From a3d4c38e0a4f1afb3a70e29ec02e130e47745fb0 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:21:37 +0800 Subject: [PATCH 3/3] =?UTF-8?q?Pix2pose=5FID1164=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_RT2_performance_1p.sh | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 TensorFlow/contrib/cv/Pix2pose_ID1164_for_TensorFlow/test/train_RT2_performance_1p.sh diff --git 
a/TensorFlow/contrib/cv/Pix2pose_ID1164_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/contrib/cv/Pix2pose_ID1164_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..5369e98f9
--- /dev/null
+++ b/TensorFlow/contrib/cv/Pix2pose_ID1164_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,165 @@
+#!/bin/bash
+
+# Current path; no need to modify
+cur_path=`pwd`/../
+
+# Collective communication parameters; no need to modify
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Enable RT2.0
+export ENABLE_RUNTIME_V2=1
+
+# Dataset path; keep empty, no need to modify
+data_path=''
+# Pre-trained model path
+ckpt_path=''
+
+# Set the default log level; no need to modify
+#export ASCEND_GLOBAL_LOG_LEVEL=3
+#export ASCEND_DEVICE_ID=4
+
+# Basic parameters; review and adjust per model
+# Network name, same as the directory name
+Network="Pix2pose_ID1164_for_TensorFlow"
+# Training epochs
+epochs=
+# Training batch_size
+batch_size=50
+
+
+# TF2.X only; review and adjust per model
+export NPU_LOOP_SIZE=${train_steps}  # NOTE(review): train_steps is not assigned above in this script, so this may export an empty value - confirm
+
+# Debug/maintenance parameters; review precision_mode per model
+precision_mode="allow_mix_precision"
+# Fixed parameters; no need to modify below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message (a space is required after echo, otherwise the shell looks for a command named echousage:...)
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+ --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump if or not over detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling if or not profiling for performance debug, default is False
+ --data_path source data of training
+ --ckpt_path model
+ -h/--help show help message
+ "
+    exit 1
+fi
+
+# Argument parsing; "$@" keeps each argument intact even if it contains spaces
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/test/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/test/output/data_dump
+        mkdir -p
${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/test/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done +# #校验是否传入data_path,不需要修改 +# if [[$data_path == ""]];then +# echo "[Error] para \"data_path\" must be confing" +# exit 1 +# fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/ + +#创建DeviceID输出目录,不需要修改 +if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt +fi + +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +echo ${data_path} +sed -i "s@"./data"@"${data_path}"@g" src/tools_pix2pose/cfg_tless_paper.json +tail -n 20 src/tools_pix2pose/cfg_tless_paper.json +python3 src/tools_pix2pose/3_train_pix2pose.py \ +--obj_id='15' \ +--max_epoch_case=10 \ +--n_batch_per_epoch_case=2000 \ +--back_dir=${data_path}/tless/train2017/ > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 +wait +sed -i "s@"${data_path}"@"./data"@g" src/tools_pix2pose/cfg_tless_paper.json + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep 'perf-' $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'` +FPS=`grep 'fps-' $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $9}'` +#打印,不需要修改 +echo "Final Performance TrainingTime : $TrainingTime" +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "Mean-" 
$cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${FPS}'/69}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'Disc-' $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $12}' >> $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "Accuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee