diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/.keep b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/Truemain.py b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/Truemain.py new file mode 100644 index 0000000000000000000000000000000000000000..875317a01c6fbdcd2e71312128ff1610899918ac --- /dev/null +++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/Truemain.py @@ -0,0 +1,518 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+# Imports and Global Variables
+# -*- coding: UTF-8 -*-
+from npu_bridge.npu_init import *
+import os
+import sys
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+tf.disable_v2_behavior()
+import cv2
+import math
+import warnings
+import argparse
+# datetime would give formatted timestamps, but plain seconds are easier for the
+# step-time arithmetic below, so time is used instead:
+import time
+# Two lines added while debugging the environment:
+# import sys
+# sys.path.remove('/usr/local/python3.7.5/lib/python3.7/site-packages')
+from cell import ConvLSTMCell  # added
+
+USE_CUDA = True
+
+LSTM_HIDDEN_SIZE = 550
+TIME_STEPS = 1
+K = 100
+
+
+# Build Model
+class DeepVONet(object):
+    def __init__(self, args, data):
+        # Left over from the fully-connected LSTM variant; unused by the ConvLSTM path below.
+        rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [LSTM_HIDDEN_SIZE, LSTM_HIDDEN_SIZE]]
+        # multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)  # changed
+        multi_convlstm_cell = ConvLSTMCell(shape=[6, 20], filters=12, kernel=[3, 3])  # added
+        rnn_inputs = []
+        reuse = None
+        for stacked_img in data:
+            rnn_inputs.append(self.forward(stacked_img, reuse=reuse))
+            reuse = True
+
+        # rnn_inputs is a list of per-step CNN feature maps, i.e. time-major
+        # [time, batch, 6, 20, 1024]; dynamic_rnn expects batch-major input.
+        rnn_inputs = tf.transpose(rnn_inputs, perm=[1, 0, 2, 3, 4])
+
+        # rnn_inputs = [tf.reshape(rnn_inputs[i], [-1, 20 * 6 * 1024]) for i in range(len(rnn_inputs))]  # changed
+        # assert rnn_inputs[0].shape == (args.bsize, 20 * 6 * 1024)  # changed
+
+        # self.outputs, self.state = tf.nn.static_rnn(cell=multi_rnn_cell, inputs=rnn_inputs, dtype=tf.float32)  # changed
+        self.outputs, self.state = tf.nn.dynamic_rnn(cell=multi_convlstm_cell, inputs=rnn_inputs, dtype=tf.float32)  # added
+
+        # Flatten the ConvLSTM output maps ([batch, time, 6, 20, 12]) into one
+        # 20 * 6 * 12 = 1440-dim vector per sample (batch size hard-coded to 32 here).
+        self.outputs = [tf.reshape(self.outputs[i], [-1, 20 * 6 * 12]) for i in range(32)]  # added
+        self.outputs = tf.reshape(self.outputs, [32, 20 * 6 * 12])  # added (32, 1440)
+
+    def forward(self, x, reuse=None):
+        with tf.variable_scope("cnns", reuse=reuse):
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=64,
+                kernel_size=[7, 7],
+                padding="same",
+                strides=2,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv1')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=128,
+                kernel_size=[5, 5],
+                padding="same",
+                strides=2,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv2')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=256,
+                kernel_size=[5, 5],
+                padding="same",
+                strides=2,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv3')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=256,
+                kernel_size=[3, 3],
+                padding="same",
+                strides=1,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv3_1')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=512,
+                kernel_size=[3, 3],
+                padding="same",
+                strides=2,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv4')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=512,
+                kernel_size=[3, 3],
+                padding="same",
+                strides=1,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv4_1')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=512,
+                kernel_size=[3, 3],
+                padding="same",
+                strides=2,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv5')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=512,
+                kernel_size=[3, 3],
+                padding="same",
+                strides=1,
+                reuse=reuse,
+                activation=tf.nn.relu, name='conv5_1')
+            x = tf.layers.conv2d(
+                inputs=x,
+                filters=1024,
+                kernel_size=[3, 3],
+                padding="same",
+                reuse=reuse,
+                strides=2, name='conv6')
+
+        return x
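+
+# Editor's illustrative sketch (an addition; defined but never called): traces the
+# tensor shapes through the pipeline above for the stated 384x1280x6 input. The
+# 6x20 spatial size passed to ConvLSTMCell comes from the nine conv layers, six of
+# which have stride 2: 384 / 2**6 = 6 and 1280 / 2**6 = 20.
+def _shape_flow_demo(batch=32, time_steps=1, height=384, width=1280, channels=6):
+    """Compute the expected shapes without building any graph."""
+    downsample = 2 ** 6  # conv1..conv6 each halve H and W; the conv*_1 layers keep them
+    h, w = height // downsample, width // downsample
+    cnn_features = (batch, time_steps, h, w, 1024)   # input to dynamic_rnn
+    convlstm_out = (batch, time_steps, h, w, 12)     # filters=12 in ConvLSTMCell
+    flattened = (batch, h * w * 12)                  # 6 * 20 * 12 = 1440
+    return cnn_features, convlstm_out, flattened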
+
+
+# Train Model
+def initialize_uninitialized(sess):
+    global_vars = tf.global_variables()
+    is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars])
+    not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized) if not f]
+
+    for i in not_initialized_vars:  # only for testing
+        print(i.name)
+
+    if len(not_initialized_vars):
+        sess.run(tf.variables_initializer(not_initialized_vars))
+
+
+def train_model(data_loader, sess, merged, loss_op, train_op, input_data, labels_, i, test_writer, train_writer):
+    print('Current epoch : %d----' % data_loader.current_epoch, end="")
+    print('step : %d----' % i, end="")
+    if i % 10 == 0:  # every 10th step, record summaries and the evaluation loss (no train step)
+        batch_x, batch_y = data_loader.get_next_batch()
+        summary, acc = sess.run(
+            [merged, loss_op], feed_dict={input_data: batch_x, labels_: batch_y})
+        test_writer.add_summary(summary, i)
+        print('Accuracy : %s----' % acc, end="")
+    else:  # record train-set summaries and take a train step
+        batch_x, batch_y = data_loader.get_next_batch()
+        summary, _ = sess.run(
+            [merged, train_op], feed_dict={input_data: batch_x, labels_: batch_y})
+        train_writer.add_summary(summary, i)
+        train_loss = sess.run(loss_op,
+                              feed_dict={input_data: batch_x, labels_: batch_y})
+        print('Train_error : %s----' % train_loss, end="")
+
+
+def train(args, datapath, outputpath, epoches, trajectory_length):
+    # configuration
+    data_loader = VisualOdometryDataLoader(args, datapath, trajectory_length)
+    if USE_CUDA:
+        sess = tf.Session(config=npu_config_proto())
+    else:
+        config_proto = tf.ConfigProto(device_count={'GPU': 0})
+        sess = tf.Session(config=npu_config_proto(config_proto=config_proto))
+    pose_size = 6
+
+    # Two consecutive frames are stacked along the channel axis, so the network
+    # input has 6 channels (3 per frame as loaded by cv2).
+    height, width, channels = 384, 1280, 6
+
+    with tf.name_scope('input'):  # define the input name scope
+        # placeholder for input
+        input_data = tf.placeholder(tf.float32, [args.bsize, args.time_steps, height, width, channels])
+        # placeholder for labels
+        labels_ = tf.placeholder(tf.float32, [args.bsize, args.time_steps, pose_size])
+
+    with tf.name_scope('unstacked_input'):
+        # Unstacking the input into a list over the time series
+        data = tf.unstack(input_data, args.time_steps, 1)
+        # Unstacking the labels into the time series
+        pose_labels = tf.unstack(labels_, args.time_steps, 1)
+
+    # Building the RCNN network, which returns the time series of output layers
+    with tf.name_scope('RCNN'):
+        model = DeepVONet(args, data)
+        (outputs, _) = (model.outputs, model.state)
+    # Output layer to compute the pose estimate
+    with tf.name_scope('weights'):
+        regression_w = tf.get_variable('regression_w', shape=[1440, pose_size], dtype=tf.float32)
+    with tf.name_scope('biases'):
+        regression_b = tf.get_variable("regression_b", shape=[pose_size], dtype=tf.float32)
+
+    # Pose estimate from the RCNN output through the regression layer
+    with tf.name_scope('Wx_plus_b'):
+        # model.outputs was flattened to a single [32, 1440] tensor above, so one
+        # affine map produces all pose estimates; keep it in a list so the
+        # per-time-step loss below can zip it against pose_labels.
+        pose_estimated = [tf.nn.xw_plus_b(outputs, regression_w, regression_b)]
+        max_time = len(pose_estimated)
+
+    # Loss function for all the frames in a batch
+    with tf.name_scope('loss_l2_norm'):
+        position = [pose_es[:, :3] - pose_lab[:, :3] for pose_es, pose_lab in zip(pose_estimated, pose_labels)]
+        angles = [pose_es[:, 3:6] - pose_lab[:, 3:6] for pose_es, pose_lab in zip(pose_estimated, pose_labels)]
+        pose_error = (tf.square(position))
+        angle_error = (tf.square(angles))
+        loss_op = tf.reduce_sum(pose_error + K * angle_error, name='loss')
+        tf.summary.scalar('loss_l2_norm', loss_op)
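+
+    # Editor's sketch (an addition; defined but never called): the loss above is the
+    # squared position error plus K = 100 times the squared orientation error,
+    # summed over the batch and all time steps. The same formula in NumPy:
+    def _pose_loss_reference(pred, label, k=K):
+        # pred, label: [batch, 6] arrays laid out as (x, y, z, roll, pitch, yaw)
+        pos_err = np.sum((pred[:, :3] - label[:, :3]) ** 2)
+        ang_err = np.sum((pred[:, 3:6] - label[:, 3:6]) ** 2)
+        return pos_err + k * ang_err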
+
+    # optimizer
+    with tf.name_scope('train'):
+        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr,
+                                           beta1=0.9,
+                                           beta2=0.999,
+                                           epsilon=1e-08,
+                                           use_locking=False,
+                                           name='Adam')
+
+        train_op = optimizer.minimize(loss_op)
+    # Merge all the summaries; the writers below store them under outputpath
+    merged = tf.summary.merge_all()
+
+    saver = tf.train.Saver()
+    # Initialize whatever is still uninitialized (the model weights plus the Adam
+    # slots created by minimize() above).
+    initialize_uninitialized(sess)
+    train_writer = tf.summary.FileWriter(outputpath + 'train', sess.graph)
+    test_writer = tf.summary.FileWriter(outputpath + 'test')
+
+    i = 0
+    while data_loader.current_epoch < epoches:
+        start = time.time()
+        train_model(data_loader, sess, merged, loss_op, train_op, input_data, labels_, i, test_writer, train_writer)
+        i += 1
+        end = time.time()
+        print('sec/step : %s' % (end - start))  # per-step wall-clock time
+        if data_loader.current_epoch % 40 == 0:  # added: checkpoint every 40 epochs
+            save_path = saver.save(sess, outputpath + 'model' + str(data_loader.current_epoch))  # added
+    save_path = saver.save(sess, outputpath + 'model')
+    print("Model saved in file: %s" % save_path)
+    print("epochs trained: " + str(data_loader.current_epoch))
+    train_writer.close()
+    test_writer.close()
+
+
+# Dataset
+def default_image_loader(path):
+    img = cv2.imread(path)
+    if img is not None:
+        # Normalize to [0, 1] and subtract the mean intensity of the image
+        img = img / np.max(img)
+        img = img - np.mean(img)
+        img = cv2.resize(img, (1280, 384), fx=0, fy=0)
+    return img
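+
+# Editor's illustrative sketch (an addition; never called): how two consecutive
+# frames are combined into one 6-channel network input using the loader above.
+# The paths are hypothetical placeholders.
+def _stack_frame_pair_demo(path_t='000000.png', path_t1='000001.png'):
+    img1 = default_image_loader(path_t)      # [384, 1280, 3], max-normalized, zero-mean
+    img2 = default_image_loader(path_t1)     # [384, 1280, 3]
+    pair = np.concatenate([img1, img2], -1)  # [384, 1280, 6], matches `channels` in train()
+    return pair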
+
+
+class VisualOdometryDataLoader(object):
+    def __init__(self, args, datapath, trajectory_length, loader=default_image_loader):
+        self.args = args
+        self._current_initial_frame = 0
+        self._current_trajectory_index = 0
+        self.current_epoch = 0
+
+        self.sequences = [0, 2, 8, 9]
+
+        self.base_path = datapath
+        self.poses = self.load_poses()
+        self.trajectory_length = len(self.sequences)
+        self.loader = loader
+
+    def get_image(self, sequence, index):
+        image_path = os.path.join(self.base_path, 'sequences', '%02d' % sequence, 'image_0', '%06d' % index + '.png')
+        image = self.loader(image_path)
+        return image
+
+    def load_poses(self):
+        all_poses = []
+        for sequence in self.sequences:
+            with open(os.path.join(self.base_path, 'poses/', ('%02d' % sequence) + '.txt')) as f:
+                poses = np.array([[float(x) for x in line.split()] for line in f], dtype=np.float32)
+                all_poses.append(poses)
+        return all_poses
+
+    def _set_next_trajectory(self):
+        if (self._current_trajectory_index < self.trajectory_length - 1):
+            self._current_trajectory_index += 1
+        else:
+            self.current_epoch += 1
+            self._current_trajectory_index = 0
+
+        self._current_initial_frame = 0
+
+    def get_next_batch(self):
+        img_batch = []
+        label_batch = []
+
+        poses = self.poses[self._current_trajectory_index]
+
+        for j in range(self.args.bsize):
+            img_stacked_series = []
+            labels_series = []
+
+            # Probe one frame past the window; if it is missing we have reached the
+            # end of the current sequence and move on to the next one.
+            read_img = self.get_image(self.sequences[self._current_trajectory_index],
+                                      self._current_initial_frame + self.args.time_steps)
+            if (read_img is None): self._set_next_trajectory()
+
+            for i in range(self._current_initial_frame, self._current_initial_frame + self.args.time_steps):
+                img1 = self.get_image(self.sequences[self._current_trajectory_index], i)
+                img2 = self.get_image(self.sequences[self._current_trajectory_index], i + 1)
+                img_aug = np.concatenate([img1, img2], -1)
+                img_stacked_series.append(img_aug)
+                # pose = self.get6DoFPose(poses[i, :]) - self.get6DoFPose(poses[self._current_initial_frame, :])  # changed
+                pose = self.get6DoFPose(poses[i + 1, :]) - self.get6DoFPose(poses[i, :])  # relative pose between frames
+                labels_series.append(pose)
+            img_batch.append(img_stacked_series)
+            label_batch.append(labels_series)
+            self._current_initial_frame += self.args.time_steps
+        label_batch = np.array(label_batch)
+        img_batch = np.array(img_batch)
+        # Returns the stacked image pairs and the ground-truth relative poses for one batch
+        return img_batch, label_batch
+
+    def isRotationMatrix(self, R):
+        Rt = np.transpose(R)
+        shouldBeIdentity = np.dot(Rt, R)
+        I = np.identity(3, dtype=R.dtype)
+        n = np.linalg.norm(I - shouldBeIdentity)
+        return n < 1e-6
+
+    def rotationMatrixToEulerAngles(self, R):
+        assert (self.isRotationMatrix(R))
+        sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
+        singular = sy < 1e-6
+
+        if not singular:
+            x = math.atan2(R[2, 1], R[2, 2])
+            y = math.atan2(-R[2, 0], sy)
+            z = math.atan2(R[1, 0], R[0, 0])
+        else:
+            x = math.atan2(-R[1, 2], R[1, 1])
+            y = math.atan2(-R[2, 0], sy)
+            z = 0
+
+        return np.array([x, y, z])
+
+    def get6DoFPose(self, p):
+        # p is one KITTI pose line: the top 3 rows of a 3x4 [R | t] matrix, row-major
+        pos = np.array([p[3], p[7], p[11]])
+        R = np.array([[p[0], p[1], p[2]], [p[4], p[5], p[6]], [p[8], p[9], p[10]]])
+        angles = self.rotationMatrixToEulerAngles(R)
+        return np.concatenate((pos, angles))
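+
+# Editor's sanity-check sketch (an addition; never called). The rotation matrices
+# below compose R = Rz @ Ry @ Rx, which is an assumed inverse of
+# rotationMatrixToEulerAngles above; this only verifies that the conversion
+# round-trips for a non-singular case.
+def _euler_round_trip_check(x=0.1, y=-0.2, z=0.3):
+    Rx = np.array([[1, 0, 0], [0, math.cos(x), -math.sin(x)], [0, math.sin(x), math.cos(x)]])
+    Ry = np.array([[math.cos(y), 0, math.sin(y)], [0, 1, 0], [-math.sin(y), 0, math.cos(y)]])
+    Rz = np.array([[math.cos(z), -math.sin(z), 0], [math.sin(z), math.cos(z), 0], [0, 0, 1]])
+    R = Rz @ Ry @ Rx
+    loader = VisualOdometryDataLoader.__new__(VisualOdometryDataLoader)  # skip __init__; no dataset needed
+    angles = loader.rotationMatrixToEulerAngles(R)
+    assert np.allclose(angles, [x, y, z], atol=1e-8)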
+
+
+# Main_Args Class
+class MyArgs():
+    def __init__(self, datapath, bsize, trajectory_length, lr=0.001, time_steps=100, train_iter=5):
+        self.datapath = datapath
+        self.bsize = bsize
+        self.trajectory_length = trajectory_length
+        self.lr = lr
+        self.time_steps = time_steps
+        self.train_iter = train_iter
+
+
+# Main_code
+# args = MyArgs(datapath='/root/convlstm/DeepVO/data/color/dataset/',
+# # args = MyArgs(datapath='/content/drive/My Drive/dataset/',
+#               bsize=32,
+#               trajectory_length=2,
+#               lr=0.0001,
+#               train_iter=10000,
+#               time_steps=TIME_STEPS)
+# outputpath = '/root/convlstm/DeepVO/data/color/dataset/output/'
+# train(args, args.datapath, args.train_iter, args.trajectory_length)
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='''
+    This script computes the relative pose error from the ground truth trajectory and the estimated trajectory.
+    ''')
+    parser.add_argument('--datapath')
+    parser.add_argument('--outputpath')
+    parser.add_argument('--bsize', type=int, default=32)
+    parser.add_argument('--trajectory_length', type=int, default=4)  # already fixed where it is consumed above; does not affect the code below
+    parser.add_argument('--lr', type=float, default=0.0001)
+    parser.add_argument('--train_iter', type=int, default=1)
+    parser.add_argument('--time_steps', type=int, default=1)
+    args = parser.parse_args()
+    train(args, args.datapath, args.outputpath, args.train_iter, args.trajectory_length)
+"""Evaluation module below: comment out the `if __name__ == '__main__'` block above,
+fill in your own paths, and uncomment the following code to run it."""
+# args = MyArgs(datapath='/root/convlstm/DeepVO/data/color/dataset/',
+#               outputpath='/root/convlstm/DeepVO/data/color/dataset/output4/',
+#               bsize=32,
+#               trajectory_length=4,
+#               train_iter=1,
+#               time_steps=TIME_STEPS)
+#
+# # configuration
+# config_proto = tf.ConfigProto(device_count = {'GPU': 0})
+# sess = tf.Session(config=config_proto)
+# data_loader = VisualOdometryDataLoader(args, args.datapath, args.trajectory_length)
+# """ input_batch must be in shape of [?, TIME_STEPS, 384, 1280, 6] """
+# # tf.reset_default_graph()
+# print('Restoring Entire Session from checkpoint : %s' % args.outputpath + "model80.meta")
+# imported_meta = tf.train.import_meta_graph(args.outputpath + "model80.meta")
+# print('Success')
+# imported_meta.restore(sess, tf.train.latest_checkpoint(args.outputpath))  # find and load the most recent checkpoint, latest_filename=model240
+# input_data = tf.get_default_graph().get_tensor_by_name("input/Placeholder:0")  # the next five lines recover tensors by name from the restored graph
+# labels_ = tf.get_default_graph().get_tensor_by_name("input/Placeholder_1:0")  # placeholder for labels (purpose of these two lines??)
+# loss_op = tf.get_default_graph().get_tensor_by_name("loss_l2_norm/loss:0")
+# poses = []
+# poses.append(tf.get_default_graph().get_tensor_by_name("Wx_plus_b/xw_plus_b:0"))
+# # poses.append(tf.get_default_graph().get_tensor_by_name("Wx_plus_b/xw_plus_b_1:0"))
+# # poses.append(tf.get_default_graph().get_tensor_by_name("Wx_plus_b/xw_plus_b_2:0"))
+# # poses.append(tf.get_default_graph().get_tensor_by_name("Wx_plus_b/xw_plus_b_3:0"))
+# # poses.append(tf.get_default_graph().get_tensor_by_name("Wx_plus_b/xw_plus_b_4:0"))
+# while data_loader.current_epoch < args.train_iter:
+#     input_, ground_truth_batch = data_loader.get_next_batch()  # images and ground-truth relative poses
+#     output = sess.run(poses, feed_dict={input_data: input_})   # estimated relative poses
+#     print('Current epoch : %d' % data_loader.current_epoch)
+#     print('output length : %d' % len(output))
+#     for i in range(len(output)):
+#         for j in range(32):
+#             fh = open("//root//convlstm//DeepVO//txtcsv//output_file.txt", "a")  # file holding the ground-truth poses
+#             fh.write("%f %f %f %f %f %f\n" % (ground_truth_batch[j,i,0],
+#                                               ground_truth_batch[j,i,1],
+#                                               ground_truth_batch[j,i,2],
+#                                               ground_truth_batch[j,i,3],
+#                                               ground_truth_batch[j,i,4],
+#                                               ground_truth_batch[j,i,5]))
+#             fh.close()
+#             fh = open("//root//convlstm//DeepVO//txtcsv//estimated.txt", "a")  # file holding the estimated poses
+#             fh.write("%f %f %f %f %f %f\n" % (output[i][j,0],
+#                                               output[i][j,1],
+#                                               output[i][j,2],
+#                                               output[i][j,3],
+#                                               output[i][j,4],
+#                                               output[i][j,5]))
+#             fh.close()
+# file_old = open("//root//convlstm//DeepVO//txtcsv//output_file.txt", 'rb+')
+# lines = file_old.readlines()
+# # seek to the start of the last line; to drop the last N lines, use lines[-N:] instead of lines[-1]
+# file_old.seek(-len(lines[-1]), os.SEEK_END)
+# file_old.truncate()  # truncate everything after that point
+# file_old.close()
+#
+# file_oldd = open("//root//convlstm//DeepVO//txtcsv//estimated.txt", 'rb+')
+# liness = file_oldd.readlines()
+# # seek to the start of the last line; to drop the last N lines, use lines[-N:] instead of lines[-1]
+# file_oldd.seek(-len(liness[-1]), os.SEEK_END)
+# file_oldd.truncate()  # truncate everything after that point
+# file_oldd.close()
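+
+# Editor's illustrative sketch of the truncation idiom used above (an addition;
+# never called): drop the final line of a text file in place by seeking back its
+# length from the end and truncating. `path` is a hypothetical placeholder.
+def _drop_last_line(path):
+    with open(path, 'rb+') as f:
+        lines = f.readlines()
+        if lines:
+            f.seek(-len(lines[-1]), os.SEEK_END)
+            f.truncate()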
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/cell.py b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/cell.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5f2fd94a29b772f831753480c1653aea2d4edb2
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/cell.py
@@ -0,0 +1,166 @@
+#
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from npu_bridge.npu_init import *
+import tensorflow as tf
+
+class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
+  """An LSTM cell with convolutions instead of multiplications.
+
+  Reference:
+    Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
+ """ + + def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None): + super(ConvLSTMCell, self).__init__(_reuse=reuse) + self._kernel = kernel + self._filters = filters + self._forget_bias = forget_bias + self._activation = activation + self._normalize = normalize + self._peephole = peephole + if data_format == 'channels_last': + self._size = tf.TensorShape(shape + [self._filters]) + self._feature_axis = self._size.ndims + self._data_format = None + elif data_format == 'channels_first': + self._size = tf.TensorShape([self._filters] + shape) + self._feature_axis = 0 + self._data_format = 'NC' + else: + raise ValueError('Unknown data_format') + + @property + def state_size(self): + return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size) + + @property + def output_size(self): + return self._size + + def call(self, x, state): + c, h = state + + x = tf.concat([x, h], axis=self._feature_axis) + n = x.shape[-1].value + m = 4 * self._filters if self._filters > 1 else 4 + W = tf.get_variable('kernel', self._kernel + [n, m]) + y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format) + if not self._normalize: + y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer()) + j, i, f, o = tf.split(y, 4, axis=self._feature_axis) + + if self._peephole: + i += tf.get_variable('W_ci', c.shape[1:]) * c + f += tf.get_variable('W_cf', c.shape[1:]) * c + + if self._normalize: + j = tf.contrib.layers.layer_norm(j) + i = tf.contrib.layers.layer_norm(i) + f = tf.contrib.layers.layer_norm(f) + + f = tf.sigmoid(f + self._forget_bias) + i = tf.sigmoid(i) + c = c * f + i * self._activation(j) + + if self._peephole: + o += tf.get_variable('W_co', c.shape[1:]) * c + + if self._normalize: + o = tf.contrib.layers.layer_norm(o) + c = tf.contrib.layers.layer_norm(c) + + o = tf.sigmoid(o) + h = o * self._activation(c) + + state = tf.nn.rnn_cell.LSTMStateTuple(c, h) + + return h, state + + +class ConvGRUCell(tf.nn.rnn_cell.RNNCell): + """A GRU cell with convolutions instead of multiplications.""" + + def __init__(self, shape, filters, kernel, activation=tf.tanh, normalize=True, data_format='channels_last', reuse=None): + super(ConvGRUCell, self).__init__(_reuse=reuse) + self._filters = filters + self._kernel = kernel + self._activation = activation + self._normalize = normalize + if data_format == 'channels_last': + self._size = tf.TensorShape(shape + [self._filters]) + self._feature_axis = self._size.ndims + self._data_format = None + elif data_format == 'channels_first': + self._size = tf.TensorShape([self._filters] + shape) + self._feature_axis = 0 + self._data_format = 'NC' + else: + raise ValueError('Unknown data_format') + + @property + def state_size(self): + return self._size + + @property + def output_size(self): + return self._size + + def call(self, x, h): + channels = x.shape[self._feature_axis].value + + with tf.variable_scope('gates'): + inputs = tf.concat([x, h], axis=self._feature_axis) + n = channels + self._filters + m = 2 * self._filters if self._filters > 1 else 2 + W = tf.get_variable('kernel', self._kernel + [n, m]) + y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format) + if self._normalize: + r, u = tf.split(y, 2, axis=self._feature_axis) + r = tf.contrib.layers.layer_norm(r) + u = tf.contrib.layers.layer_norm(u) + else: + y += tf.get_variable('bias', [m], initializer=tf.ones_initializer()) + r, u = tf.split(y, 2, axis=self._feature_axis) + r, u = tf.sigmoid(r), 
tf.sigmoid(u)
+
+    with tf.variable_scope('candidate'):
+      inputs = tf.concat([x, r * h], axis=self._feature_axis)
+      n = channels + self._filters
+      m = self._filters
+      W = tf.get_variable('kernel', self._kernel + [n, m])
+      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
+      if self._normalize:
+        y = tf.contrib.layers.layer_norm(y)
+      else:
+        y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
+      h = u * h + (1 - u) * self._activation(y)
+
+    return h, h
+
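+
+# Editor's usage sketch (an addition; not executed anywhere): driving ConvLSTMCell
+# with tf.nn.dynamic_rnn on a batch-major 5-D input, mirroring how Truemain.py
+# uses it. All sizes below are illustrative.
+def _convlstm_demo():
+  batch, time_steps, height, width, channels = 4, 5, 6, 20, 32
+  inputs = tf.placeholder(tf.float32, [batch, time_steps, height, width, channels])
+  cell = ConvLSTMCell(shape=[height, width], filters=12, kernel=[3, 3])
+  # outputs: [batch, time_steps, height, width, 12]; state: an LSTMStateTuple of c and h
+  outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
+  return outputs, state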
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelarts_entry_acc.py
new file mode 100644
index 0000000000000000000000000000000000000000..59673529a665de35a67b4d2f9f852e7b5d5b26df
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelarts_entry_acc.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# parse the data_url / train_url input arguments
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")  # dataset path on the real NPU environment
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")  # output path on the real NPU environment
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# convert the sh files to Unix format so they are executable on Linux
+os.system("dos2unix ./test/*")
+
+# Run train_full_1p.sh or train_performance_1p.sh, as chosen by the user.
+# The difference: the performance script runs only a few steps (kept under
+# 15 minutes) and mainly measures FPS; the full script trains to final accuracy.
+os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# back up all files in the current working directory to the OBS output path
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelarts_entry_perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9e99ba4cce86b3dfdd77d914c65326fd32a7687
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelarts_entry_perf.py
@@ -0,0 +1,64 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+# set the log-level environment variable to error
+os.environ['ASCEND_GLOBAL_LOG_LEVEL'] = '3'
+# parse the data_url / train_url input arguments
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# convert the sh files to Unix format so they are executable on Linux
+os.system("dos2unix ./test/*")
+
+# Run train_full_1p.sh or train_performance_1p.sh, as chosen by the user.
+# The difference: the performance script runs only a few steps (kept under
+# 15 minutes) and mainly measures FPS; the full script trains to final accuracy.
+os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# back up all files in the current working directory to the OBS output path
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..463cef7e72801222e8435d5d1764ae72f4ca038a
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,3 @@
+FuncStatus:OK
+PerfStatus:NOK
+PrecisionStatus:NOK
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/requirements.txt b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..67609a0dcac25240871cd1121c2dea4b7820c832
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/requirements.txt
@@ -0,0 +1,2 @@
+Environment and dependencies: Ubuntu 16.04 + Python 3.7.5 + tensorflow-gpu 1.15.0 + OpenCV + opencv-contrib 3.4.2.17 + evo 1.12.0
+
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/test/train_full_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ee1d54371f7d9d60737c26d249ae236d5da39105
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/test/train_full_1p.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+##########################################################
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+##########################################################
+# directory containing this shell script
+cur_path=`echo $(cd $(dirname $0);pwd)`
+
+# determine whether the current shell is the performance variant
+perf_flag=`echo $0 | grep performance | wc -l`
+
+# name of the network currently being executed
+Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`
+
+export RANK_SIZE=1
+export RANK_ID=0
+export JOB_ID=10087
+
+# path parameter initialization
+data_path=""
+output_path=""
+
+# help message; no need to modify
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_full_1p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --data_path           # dataset of training
+    --output_path         # output of training
+    --train_steps         # max_step for training
+    --train_epochs        # max_epoch for training
+    --batch_size          # batch size
+    -h/--help             show help message
+    "
+    exit 1
+fi
+
+# parameter validation; no need to modify
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --output_path* ]];then
+        output_path=`echo ${para#*=}`
+    elif [[ $para == --train_steps* ]];then
+        train_steps=`echo ${para#*=}`
+    elif [[ $para == --train_epochs* ]];then
+        train_epochs=`echo ${para#*=}`
+    elif [[ $para == --batch_size* ]];then
+        batch_size=`echo ${para#*=}`
+    fi
+done
+
+# check that data_path was provided; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+# check whether output_path was provided; no need to modify
+if [[ $output_path == "" ]];then
+    output_path="./test/output/${ASCEND_DEVICE_ID}"
+fi
+
+# set the console log file name; please keep this, the file name is ${print_log}
+print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
+modelarts_flag=${MODELARTS_MODEL_PATH}
+if [ x"${modelarts_flag}" != x ];
+then
+    echo "running with modelarts..."
+    print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank`
+    print_log="/home/ma-user/modelarts/log/${print_log_name}"
+fi
+echo "### get your log here : ${print_log}"
+
+CaseName=""
+function get_casename()
+{
+    if [ x"${perf_flag}" = x1 ];
+    then
+        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+    else
+        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
+    fi
+}
+
+# change to the code directory
+cd ${cur_path}/../
+rm -rf ./test/output/${ASCEND_DEVICE_ID}
+mkdir -p ./test/output/${ASCEND_DEVICE_ID}
+
+# record the training start time; no need to modify
+start_time=$(date +%s)
+##########################################################
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+##########################################################
+
+#=========================================================
+#=========================================================
+#=== training command; adapt it to your own network ======
+#=========================================================
+#=========================================================
+# Basic parameters; review and adjust for your model.
+# The training dataset is under ${data_path}; use that variable directly.
+# The training output directory is ${output_path}; use that variable directly.
+# You may add further parameters of your own, but keep batch_size and set it correctly.
+batch_size=32
+
+if [ x"${modelarts_flag}" != x ];
+then
+    python3.7 ./Truemain.py --datapath=${data_path} --outputpath=${output_path}
+else
+    python3.7 ./Truemain.py --datapath=${data_path} --outputpath=${output_path} > ${print_log}
+fi
+
+# performance figures: average the per-step time over the last 10 steps
+StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`
+
+# accuracy figure: average of the last 10 reported training errors
+train_accuracy=`grep "Train_error :" ${print_log} | tail -n 10 | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' | awk '{sum+=$1} END {print sum/NR}'`
+# extract all loss lines
+grep "Train_error :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+
+###########################################################
+#######  Do NOT modify anything below this line  ##########
+#######  Do NOT modify anything below this line  ##########
+#######  Do NOT modify anything below this line  ##########
+###########################################################
+
+# check whether this run actually used the Ascend NPU
+use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l`
+if [ x"${use_npu_flag}" == x0 ];
+then
+    echo "------------------ ERROR NOTICE START ------------------"
+    echo "ERROR, your task has not used the Ascend NPU, please check your NPU migration."
+    echo "------------------ ERROR NOTICE END------------------"
+else
+    echo "------------------ INFO NOTICE START------------------"
+    echo "INFO, your task has used the Ascend NPU, please check your result."
+    echo "------------------ INFO NOTICE END------------------"
+fi
+
+# obtain the final casename; please keep this, the case file name is ${CaseName}
+get_casename
+
+# rename the loss file
+if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
+then
+    mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
+fi
+
+# end-to-end training time
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+echo "------------------ Final result ------------------"
+# report FPS / per-step time / end-to-end time
+echo "Final Performance images/sec : $FPS"
+echo "Final Performance sec/step : $StepTime"
+echo "E2E Training Duration sec : $e2e_time"
+
+# report training accuracy
+echo "Final Train Accuracy : ${train_accuracy}"
+
+# loss value of the last iteration; no need to modify
+ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`)
+
+# print the key information into ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/test/train_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..311e56ea047fa9394770f493e7231dd66e1c6a06
--- /dev/null
+++ b/TensorFlow/contrib/cv/ConvLSTM_ID2358_for_TensorFlow/test/train_performance_1p.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+##########################################################
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+##########################################################
+# directory containing this shell script
+cur_path=`echo $(cd $(dirname $0);pwd)`
+
+# determine whether the current shell is the performance variant
+perf_flag=`echo $0 | grep performance | wc -l`
+
+# name of the network currently being executed
+Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`
+
+export RANK_SIZE=1
+export RANK_ID=0
+export JOB_ID=10087
+
+# path parameter initialization
+data_path=""
+output_path=""
+
+# help message; no need to modify
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_1p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --data_path           # dataset of training
+    --output_path         # output of training
+    --train_steps         # max_step for training
+    --train_epochs        # max_epoch for training
+    --batch_size          # batch size
+    -h/--help             show help message
+    "
+    exit 1
+fi
+
+# parameter validation; no need to modify
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --output_path* ]];then
+        output_path=`echo ${para#*=}`
+    elif [[ $para == --train_steps* ]];then
+        train_steps=`echo ${para#*=}`
+    elif [[ $para == --train_epochs* ]];then
        train_epochs=`echo ${para#*=}`
+    elif [[ $para == --batch_size* ]];then
+        batch_size=`echo ${para#*=}`
+    fi
+done
+
+# check that data_path was provided; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+# check whether output_path was provided; no need to modify
+if [[ $output_path == "" ]];then
+    output_path="./test/output/${ASCEND_DEVICE_ID}"
+fi
+
+# set the console log file name; please keep this, the file name is ${print_log}
+print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
+modelarts_flag=${MODELARTS_MODEL_PATH}
+if [ x"${modelarts_flag}" != x ];
+then
+    echo "running with modelarts..."
+    print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank`
+    print_log="/home/ma-user/modelarts/log/${print_log_name}"
+fi
+echo "### get your log here : ${print_log}"
+
+CaseName=""
+function get_casename()
+{
+    if [ x"${perf_flag}" = x1 ];
+    then
+        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+    else
+        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
+    fi
+}
+
+# change to the code directory
+cd ${cur_path}/../
+rm -rf ./test/output/${ASCEND_DEVICE_ID}
+mkdir -p ./test/output/${ASCEND_DEVICE_ID}
+
+# record the training start time; no need to modify
+start_time=$(date +%s)
+##########################################################
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+######  Lines 3 through 100: please do NOT modify  #######
+##########################################################
+
+#=========================================================
+#=========================================================
+#=== training command; adapt it to your own network ======
+#=========================================================
+#=========================================================
+# Basic parameters; review and adjust for your model.
+# The training dataset is under ${data_path}; use that variable directly.
+# The training output directory is ${output_path}; use that variable directly.
+# You may add further parameters of your own, but keep batch_size and set it correctly.
+train_epochs=1
+train_steps=500
+batch_size=32
+
+if [ x"${modelarts_flag}" != x ];
+then
+    python3.7 ./Truemain.py --datapath=${data_path} --outputpath=${output_path}
+else
+    python3.7 ./Truemain.py --datapath=${data_path} --outputpath=${output_path} > ${print_log}  # --train_iter=${train_epochs}
+fi
+
+# performance figures: for the keyword lines, average the last (space-separated) column over the last n lines
+StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`
+
+# accuracy figure; this script measures performance only, so it can be ignored here
+train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
+# extract all loss lines
+grep "Train_error :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+
+###########################################################
+#######  Do NOT modify anything below this line  ##########
+#######  Do NOT modify anything below this line  ##########
+#######  Do NOT modify anything below this line  ##########
+###########################################################
+
+# check whether this run actually used the Ascend NPU
+use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l`
+if [ x"${use_npu_flag}" == x0 ];
+then
+    echo "------------------ ERROR NOTICE START ------------------"
+    echo "ERROR, your task has not used the Ascend NPU, please check your NPU migration."
+    echo "------------------ ERROR NOTICE END------------------"
+else
+    echo "------------------ INFO NOTICE START------------------"
+    echo "INFO, your task has used the Ascend NPU, please check your result."
+ echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file