diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/__init__.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/__init__.py @@ -0,0 +1 @@ + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/dataset_base.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/dataset_base.py new file mode 100644 index 0000000000000000000000000000000000000000..c7ed20d802ca7c5e8bc3976e74d9186c29f07511 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/dataset_base.py @@ -0,0 +1,240 @@ +# the base class of dataset + +from __future__ import print_function, division, absolute_import + +import gpu_config +import tensorflow as tf + +from collections import namedtuple +import time, os, cPickle, sys, threading, glob +from datetime import datetime +import time + +import numpy as np +import cv2 + +from data.util import * +Annotation = namedtuple('Annotation', 'name,pose') + +def _float_feature(value): + if isinstance(value, np.ndarray): + value = value + elif not isinstance(value, list): + value = [value] + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + +class BaseDataset(object): + '''provide basic utilities to convert the initial dataset to TFRecord files + and the interface to define readdata on the graph + ''' + def __init__(self, subset): + '''subset: e.g., train, validation, test, or train_1 in the nyu (with view 1) + ''' + self.subset = subset + + def loadAnnotation(self): + '''the annotation is a sequential list of Annotation namedtuple + ''' + raise NotImplementedError + + @property + def annotations(self): + raise NotImplementedError + + def convert_to_example(self, label): + '''load the image corresponding to the label.name, + then serialize the structure to tf.train.Example + ''' + raise NotImplementedError + + def saveSampleToRecord(self, idx_list, tar_file_path): + curr_list = [self.annotations[idx] for idx in idx_list] + # if os.path.exists(tar_file_path): + # print('%s alread written'%tar_file_path) + # sys.stdout.flush() + # return + + writer = tf.python_io.TFRecordWriter(tar_file_path) + for label in curr_list: + example = self.convert_to_example(label) + writer.write(example.SerializeToString()) + writer.close() + + def write_TFRecord_single_thread(self, thread_idx, thread_range, num_shards_per_thread): + print('Launching thread %d, with files from %d to %d'%(thread_idx, thread_range[0], thread_range[1])) + sys.stdout.flush() + spacing = np.linspace(thread_range[0], thread_range[1], num_shards_per_thread+1).astype(np.int) + + shard_range = [] + for idx in range(num_shards_per_thread): + 
shard_range.append((spacing[idx], spacing[idx+1])) + + if not hasattr(self, 'num_shards'): + '''in case of single thread + ''' + self.num_shards = num_shards_per_thread + + for curr_shard_idx, shard in enumerate(shard_range): + file_idx = thread_idx*num_shards_per_thread + curr_shard_idx + file_name = '%s-%d-of-%d'%(self.subset, file_idx, self.num_shards) + file_path = os.path.join(self.tf_dir, file_name) + print('[Thread %d] begin processing %d - %d images, to %s'%( + thread_idx,shard[0],shard[1],file_path)) + t1 = time.time() + sys.stdout.flush() + self.saveSampleToRecord(range(shard[0], shard[1]), file_path) + t2 = time.time() + print('[Thread {}]end at ={}, with {}s'.format(thread_idx, datetime.now(), t2-t1)) + + def write_TFRecord_multi_thread(self, num_threads, num_shards): + '''convert all the dataset to several file shards + num_threads: number of threads to load and save the data + num_shards: number of file segment on the harddisk + ''' + if not os.path.exists(self.tf_dir): + os.mkdir(self.tf_dir) + + assert not num_shards % num_threads, ( + 'please make the num_threads commensurate with file_shards') + self.num_shards = num_shards + self.num_threads = num_threads + num_shards_per_thread = int(num_shards/num_threads) + + self.loadAnnotation() + + spacing = np.linspace(0, len(self.annotations), num_threads+1).astype(np.int) + thread_range = [] + for idx in range(num_threads): + thread_range.append((spacing[idx], spacing[idx+1])) + + coord = tf.train.Coordinator() + threads = [] + print('begin writing at ', datetime.now()) + sys.stdout.flush() + for thread_idx in range(len(thread_range)): + args = (thread_idx, + thread_range[thread_idx], + num_shards_per_thread) + + t = threading.Thread(target=self.write_TFRecord_single_thread, args=args) + t.start() + threads.append(t) + + # wait all thread end + coord.join(threads) + + # interface to the batch iteration + @property + def filenames(self): + if self.subset == 'testing': + pattern = os.path.join(self.tf_dir, '%s-*'%'testing') + else: + pattern = os.path.join(self.tf_dir, '%s-*'%'training') + files = glob.glob(pattern) + print('[data.dataset_base]total file found = %d'%(len(files))) + return files + + @property + def is_train(self): + raise NotImplementedError + + @property + def approximate_num(self): + '''return: + the approximate total number of training set + ''' + raise NotImplementedError + + def get_batch_op(self, + batch_size, num_readers=1, num_preprocess_threads=1, + preprocess_op=None, + is_train=None): + '''return the operation on tf graph of + iteration over the given dataset + ''' + if is_train == None: + is_train = self.is_train + + with tf.name_scope('batch_processing'): + min_queue_examples = batch_size*1 + + if is_train: + assert num_readers >1, 'during training, num_readers should be greater than 1, to shuffle the input' + filename_queue = tf.train.string_input_producer( + self.filenames, capacity=32, shuffle=True) + + example_queue = tf.RandomShuffleQueue( + capacity=self.approximate_num_per_file*8 + 3*batch_size, + min_after_dequeue=self.approximate_num_per_file*8, + dtypes=[tf.string]) + + else: + filename_queue = tf.train.string_input_producer( + self.filenames, capacity=1, shuffle=False) + example_queue = tf.FIFOQueue( + capacity=min_queue_examples+batch_size, + dtypes=[tf.string]) + + if num_readers > 1: + enqueue_ops = [] + for _ in range(num_readers): + reader = tf.TFRecordReader() + _, value = reader.read(filename_queue) + enqueue_ops.append(example_queue.enqueue([value])) + + 
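
The reader/queue pipeline assembled in get_batch_op only yields data once its queue runners have been started inside a session. A minimal sketch of the driving loop, assuming an already-constructed dataset instance; the function and variable names below are illustrative and not part of this file:

    import tensorflow as tf

    def fetch_one_batch(dataset, batch_size=32):
        # build the graph-side batch op defined by get_batch_op above
        batch_op = dataset.get_batch_op(batch_size=batch_size,
                                        num_readers=2,
                                        num_preprocess_threads=2)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            # start the runners registered by string_input_producer and
            # add_queue_runner so the example queue is actually filled
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                batch = sess.run(batch_op)
            finally:
                coord.request_stop()
                coord.join(threads)
        return batch
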
tf.train.queue_runner.add_queue_runner( + tf.train.queue_runner.QueueRunner(example_queue, enqueue_ops)) + example_serialized = example_queue.dequeue() + else: + reader = tf.TFRecordReader() + _, example_serialized = reader.read(filename_queue) + + results = [] + for thread_idx in range(num_preprocess_threads): + dm, pose, name = self.parse_example(example_serialized) + if preprocess_op != None: + result = preprocess_op(dm, pose, self.cfg) + results.append(list(result)) + else: + results.append([dm, pose]) + + batch = tf.train.batch_join( + results, batch_size=batch_size, capacity=2*num_preprocess_threads*batch_size) + + return batch + + # TODO: merge this function to get_batch_op + def get_batch_op_test(self, batch_size, preprocess_op=None): + '''return the operation on tf graph of + iteration over the given dataset + ''' + with tf.name_scope('batch_processing'): + min_queue_examples = 1 + + filename_queue = tf.train.string_input_producer( + self.filenames, num_epochs=1, capacity=1, shuffle=False) + example_queue = tf.FIFOQueue( + capacity=10, + dtypes=[tf.string]) + + reader = tf.TFRecordReader() + _, example_serialized = reader.read(filename_queue) + + results = [] + + dm, pose, name = self.parse_example(example_serialized) + if preprocess_op != None: + result = preprocess_op(dm, pose, self.cfg) + results.append(list(result)+[name]) + else: + results.append([dm, pose, name]) + + batch = tf.train.batch_join( + results, batch_size=batch_size, capacity=2) + return batch + + def parse_example(self, example_serialized): + raise NotImplementedError diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/evaluation.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ab7958393f39c03db7312ffa85394e8c61cfc088 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/evaluation.py @@ -0,0 +1,109 @@ +import matplotlib.pyplot as plt +import numpy.linalg as alg + +class Evaluation(object): + def __init__(self): + pass + + @classmethod + def maxJntError(cls_obj, skel1, skel2): + diff = skel1.reshape(-1,3) - skel2.reshape(-1,3) + diff = alg.norm(diff, axis=1) + return diff.max() + + @classmethod + def meanJntError(cls_obj, skel1, skel2): + diff = skel1.reshape(-1,3) - skel2.reshape(-1,3) + diff = alg.norm(diff, axis=1) + return diff.mean() + + + @classmethod + def averageMaxJntError(cls_obj, score_list): + score_list = sorted(score_list) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=10.5): + th_idx += 1 + print '10mm percentage: %f'%(float(th_idx)/len(score_list)) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=20.5): + th_idx += 1 + print '20mm percentage: %f'%(float(th_idx)/len(score_list)) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=30.5): + th_idx += 1 + print '30mm percentage: %f'%(float(th_idx)/len(score_list)) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=40.5): + th_idx += 1 + print '40mm percentage: %f'%(float(th_idx)/len(score_list)) + + thresh_list = [thresh*5.0+0.5 for thresh in range(0, 17)] + precent_list = [1]*len(thresh_list) + + cur_score_idx = 0 + for i in range(0, len(thresh_list)): + th_idx = 0 + for j in range(0, len(score_list)): + if(score_list[j] 1: + enqueue_ops = [] + for _ in range(num_readers): + reader = tf.TFRecordReader() + _, value = reader.read(filename_queue) + 
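
The Evaluation helpers in evaluation.py above expect flattened (num_joints*3,) skeletons in millimetres. A small usage sketch with synthetic poses; the joint count and noise level are made up for illustration:

    import numpy as np
    from data.evaluation import Evaluation

    gt = np.zeros((14, 3), dtype=np.float32)           # ground-truth joints, mm
    pred = gt + np.random.normal(0.0, 5.0, gt.shape)   # noisy estimate

    worst = Evaluation.maxJntError(pred.ravel(), gt.ravel())
    mean = Evaluation.meanJntError(pred.ravel(), gt.ravel())
    print('max error = %.1f mm, mean error = %.1f mm' % (worst, mean))
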
enqueue_ops.append(example_queue.enqueue([value])) + + tf.train.queue_runner.add_queue_runner( + tf.train.queue_runner.QueueRunner(example_queue, enqueue_ops)) + example_serialized = example_queue.dequeue() + else: + reader = tf.TFRecordReader() + _, example_serialized = reader.read(filename_queue) + + results = [] + for thread_idx in range(num_preprocess_threads): + dm, pose, bbx, name = self.parse_example_test(example_serialized) + if preprocess_op != None: + result = preprocess_op(dm, pose, bbx, self.cfg) + results.append(list(result)) + else: + results.append([dm, pose]) + batch = tf.train.batch_join( + results, batch_size=batch_size, capacity=2*num_preprocess_threads*batch_size) + return batch + else: + return super(NyuDataset, self).get_batch_op(batch_size, + num_readers, + num_preprocess_threads, + preprocess_op, + is_train) + +def saveTFRecord(): + # reader = NyuDataset('training') + # reader.write_TFRecord_multi_thread(num_threads=30, num_shards=300) + + reader = NyuDataset('testing') + reader.write_TFRecord_multi_thread(num_threads=16, num_shards=16) + +if __name__ == '__main__': + saveTFRecord() diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/nyu_bbx.pkl b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/nyu_bbx.pkl new file mode 100644 index 0000000000000000000000000000000000000000..944d67e56d04480da5065707b7a53e4d63583f23 Binary files /dev/null and b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/nyu_bbx.pkl differ diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/preprocess.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..46425e94c2f59cb9fb2c11fb3a102872ec6dd4de --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/preprocess.py @@ -0,0 +1,268 @@ +from __future__ import print_function, absolute_import, division + +import gpu_config +import numpy as np +import tensorflow as tf +import data.util +from data.util import xyz2uvd_op, uvd2xyz_op, heatmap_from_xyz_op, CameraConfig +FLAGS = tf.app.flags.FLAGS + +def crop_from_xyz_pose(dm, pose, cfg, out_w, out_h, pad=20.0): + '''crop depth map by generate the bounding box according to the pose + Args: + dms: depth map + poses: either estimated or groundtruth in xyz coordinate + cfg: the initial camera configuration + out_w: output width + out_h: output height + Returns: + crop_dm: the cropped depth map + new_cfg: the new camera configuration for the cropped depth map + ''' + with tf.name_scope('crop'): + # determine bouding box from pose + in_h, in_w = dm.get_shape()[0].value, dm.get_shape()[1].value + uvd_pose = tf.reshape(xyz2uvd_op(pose,cfg), (-1,3)) + min_coor = tf.reduce_min(uvd_pose, axis=0) + max_coor = tf.reduce_max(uvd_pose, axis=0) + + top = tf.minimum(tf.maximum(min_coor[1]-pad, 0.0), cfg.h-2*pad) + left = tf.minimum(tf.maximum(min_coor[0]-pad, 0.0), cfg.w-2*pad) + bottom = tf.maximum(tf.minimum(max_coor[1]+pad, cfg.h), tf.cast(top, tf.float32)+2*pad-1) + right = tf.maximum(tf.minimum(max_coor[0]+pad, cfg.w), tf.cast(left, tf.float32)+2*pad-1) + + top = tf.cast(top, tf.int32) + left = tf.cast(left, tf.int32) + bottom = tf.cast(bottom, tf.int32) + right = tf.cast(right, tf.int32) + + cropped_dm = tf.image.crop_to_bounding_box(dm, + offset_height=top, + offset_width=left, + target_height=bottom-top, + target_width=right-left) + + longer_edge = tf.maximum(bottom-top, 
right-left) + offset_height = tf.to_int32(tf.divide(longer_edge-bottom+top, 2)) + offset_width = tf.to_int32(tf.divide(longer_edge-right+left, 2)) + cropped_dm = tf.image.pad_to_bounding_box(cropped_dm, + offset_height=offset_height, + offset_width=offset_width, + target_height=longer_edge, + target_width=longer_edge) + cropped_dm = tf.image.resize_images(cropped_dm, (out_h, out_w)) + + # to further earse the background + uvd_list = tf.unstack(uvd_pose, axis=-1) + + uu = tf.clip_by_value(tf.to_int32(uvd_list[0]), 0, in_w-1) + vv = tf.clip_by_value(tf.to_int32(uvd_list[1]), 0, in_h-1) + + dd = tf.gather_nd(dm, tf.stack([vv,uu], axis=-1)) + dd = tf.boolean_mask(dd, tf.greater(dd, 100)) + d_th = tf.reduce_min(dd) + 250.0 + if FLAGS.dataset == 'icvl': + cropped_dm = tf.where(tf.less(cropped_dm,500.0), cropped_dm, tf.zeros_like(cropped_dm)) + else: + cropped_dm = tf.where(tf.less(cropped_dm,d_th), cropped_dm, tf.zeros_like(cropped_dm)) + + with tf.name_scope('cfg'): + ratio_x = tf.cast(longer_edge/out_w, tf.float32) + ratio_y = tf.cast(longer_edge/out_h, tf.float32) + top = tf.cast(top, tf.float32) + left = tf.cast(left, tf.float32) + + new_cfg = tf.stack([cfg.fx/ratio_x, cfg.fy/ratio_y, + (cfg.cx-left+tf.to_float(offset_width))/ratio_x, + (cfg.cy-top+tf.to_float(offset_height))/ratio_y, + tf.cast(out_w,tf.float32), tf.cast(out_h,tf.float32)], axis=0) + return [cropped_dm, pose, new_cfg] + +def crop_from_bbx(dm, pose, bbx, cfg, out_w, out_h): + '''crop depth map by generate the bounding box according to the pose + Args: + dms: depth map + pose: groundtruth pose for further error evaluation + bbx: bounding box + cfg: the initial camera configuration + out_w: output width + out_h: output height + Returns: + crop_dm: the cropped depth map + new_cfg: the new camera configuration for the cropped depth map + ''' + with tf.name_scope('crop'): + top, left, bottom, right, d_th = bbx[0], bbx[1], bbx[2], bbx[3], bbx[4] + + top = tf.cast(top, tf.int32) + left = tf.cast(left, tf.int32) + bottom = tf.cast(bottom, tf.int32) + right = tf.cast(right, tf.int32) + + cropped_dm = tf.image.crop_to_bounding_box(dm, + offset_height=top, + offset_width=left, + target_height=bottom-top, + target_width=right-left) + + longer_edge = tf.maximum(bottom-top, right-left) + offset_height = tf.to_int32(tf.divide(longer_edge-bottom+top, 2)) + offset_width = tf.to_int32(tf.divide(longer_edge-right+left, 2)) + cropped_dm = tf.image.pad_to_bounding_box(cropped_dm, + offset_height=offset_height, + offset_width=offset_width, + target_height=longer_edge, + target_width=longer_edge) + cropped_dm = tf.image.resize_images(cropped_dm, (out_h, out_w)) + cropped_dm = tf.where(tf.less(cropped_dm,d_th), cropped_dm, tf.zeros_like(cropped_dm)) + + with tf.name_scope('cfg'): + ratio_x = tf.cast(longer_edge/out_w, tf.float32) + ratio_y = tf.cast(longer_edge/out_h, tf.float32) + top = tf.cast(top, tf.float32) + left = tf.cast(left, tf.float32) + + new_cfg = tf.stack([cfg.fx/ratio_x, cfg.fy/ratio_y, + (cfg.cx-left+tf.to_float(offset_width))/ratio_x, + (cfg.cy-top+tf.to_float(offset_height))/ratio_y, + tf.cast(out_w,tf.float32), tf.cast(out_h,tf.float32)], axis=0) + return [cropped_dm, pose, new_cfg] + +def center_of_mass(dm, cfg): + shape = tf.shape(dm) + c_h, c_w = shape[0], shape[1] + ave_u, ave_v = tf.cast(c_w/2, tf.float32), tf.cast(c_h/2, tf.float32) + ave_d = tf.reduce_mean(tf.boolean_mask(dm, tf.greater(dm,0))) + + ave_d = tf.maximum(ave_d, 200.0) + + ave_x = (ave_u-cfg[2])*ave_d/cfg[0] + ave_y = (ave_v-cfg[3])*ave_d/cfg[1] + 
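
center_of_mass back-projects the patch centre through the pinhole model, the same (u - cx) * d / fx relation used by _bpro in data/util.py. A plain numpy version of that relation; the intrinsics in the call are invented for the example:

    import numpy as np

    def backproject_uvd(u, v, d, cfg):
        # cfg follows CameraConfig order: (fx, fy, cx, cy, w, h)
        x = (u - cfg[2]) * d / cfg[0]
        y = (v - cfg[3]) * d / cfg[1]
        return np.array([x, y, d])

    # a pixel at the principal point maps to x = y = 0 at any depth
    print(backproject_uvd(320.0, 240.0, 500.0, (588.0, 587.0, 320.0, 240.0, 640.0, 480.0)))
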
ave_xyz=tf.stack([ave_x,ave_y,ave_d],axis=0) + return ave_xyz + +def norm_xyz_pose(xyz_poses, coms, pca_para=None): + jnt_num = int(xyz_poses.shape[1].value/3) + def fn(elems): + xyz_pose, com = elems[0], elems[1] + norm_xyz_pose = tf.divide(xyz_pose - tf.tile(com,[jnt_num]), POSE_NORM_RATIO) + return [norm_xyz_pose, com] + + norm_xyz_poses, _ = tf.map_fn(fn, [xyz_poses,coms]) + if pca_para is not None: + norm_xyz_poses = tf.nn.xw_plus_b(norm_xyz_poses, tf.transpose(pca_para[0]), pca_para[2]) + norm_xyz_poses = tf.divide(norm_xyz_poses, PCA_NORM_RATIO) + return norm_xyz_poses + +def unnorm_xyz_pose(norm_xyz_poses, coms, pca_para=None): + if pca_para is not None: + norm_xyz_poses = tf.multiply(norm_xyz_poses, PCA_NORM_RATIO) + norm_xyz_poses = tf.nn.xw_plus_b(norm_xyz_poses, pca_para[0], pca_para[1]) + # norm_xyz_poses = tf.matmul(norm_xyz_poses, pca_para[0]) + + jnt_num = int(norm_xyz_poses.shape[1].value/3) + def fn(elems): + norm_xyz_pose, com = elems[0], elems[1] + xyz_pose = tf.multiply(norm_xyz_pose, POSE_NORM_RATIO) + tf.tile(com,[jnt_num]) + return [xyz_pose, com] + + xyz_poses, _ = tf.map_fn(fn, [norm_xyz_poses,coms]) + return xyz_poses + +D_RANGE=300.0 +POSE_NORM_RATIO = 100.0 +PCA_NORM_RATIO = 5.0 + +def norm_dm(dms, coms): + def fn(elems): + dm, com = elems[0], elems[1] + max_depth = com[2]+D_RANGE*0.5 + min_depth = com[2]-D_RANGE*0.5 + mask = tf.logical_and(tf.less(dm, max_depth), tf.greater(dm, min_depth-D_RANGE*0.5)) + normed_dm = tf.where(mask, tf.divide(dm-min_depth, D_RANGE), -1.0*tf.ones_like(dm)) + return [normed_dm, com] + + norm_dms, _ = tf.map_fn(fn, [dms, coms]) + + return norm_dms + +def generate_xyzs_from_multi_cfgs(dms, cfgs, coms): + '''generate the point cloud from depth map + Args: + dms: the normalized depth map, (b,h,w,1) + cfgs: the corresponding camera configurations, (b, 6) + coms: the corresponding center of mass, (b, 3) + Returns: + xyzs: the normalized xyz point cloud, (b, h, w, 3) + ''' + + def fn(elem): + dm, cfg, com = elem[0], elem[1], elem[2] + + zz = tf.squeeze(dm, axis=-1) + min_depth = com[2]-D_RANGE*0.5 + max_depth = com[2]+D_RANGE*0.5 + zz = tf.where(tf.less(zz, -0.99), + tf.ones_like(zz)*max_depth, + tf.multiply(zz, D_RANGE)+min_depth) + + xx, yy = tf.meshgrid(tf.range(h), tf.range(w)) + xx = tf.to_float(xx) + yy = tf.to_float(yy) + + w_ratio = cfg[4]/w + h_ratio = cfg[5]/h + new_cfg = CameraConfig(cfg[0]/w_ratio, cfg[1]/h_ratio, + cfg[2]/w_ratio, cfg[3]/h_ratio, + w, h) + + xx = tf.multiply(xx-new_cfg[2], tf.divide(zz, new_cfg[0])) + yy = tf.multiply(yy-new_cfg[3], tf.divide(zz, new_cfg[1])) + + # renormalize the points as normalizing the pose + xx = tf.divide(xx-com[0], POSE_NORM_RATIO) + yy = tf.divide(yy-com[1], POSE_NORM_RATIO) + zz = tf.divide(zz-com[2], POSE_NORM_RATIO) + + xyz = tf.stack([xx,yy,zz], axis=-1) + return [xyz, cfg, com] + + h, w = dms.get_shape()[1].value, dms.get_shape()[2].value + xyzs, _, _ = tf.map_fn(fn, [dms, cfgs, coms]) + return xyzs + +def data_aug(dms, poses, cfgs, coms): + def fn(elems): + dm, pose, cfg, com = elems[0], elems[1], elems[2], elems[3] + # random rotation + angle = tf.random_uniform((1,),-1*np.pi,np.pi) + rot_dm = tf.contrib.image.rotate(dm,angle) + + uv_com = xyz2uvd_op(com, cfg) + uvd_pt = xyz2uvd_op(pose, cfg) - tf.tile(uv_com,[jnt_num]) + cost, sint = tf.cos(angle)[0], tf.sin(angle)[0] + rot_mat = tf.stack([cost,-sint,0, sint,cost,0, 0.0,0.0,1.0], axis=0) + rot_mat = tf.reshape(rot_mat, (3,3)) + + uvd_pt = tf.reshape(uvd_pt, (-1,3)) + rot_pose = tf.reshape(tf.matmul(uvd_pt, rot_mat), (-1,)) + + 
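
The rotation applied to the pose in data_aug mirrors the image rotation: the uvd joints are first centred on the hand centre, then multiplied by an in-plane rotation matrix. The same transform in plain numpy, as a sketch for checking the convention rather than part of the pipeline:

    import numpy as np

    def rotate_uvd_about_center(uvd_pts, center_uv, angle):
        # uvd_pts: (N, 3) joints in pixel/depth coordinates
        cost, sint = np.cos(angle), np.sin(angle)
        rot = np.array([[cost, -sint, 0.0],
                        [sint,  cost, 0.0],
                        [0.0,   0.0,  1.0]])
        offset = np.array([center_uv[0], center_uv[1], 0.0])
        return (uvd_pts - offset).dot(rot) + offset
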
# random elongate x,y edge + edge_ratio = tf.clip_by_value(tf.random_normal((2,), 1.0, 0.2), 0.9, 1.1) + target_height = tf.to_int32(tf.to_float(tf.shape(dm)[0])*edge_ratio[0]) + target_width = tf.to_int32(tf.to_float(tf.shape(dm)[1])*edge_ratio[1]) + # 1 stands for nearest neighour interpolation + rot_dm = tf.image.resize_images(rot_dm, (target_height, target_width), 1) + rot_dm = tf.image.resize_image_with_crop_or_pad(rot_dm, tf.shape(dm)[0], tf.shape(dm)[1]) + rot_pose = tf.multiply(rot_pose, tf.tile([edge_ratio[1],edge_ratio[0],1.0], [jnt_num])) + + rot_pose = rot_pose + tf.tile(uv_com, [jnt_num]) + rot_pose = uvd2xyz_op(rot_pose, cfg) + rot_pose = tf.reshape(rot_pose, (-1,)) + return [rot_dm, rot_pose, cfgs, coms] + + jnt_num = tf.to_int32(tf.shape(poses)[1]/3) + aug_dms, aug_poses, _, _ = tf.map_fn(fn, [dms, poses, cfgs, coms]) + aug_dms = tf.reshape(aug_dms, tf.shape(dms)) + return aug_dms, aug_poses + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/util.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/util.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b5a81dc1087983d159763b2243b11fc55970c7 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/util.py @@ -0,0 +1,188 @@ +from __future__ import print_function, division, absolute_import +import collections +import cv2, numpy as np +import scipy.stats as st + +import gpu_config +import tensorflow as tf + +CameraConfig = collections.namedtuple('CameraConfig', 'fx,fy,cx,cy,w,h') + +'''utilities for 2D-3D conversions +function with _op suffix returns a tf operation +''' + +'''_pro: perspective transformation + _bpro: back perspective transformation +''' +# fx, fy, cx, cy, w, h +# 0, 1, 2, 3, 4, 5 +_pro = lambda pt3, cfg: [pt3[0]*cfg[0]/pt3[2]+cfg[2], pt3[1]*cfg[1]/pt3[2]+cfg[3], pt3[2]] +_bpro = lambda pt2, cfg: [(pt2[0]-cfg[2])*pt2[2]/cfg[0], (pt2[1]-cfg[3])*pt2[2]/cfg[1], pt2[2]] + +def xyz2uvd(xyz, cfg): + '''xyz: list of xyz points + cfg: camera configuration + ''' + xyz = xyz.reshape((-1,3)) + # perspective projection function + uvd = [_pro(pt3, cfg) for pt3 in xyz] + return np.array(uvd) + +def uvd2xyz(uvd, cfg): + '''uvd: list of uvd points + cfg: camera configuration + ''' + uvd = uvd.reshape((-1,3)) + # backprojection + xyz = [_bpro(pt2, cfg) for pt2 in uvd] + return np.array(xyz) + +def xyz2uvd_op(xyz_pts, cfg): + '''xyz_pts: tensor of xyz points + camera_cfg: constant tensor of camera configuration + ''' + xyz_pts = tf.reshape(xyz_pts, (-1,3)) + xyz_list = tf.unstack(xyz_pts) + uvd_list = [_pro(pt, cfg) for pt in xyz_list] + uvd_pts = tf.stack(uvd_list) + return tf.reshape(uvd_pts, shape=(-1,)) + +def uvd2xyz_op(uvd_pts, cfg): + uvd_pts = tf.reshape(uvd_pts, (-1,3)) + uvd_list = tf.unstack(uvd_pts) + xyz_list = [_bpro(pt, cfg) for pt in uvd_list] + xyz_pts = tf.stack(xyz_list) + return tf.reshape(xyz_pts, (-1,)) + +'''as a pre-processing step +''' +def _gaussian_kern(filter_size=10, sigma=3): + ''' + return an np array of a Gaussian kernel + ''' + interval = (2*sigma+1.0)/(filter_size) + x = np.linspace(-sigma-interval/2., sigma+interval/2., filter_size+1) + kern1d = np.diff(st.norm.cdf(x)) + kernel_raw = np.sqrt(np.outer(kern1d, kern1d)) + kernel = kernel_raw/kernel_raw.sum() + return kernel + +def gaussian_filter(filter_size=10, sigma=3): + gau_init = tf.constant(_gaussian_kern(filter_size,sigma), tf.float32) + with tf.variable_scope('preprocess') as scope: + try: + gaussian_filter = 
tf.get_variable('gaussian_filter', + initializer=gau_init, trainable=False) + gaussian_filter = tf.reshape(gaussian_filter, (filter_size,filter_size,1,1)) + except ValueError: + scope.reuse_variables() + gaussian_filter = tf.get_variable('gaussian_filter', + initializer=gau_init, trainable=False) + gaussian_filter = tf.reshape(gaussian_filter, (filter_size,filter_size,1,1)) + return gaussian_filter + +def heatmap_from_uvd_op(uvd_pts, cfg, gaussian_filter): + '''we firstly construct a sparse tensor from the coordinate + val: the value at the center of corresponding point + ''' + with tf.name_scope('preprocess'): + uvd_pts = tf.reshape(uvd_pts, (-1,3)) + num_pt = uvd_pts.shape[0] + num_pt_op = tf.to_int64(num_pt) + + nn = tf.range(num_pt, dtype=tf.int64) + nn = tf.reshape(nn, (-1,1)) + + xx = uvd_pts[:,0] + xx = tf.clip_by_value(xx, 0, cfg.w-1) + xx = tf.to_int64(xx) + xx = tf.reshape(xx, (-1,1)) + + yy = uvd_pts[:,1] + yy = tf.clip_by_value(yy, 0, cfg.h-1) + yy = tf.to_int64(yy) + yy = tf.reshape(yy, (-1,1)) + indices = tf.concat([nn,yy,xx], axis=1) + + val = 1.0 + raw_hm = tf.sparse_to_dense(sparse_indices=indices, + output_shape=[num_pt_op,cfg.h,cfg.w], + sparse_values=val) + raw_hm = tf.expand_dims(raw_hm, axis=[-1]) + raw_hm = tf.cast(raw_hm, tf.float32) + + hm = tf.nn.conv2d(raw_hm, gaussian_filter, strides=[1,1,1,1], + padding='SAME', data_format='NHWC') + hm = tf.nn.conv2d(hm, gaussian_filter, strides=[1,1,1,1], + padding='SAME', data_format='NHWC') + hm = tf.divide(hm, tf.reduce_max(hm)) + + # shuffle dimensions of hm + hm_list = tf.unstack(hm, axis=0) + hm = tf.concat(hm_list, axis=2) + return hm + +def heatmap_from_xyz_op(xyz_pts, cfg, gaussian_filter): + return heatmap_from_uvd_op(xyz2uvd_op(xyz_pts, cfg), cfg, gaussian_filter) + + +'''utilities for visualization +''' +def visHeatMap(dm, pose, ch_flag=None): + raise NotImplementedError + +def visDepthMap(dm, thresh=750, isHeatmap=True): + dm[dm>thresh] = 0 + ratio = 255/thresh + dm = dm*ratio + if False: + dm = dm/dm.max() + dm_color = cv2.applyColorMap(dm, cv2.COLORMAP_JET) + dm = dm_color + else: + dm = cv2.cvtColor(dm.astype('uint8'), cv2.COLOR_GRAY2BGR) + return dm + +def visAnnotatedDepthMap(dm, pose, cfg, thresh=750): + dm = visDepthMap(dm, thresh) + pose = xyz2uvd(pose,cfg) + for pt2 in pose: + cv2.circle(dm, (int(pt2[0]), int(pt2[1])), 3, (0,0,255), -1) + return dm + +def visAnnotatedDepthMap_uvd(dm, pose, thresh=750): + dm = visDepthMap(dm, thresh) + for pt2 in pose: + cv2.circle(dm, (int(pt2[0]), int(pt2[1])), 3, (0,0,255), -1) + return dm + +'''unit test +''' +def run_heatmap_from_xyz(): + from data.bigHand import BigHandDataset + + pts = np.array([-67.4598, 5.3851, 584.7425, -55.6470, 8.8958, 587.4889, -35.5874, -54.6665, 583.3420, -54.7895, -53.8799, 577.8048, -71.0328, -51.3926, 573.4493, -88.8696, -46.2022, 569.1099, -32.8905, -20.8474, 553.7415, -18.7491, -39.3305, 532.7702, -19.8893, -56.4645, 516.0034, -35.5810, -69.2128, 545.6373, -35.5768, -78.8591, 520.6336, -35.2772, -75.8186, 501.8809, -52.5099, -66.7139, 535.8283, -51.0812, -74.7579, 509.5187, -51.7939, -78.6711, 488.8988, -72.3119, -85.2855, 549.0604, -73.1781, -108.2356, 532.5458, -69.9800, -125.8427, 521.5565, -101.7839, -74.5066, 557.4333, -110.1215, -92.7800, 549.8948, -117.0142, -109.9064, 545.4029 +]) + pts = pts.reshape((-1,)).astype(np.float32) + + tf.reset_default_graph() + xyz_pts = tf.placeholder(tf.float32,(BigHandDataset.pose_dim,)) + cfg = BigHandDataset.cfg + heatmap_op = heatmap_from_xyz_op(xyz_pts, cfg) + + with tf.Session() as sess: + 
(heatmap,) = sess.run([heatmap_op], {xyz_pts:pts}) + print('gaussian blurred') + summap = np.zeros((BigHandDataset.cfg.h, BigHandDataset.cfg.w)) + print(heatmap.shape) + for hm in heatmap: + summap += hm + + summap /= summap.max() + import matplotlib.pyplot as plt + plt.imshow(summap, interpolation='none') + plt.show() + +if __name__ == '__main__': + run_heatmap_from_xyz() diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/visualization.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..adc8217cdf7403774a2e3dc62f644ab7e340b71d --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/visualization.py @@ -0,0 +1,137 @@ +# for matplotlib wrapper to tf summary +import tensorflow as tf +import tfplot, matplotlib +import matplotlib.lines as lines + +FLAGS = tf.app.flags.FLAGS + +def figure_heatmap(hm): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + im = ax.imshow(hm, cmap=matplotlib.cm.jet) + fig.colorbar(im) + return fig + +def figure_joint(dm, uvd_pt): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + ax.imshow(dm, cmap=matplotlib.cm.Greys) + + if FLAGS.dataset == 'bighand': + ax.scatter(uvd_pt[0,0], uvd_pt[0,1], s=200, c='w') + ax.scatter(uvd_pt[1:6,0], uvd_pt[1:6,1], s=100, c='w') + ax.scatter(uvd_pt[6:9,0], uvd_pt[6:9,1], s=60, c='c') + ax.scatter(uvd_pt[9:12,0], uvd_pt[9:12,1], s=60, c='m') + ax.scatter(uvd_pt[12:15,0], uvd_pt[12:15,1], s=60, c='y') + ax.scatter(uvd_pt[15:18,0], uvd_pt[15:18,1], s=60, c='g') + ax.scatter(uvd_pt[18:,0], uvd_pt[18:,1], s=60, c='r') + elif FLAGS.dataset == 'nyu': + ax.scatter(uvd_pt[10:,0], uvd_pt[10:,1], s=200, c='w') + ax.scatter(uvd_pt[0,0], uvd_pt[0,1], s=60, c='c') + ax.scatter(uvd_pt[1,0], uvd_pt[1,1], s=90, c='c') + ax.scatter(uvd_pt[2,0], uvd_pt[2,1], s=60, c='m') + ax.scatter(uvd_pt[3,0], uvd_pt[3,1], s=90, c='m') + ax.scatter(uvd_pt[4,0], uvd_pt[4,1], s=60, c='y') + ax.scatter(uvd_pt[5,0], uvd_pt[5,1], s=90, c='y') + ax.scatter(uvd_pt[6,0], uvd_pt[6,1], s=60, c='g') + ax.scatter(uvd_pt[7,0], uvd_pt[7,1], s=90, c='g') + ax.scatter(uvd_pt[8,0], uvd_pt[8,1], s=60, c='r') + ax.scatter(uvd_pt[9,0], uvd_pt[9,1], s=90, c='r') + elif FLAGS.dataset == 'msra': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*4+1,0], uvd_pt[f*4+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+2,0], uvd_pt[f*4+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+3,0], uvd_pt[f*4+3,1], s=70, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+4,0], uvd_pt[f*4+4,1], s=60, c=fig_color[f]) + + elif FLAGS.dataset == 'icvl': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*3+1,0], uvd_pt[f*3+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+2,0], uvd_pt[f*3+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+3,0], uvd_pt[f*3+3,1], s=60, c=fig_color[f]) + return fig + +def figure_joint_skeleton(dm, uvd_pt): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + ax.imshow(dm, cmap=matplotlib.cm.Greys) + + if FLAGS.dataset == 'bighand': + ax.scatter(uvd_pt[0,0], uvd_pt[0,1], s=200, c='w') + ax.scatter(uvd_pt[1:6,0], uvd_pt[1:6,1], s=100, c='w') + ax.scatter(uvd_pt[6:9,0], uvd_pt[6:9,1], s=60, c='c') + ax.scatter(uvd_pt[9:12,0], uvd_pt[9:12,1], s=60, c='m') + ax.scatter(uvd_pt[12:15,0], 
uvd_pt[12:15,1], s=60, c='y') + ax.scatter(uvd_pt[15:18,0], uvd_pt[15:18,1], s=60, c='g') + ax.scatter(uvd_pt[18:,0], uvd_pt[18:,1], s=60, c='r') + elif FLAGS.dataset == 'nyu': + fig_color = ['c', 'm', 'y', 'g', 'r'] + for f in range(5): + ax.plot([uvd_pt[f*2,0], uvd_pt[f*2+1,0]], + [uvd_pt[f*2,1], uvd_pt[f*2+1,1]], color=fig_color[f], linewidth=3) + ax.scatter(uvd_pt[f*2,0],uvd_pt[f*2,1],s=60,c=fig_color[f]) + ax.scatter(uvd_pt[f*2+1,0],uvd_pt[f*2+1,1],s=60,c=fig_color[f]) + if f<4: + ax.plot([uvd_pt[13,0], uvd_pt[f*2+1,0]], + [uvd_pt[13,1], uvd_pt[f*2+1,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[9,0], uvd_pt[10,0]], + [uvd_pt[9,1], uvd_pt[10,1]], color='r', linewidth=3) + + ax.scatter(uvd_pt[13,0], uvd_pt[13,1], s=200, c='w') + ax.scatter(uvd_pt[11,0], uvd_pt[11,1], s=100, c='b') + ax.scatter(uvd_pt[12,0], uvd_pt[12,1], s=100, c='b') + + ax.plot([uvd_pt[13,0], uvd_pt[11,0]], + [uvd_pt[13,1], uvd_pt[11,1]], color='b', linewidth=3) + ax.plot([uvd_pt[13,0], uvd_pt[12,0]], + [uvd_pt[13,1], uvd_pt[12,1]], color='b', linewidth=3) + ax.plot([uvd_pt[13,0], uvd_pt[10,0]], + [uvd_pt[13,1], uvd_pt[10,1]], color='r', linewidth=3) + + elif FLAGS.dataset == 'msra': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*4+1,0], uvd_pt[f*4+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+2,0], uvd_pt[f*4+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+3,0], uvd_pt[f*4+3,1], s=70, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+4,0], uvd_pt[f*4+4,1], s=60, c=fig_color[f]) + ax.plot([uvd_pt[f*4+1,0], uvd_pt[f*4+2,0]], + [uvd_pt[f*4+1,1], uvd_pt[f*4+2,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[f*4+2,0], uvd_pt[f*4+3,0]], + [uvd_pt[f*4+2,1], uvd_pt[f*4+3,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[f*4+3,0], uvd_pt[f*4+4,0]], + [uvd_pt[f*4+3,1], uvd_pt[f*4+4,1]], color=fig_color[f], linewidth=3) + elif FLAGS.dataset == 'icvl': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*3+1,0], uvd_pt[f*3+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+2,0], uvd_pt[f*3+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+3,0], uvd_pt[f*3+3,1], s=60, c=fig_color[f]) + ax.plot([uvd_pt[f*3+1,0], uvd_pt[f*3+2,0]], + [uvd_pt[f*3+1,1], uvd_pt[f*3+2,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[f*3+2,0], uvd_pt[f*3+3,0]], + [uvd_pt[f*3+2,1], uvd_pt[f*3+3,1]], color=fig_color[f], linewidth=3) + + return fig + +def figure_smp_pts(dm, pts1, pts2): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + ax.imshow(dm, cmap=matplotlib.cm.jet) + + for pt1, pt2 in zip(pts1, pts2): + ax.plot([pt1[0], pt2[0]], [pt1[1], pt2[1]]) + ax.scatter(pt1[0], pt1[1], s=60, c='w') + ax.scatter(pt2[0], pt2[1], s=60, c='m') + return fig + +tf_heatmap_wrapper = tfplot.wrap(figure_heatmap, batch=True, name='hm_summary') +tf_jointplot_wrapper = tfplot.wrap(figure_joint_skeleton, batch=True, name='pt_summary') +tf_smppt_wrapper = tfplot.wrap(figure_smp_pts, batch=True, name='smppt_summary') + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_icvl_model.sh 
b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_icvl_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..84406e467eedf320e07ec2e12285b73da48c3ce4 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_icvl_model.sh @@ -0,0 +1,25 @@ +cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" +cd $cur_dir +model_dir=${cur_dir}/train_cache/icvl_training_s2_f128_daug_um_v1/ +if ! [ -d $model_dir ]; then + mkdir -p $model_dir +fi + +cd $model_dir +url=https://polybox.ethz.ch/index.php/s/f9EWUGSpTeKmFDo/download +fname=icvl.tar.gz + +if [ -f $fname ]; then + echo "file already exists, no need to download again" +else + echo "downloading the pretrained model(62M)..." + wget $url + mv download $fname +fi + +echo "unzipping..." +tar xvzf $fname +mv icvl/*.* ./ +rmdir icvl/ + +echo "done." \ No newline at end of file diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_msra_model.sh b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_msra_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..e1db07d0ebcde7e9bd8cb867dc3560ce42459eaa --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_msra_model.sh @@ -0,0 +1,31 @@ +cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" +cd $cur_dir + +cache_dir=${cur_dir}/msra_model +if ! [ -d $cache_dir ]; then + mkdir $cache_dir +fi +cd $cache_dir + +fname=msra.tar.gz +url=https://polybox.ethz.ch/index.php/s/B2W1ngyUAitsv2e/download +if [ -f $fname ]; then + echo "file already exists, no need to download again" +else + echo "downloading the pretrained model(566M)..." + wget $url + mv download $fname +fi +echo "unzipping..." +tar xvzf $fname + + +cd $cur_dir +for pid in {0..8}; do + tar_dir=${cur_dir}/train_cache/msra_P${pid}_training_s2_f128_daug_um_v1/ + src_dir=${cache_dir}/msra/P${pid}/ + mv $src_dir $tar_dir +done + +rmdir ${cache_dir}/msra +echo "done." \ No newline at end of file diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_nyu_model.sh b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_nyu_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..9654cd227a627a9d7019d55e908da428a68553d2 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_nyu_model.sh @@ -0,0 +1,25 @@ +cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" +cd $cur_dir +model_dir=${cur_dir}/train_cache/nyu_training_s2_f128_daug_um_v1/ +if ! [ -d $model_dir ]; then + mkdir -p $model_dir +fi + +cd $model_dir +url=https://polybox.ethz.ch/index.php/s/Q4GS7bgRRM3zK5J/download +fname=nyu.tar.gz + +if [ -f $fname ]; then + echo "file already exists, no need to download again" +else + echo "downloading the pretrained model(61M)..." + wget $url + mv download $fname +fi + +echo "unzipping..." +tar xvzf $fname +mv nyu/*.* ./ +rmdir nyu/ + +echo "done." 
\ No newline at end of file diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/gpu_config.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/gpu_config.py new file mode 100644 index 0000000000000000000000000000000000000000..0e3d0280cfd0cc2e20c64a882803965182bc605f --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/gpu_config.py @@ -0,0 +1,41 @@ +from __future__ import print_function, division, absolute_import +import os +import commands + +# check the job id +gpu_lock_path = '/tmp/lock-gpu*/info.txt' +lock_str = commands.getstatusoutput('cat %s'%gpu_lock_path) +lock_str = lock_str[1] +lock_str = lock_str.split('\n') + + +# on gpu server, use the gpu for tensorflow +if 'SGE_GPU' in os.environ: + gpulist = [] + for line in lock_str: + if line.find('wanc') == -1: + continue + line = line.split(' ') + job_idx = int(line[7]) + gpu_idx = int(line[1]) + gpulist.append((gpu_idx, job_idx)) + gpulist = sorted(gpulist, key=lambda x:x[1]) + gpu_idx,job_idx = gpulist[-1] + + gpu_list = [gpu_idx] + os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(gpu) for gpu in gpu_list) + print('use GPU for tensorflow') +else: + os.environ['CUDA_VISIBLE_DEVICES'] = '' + gpu_list = [] + print('\x1b[0;31;47m use CPU for tensorflow \x1b[0m') + +num_gpus = len(gpu_list) +print('available gpu list, ', gpu_list) + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +import tensorflow as tf +config = tf.ConfigProto() +config.allow_soft_placement = True +config.gpu_options.allow_growth = True diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/__init__.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/hourglass_um_crop_tiny.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/hourglass_um_crop_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..b9f6cc47d62850594eef9962512c5879c1f336f0 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/hourglass_um_crop_tiny.py @@ -0,0 +1,909 @@ +'''to simultaneously regress the 3D joint offset and the 2D joint heatmap +''' +from __future__ import print_function,absolute_import, division + +import time, os +import numpy as np +from datetime import datetime +import importlib + +import gpu_config +import tensorflow as tf +import data.util +from data.util import heatmap_from_xyz_op, CameraConfig, xyz2uvd_op, uvd2xyz_op +import data.util +import data.preprocess +import numpy as np, numpy.linalg as alg + +# from model_new.train_single_gpu import train +from model.train_single_gpu import train +from model.test_model import test +import network.slim as slim + +from data.preprocess import generate_xyzs_from_multi_cfgs, crop_from_xyz_pose, crop_from_bbx, center_of_mass, norm_xyz_pose, unnorm_xyz_pose + +from data.visualization import tf_heatmap_wrapper, tf_jointplot_wrapper, tf_smppt_wrapper +from data.evaluation import Evaluation + +# implementation setting +tf.app.flags.DEFINE_integer('num_gpus', 1, 
#gpu_config.num_gpus, + """how many gpu to be used""") +# use cpu instead if no gpu is available +tf.app.flags.DEFINE_integer('batch_size', 40, + '''batch size''') +tf.app.flags.DEFINE_integer('debug_level', 1, + '''the higher, the more saved to summary''') +tf.app.flags.DEFINE_integer('sub_batch', 5, + '''batch size''') +tf.app.flags.DEFINE_integer('pid', 0, + '''for msra person id''') +tf.app.flags.DEFINE_boolean('is_train', True, + '''True for traning, False for testing''') + +# the network architecture to be used +tf.app.flags.DEFINE_string('net_module', 'um_v1', + '''the module containing the network architecture''') +tf.app.flags.DEFINE_boolean('is_aug', True, + '''whether to augment data''') +tf.app.flags.DEFINE_string('dataset', 'nyu', + '''the dataset to conduct experiments''') +# epoch +tf.app.flags.DEFINE_integer('epoch', 80, + '''number of epoches''') + +# network specification +tf.app.flags.DEFINE_integer('num_stack', 2, + 'number of stacked hourglass') +tf.app.flags.DEFINE_integer('num_fea', 128, + 'number of feature maps in hourglass') +tf.app.flags.DEFINE_integer('kernel_size', 3, + 'kernel size for the residual module') + +FLAGS = tf.app.flags.FLAGS + +MAXIMUM_DEPTH = 600.0 + +class JointDetectionModel(object): + _moving_average_decay = 0.9999 + _batchnorm_moving_average_decay = 0.9997 + _init_lr = 0.001 + if FLAGS.dataset == 'nyu': + _num_epochs_per_decay = 10 + elif FLAGS.dataset == 'msra': + _num_epochs_per_decay = 20 + _lr_decay_factor = 0.1 + + _adam_beta1 = 0.5 + + # maximum allowed depth + _max_depth = 600.0 + + # input size: the input of the network + _input_height = 128 + _input_width = 128 + + # output size: the output size of network, as well as the largest size of hourglass model + _output_height = 32 + _output_width = 32 + + _gau_sigma = 3.0 + _gau_filter_size = 10 + + _base_dir = './exp/train_cache/' + + + def __init__(self, dataset, detect_net, epoch, net_desc='dummy', val_dataset=None): + ''' + args: + dataset: data.xxxdataset isinstance + detect_net: funtional input of the net + desc: string, the name of the corresponding cache folder + notice: + any tf operations on the graph cannot be defined here, + they can only be defined after the graph is initialized by the training module + ''' + self._dataset = dataset + self._jnt_num = int(dataset.jnt_num) + self._cfg = self._dataset.cfg + + self._num_batches_per_epoch = dataset.approximate_num / (FLAGS.batch_size*FLAGS.sub_batch) + self._net_desc = net_desc + self._net = detect_net + self._max_steps = int(epoch*self._num_batches_per_epoch) + + self._val_dataset = val_dataset + self._model_desc = '%s_%s_s%d_f%d'%(dataset.name, dataset.subset, FLAGS.num_stack, FLAGS.num_fea) + if FLAGS.is_aug: + self._model_desc += '_daug' + + if self._val_dataset: + assert self._jnt_num == self._val_dataset.jnt_num, ( + 'the validation dataset should be with the same number of joints to the traning dataset') + + if not os.path.exists(self._base_dir): + os.makedirs(self._base_dir) + + self._log_path = os.path.join(self._base_dir, self.name, 'validation_log.txt') + + '''data interface + 1. initialize the dataset + 2. the global setting of the batch_size + 3. 
total number of steps + ''' + def batch_input(self, dataset, batch_size=None): + if batch_size is None: + batch_size = FLAGS.batch_size + dm_batch, pose_batch, cfg_batch, com_batch = dataset.get_batch_op( + batch_size=batch_size, + num_readers = 2, + num_preprocess_threads = 2, + preprocess_op=dataset.preprocess_op(self._input_width, self._input_height)) + return [dm_batch, pose_batch, cfg_batch, com_batch] + + def batch_input_test(self, dataset): + dm_batch, pose_batch, cfg_batch, com_batch, name_batch = dataset.get_batch_op_test( + batch_size = FLAGS.batch_size, + preprocess_op = dataset.preprocess_op(self._input_width, self._input_height)) + return [dm_batch, pose_batch, cfg_batch, com_batch, name_batch] + + @property + def train_dataset(self): + return self._dataset + + @property + def val_dataset(self): + return self._val_dataset + + '''hyper parameters + ''' + @property + def init_lr(self): + '''the initial learning rate + ''' + return self._init_lr + @property + def lr_decay_factor(self): + '''the rate of exponential decay of learning rate + ''' + return self._lr_decay_factor + + @property + def decay_steps(self): + '''lr does not decay when global_step < decay_steps + ''' + return self._num_batches_per_epoch * self._num_epochs_per_decay + + @property + def moving_average_decay(self): + return self._moving_average_decay + + @property + def max_steps(self): + return self._max_steps + + '''training operation + ''' + def inference(self, normed_dms, cfgs, coms, reuse_variables, is_training=True): + with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables): + # resize the image to fit the network input + # during training, inference is called by loss function, where dms are resized + end_points = self._net(normed_dms, cfgs, coms, self._jnt_num, is_training) + return end_points + + max_dist_2d = 4.0 # 4 pixels + max_dist_3d = 0.8 # 80mm 3d distance + def _hm_3d(self, oms): + '''generate 3D distance heatmap according to the offset map + Args: + oms: the normalized xyz offset maps, (b,h,w,3*j) + Returns: + hms: the 3D heatmap, (b,h,w,j) + ''' + om_list = tf.unstack(oms, axis=-1) + hm_list = [] + for j in range(self._jnt_num): + xx,yy,zz = om_list[j*3], om_list[j*3+1], om_list[j*3+2] + hm = tf.sqrt(xx**2+yy**2+zz**2) + hm = tf.divide(self.max_dist_3d-hm, self.max_dist_3d) + hm = tf.maximum(hm, tf.zeros_like(hm)) + hm_list.append(hm) + hms = tf.stack(hm_list, axis=-1) + return hms + + def _hm_2d(self, poses, cfgs, out_h, out_w): + '''synthesize the 2d heatmap + Args: + poses: unnormed xyz pose, (b,j*3) + cfgs: camera configuration, (b, 6) + out_h, out_w: output of heatmap size + Returns: + hm2: 2D heatmap, (b, out_h, out_w, j) + ''' + def fn(elems): + xyz_pose, cfg = elems[0], elems[1] + + w_ratio = cfg[4] / out_w + h_ratio = cfg[5] / out_h + new_cfg = CameraConfig(cfg[0]/w_ratio, cfg[1]/h_ratio, + cfg[2]/w_ratio, cfg[3]/h_ratio, + out_w, out_h) + + xx, yy = tf.meshgrid(tf.range(out_h), tf.range(out_w)) + xx, yy = tf.cast(xx, tf.float32), tf.cast(yy, tf.float32) + xx = tf.tile(tf.expand_dims(xx, axis=-1), [1, 1, self._jnt_num]) + yy = tf.tile(tf.expand_dims(yy, axis=-1), [1, 1, self._jnt_num]) + + uvd_pose = tf.reshape(data.util.xyz2uvd_op(xyz_pose, new_cfg), (-1,3)) + [uu,vv,dd] = tf.unstack(uvd_pose, axis=-1) + uu = tf.reshape(uu, (1,1,-1)) + vv = tf.reshape(vv, (1,1,-1)) + + hm = tf.maximum(self.max_dist_2d-tf.sqrt(tf.square(xx-uu)+tf.square(yy-vv)), + tf.zeros_like(xx))/self.max_dist_2d + return [hm, cfg] + + with tf.name_scope('pose_sync'): + hms, _ = tf.map_fn(fn, [poses, 
cfgs]) + return hms + + def _um(self, om, hm_3d): + '''get the unit offset vector map from offset maps + Args: + om: the offset map, (b,h,w,j*3) + hm_3d: the offset norm, (b,h,w,j) + Returns: + um: the unit offset map, (b,h,w,j*3) + ''' + om_list = tf.unstack(om, axis=-1) + + dm_3d = self.max_dist_3d - tf.multiply(hm_3d, self.max_dist_3d) + dm_list = tf.unstack(dm_3d, axis=-1) + + um_list = [] + + for j in range(self._jnt_num): + x,y,z = om_list[j*3], om_list[j*3+1], om_list[j*3+2] + d = dm_list[j] + + mask = tf.less(d, self.max_dist_3d-1e-2) + + x = tf.where(mask, tf.divide(x, d), tf.zeros_like(x)) + y = tf.where(mask, tf.divide(y, d), tf.zeros_like(y)) + z = tf.where(mask, tf.divide(z, d), tf.zeros_like(z)) + um_list += [x,y,z] + return tf.stack(um_list, axis=-1) + + def _resume_om(self, hm_3d, um): + '''resume the offset map from the 3d heatmap and unit offset vector + Args: + hm_3d: the 3D heatmap, (b,h,w,j) + um: the 3D unit offset vector, (b,h,w,j*3) + Returns: + om: the 3D offset vector, (b,h,w,j) + ''' + # um = tf.clip_by_value(um, -1.0, 1.0) + um_list = tf.unstack(um, axis=-1) + + dm_3d = self.max_dist_3d - tf.multiply(hm_3d, self.max_dist_3d) + dm_list = tf.unstack(dm_3d, axis=-1) + + om_list = [] + + for j in range(self._jnt_num): + x,y,z = um_list[j*3], um_list[j*3+1], um_list[j*3+2] + d = dm_list[j] + x = tf.multiply(x,d) + y = tf.multiply(y,d) + z = tf.multiply(z,d) + om_list += [x,y,z] + return tf.stack(om_list, axis=-1) + + def _vis_um_xy(self, ums): + '''visualize the xy plane angle of ums + ''' + um_list = tf.unstack(ums, axis=-1) + angle_list = [] + for j in range(self._jnt_num): + x,y,z = um_list[j*3], um_list[j*3+1], um_list[j*3+2] + d = tf.sqrt(x**2+y**2) + sin = tf.where(tf.less(d**2+z**2, 0.1), tf.ones_like(d), tf.sin(tf.divide(x,d))) + angle_list.append(sin) + return tf.stack(angle_list, axis=-1) + + def _vis_um_z(self, ums): + '''visuzlie the z plane angle of ums + ''' + um_list = tf.unstack(ums, axis=-1) + angle_list = [] + for j in range(self._jnt_num): + angle_list.append(um_list[j*3+2]) + return tf.stack(angle_list, axis=-1) + + # training + def loss(self, dms, poses, cfgs, coms): + ''' the losses for the training + Args: + dms: + poses: + reuse_variables: + Returns: + the total loss + ''' + if FLAGS.is_aug: + dms, poses = data.preprocess.data_aug(dms, poses, cfgs, coms) + + # generate ground truth + gt_hms = self._hm_2d(poses, cfgs, self._output_height, self._output_width) + + gt_normed_poses = norm_xyz_pose(poses, coms) + normed_dms = data.preprocess.norm_dm(dms, coms) + tiny_normed_dms = tf.image.resize_images(normed_dms, (self._output_height, self._output_width), 2) + xyzs = generate_xyzs_from_multi_cfgs(tiny_normed_dms, cfgs, coms) + xyzs = tf.tile(xyzs, [1,1,1,self._jnt_num]) + gt_oms = tf.reshape(gt_normed_poses, (-1,1,1,3*self._jnt_num)) - xyzs + + gt_hm3s = self._hm_3d(gt_oms) + gt_ums = self._um(gt_oms, gt_hm3s) + + # generate estimation + end_points = self.inference(normed_dms, cfgs, coms, reuse_variables=None, is_training=True) + + # heatmap loss + est_hm_list = end_points['hm_outs'] + hm_losses = [tf.nn.l2_loss(est_hms-gt_hms) for est_hms in est_hm_list] + + # 3D heatmap loss + est_hm3_list = end_points['hm3_outs'] + hm3_losses = [tf.nn.l2_loss(est_hm3-gt_hm3s) for est_hm3 in est_hm3_list] + + # offsetmap loss + # we only consider the nearby point offset maps + # in order to make the oms loss on the same scale w.r.t. 
hms loss + est_um_list = end_points['um_outs'] + um_losses = [tf.nn.l2_loss(est_ums-gt_ums) for est_ums in est_um_list] + + # add the weight decay loss + reg_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), 'reg_loss') + hm_loss = tf.add_n(hm_losses, 'hm_loss') + um_loss = tf.add_n(um_losses, 'um_loss') + hm3_loss = tf.add_n(hm3_losses, 'hm3_loss') + + total_loss = reg_loss+hm_loss+um_loss+hm3_loss + + tf.summary.scalar('tra/um_loss', um_loss) + tf.summary.scalar('tra/hm_loss', hm_loss) + tf.summary.scalar('tra/hm3_loss', hm3_loss) + + # to visualize the training error, + # only pick the first three for tensorboard visualization + est_hms = est_hm_list[-1][0:3,:,:,:] + est_ums = est_um_list[-1][0:3,:,:,:] + est_hm3s = est_hm3_list[-1][0:3,:,:,:] + tiny_normed_dms = tiny_normed_dms[0:3,:,:,:] + cfgs = cfgs[0:3,:] + coms = coms[0:3,:] + dms = dms[0:3,:,:,:] + est_oms = self._resume_om(est_hm3s, est_ums) + + # get point estimation + est_normed_poses = self._xyz_estimation(est_hms, est_oms, est_hm3s, tiny_normed_dms, cfgs, coms) + est_normed_poses = tf.reshape(est_normed_poses, + (est_normed_poses.get_shape()[0].value, -1)) + xyz_pts = unnorm_xyz_pose(est_normed_poses, coms) + + # 2d joint detection + def to_uvd_fn(elem): + xyz_pt, cfg = elem[0], elem[1] + return [data.util.xyz2uvd_op(xyz_pt, cfg), cfg] + uvd_pts, _ = tf.map_fn(to_uvd_fn, [xyz_pts, cfgs]) + resized_hms = tf.image.resize_images(est_hms, (self._input_height, self._input_width), 2) + hm_uvd_pts, _ = self._uvd_estimation_op(resized_hms, tf.ones_like(resized_hms)) + + # for visualization + gt_xy_angle = self._vis_um_xy(gt_ums) + gt_z_angle = self._vis_um_z(gt_ums) + est_xy_angle = self._vis_um_xy(est_ums) + est_z_angle = self._vis_um_z(est_ums) + + if FLAGS.debug_level > 0: + tf.summary.image('tra_dm/', dms) + tf.summary.image('tra_pts/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(uvd_pts, (3,-1,3)))) + tf.summary.image('tra_pt_hm/', tf_jointplot_wrapper(tf.squeeze(dms, axis=-1), + tf.reshape(hm_uvd_pts, (3,-1,3)))) + if FLAGS.debug_level > 1: + tf.summary.image('tra_hm_est_0/', tf_heatmap_wrapper(est_hms[:,:,:,0])) + tf.summary.image('tra_hm_gt_0/', tf_heatmap_wrapper(gt_hms[:,:,:,0])) + tf.summary.image('tra_3d_hm_est_0/', tf_heatmap_wrapper(est_hm3s[:,:,:,0])) + tf.summary.image('tra_3d_hm_gt_0/', tf_heatmap_wrapper(gt_hm3s[:,:,:,0])) + tf.summary.image('tra_um_xy_gt_0', tf_heatmap_wrapper(gt_xy_angle[:,:,:,0])) + tf.summary.image('tra_um_z_gt_0', tf_heatmap_wrapper(gt_z_angle[:,:,:,0])) + tf.summary.image('tra_um_xy_est_0', tf_heatmap_wrapper(est_xy_angle[:,:,:,0])) + tf.summary.image('tra_um_z_est_0', tf_heatmap_wrapper(est_z_angle[:,:,:,0])) + + if FLAGS.debug_level > 2: + tf.summary.image('tra_hm_gt_1/', tf_heatmap_wrapper(gt_hms[:,:,:,5])) + tf.summary.image('tra_hm_est_1/', tf_heatmap_wrapper(est_hms[:,:,:,5])) + tf.summary.image('tra_3d_hm_est_1/', tf_heatmap_wrapper(est_hm3s[:,:,:,5])) + tf.summary.image('tra_3d_hm_gt_1/', tf_heatmap_wrapper(gt_hm3s[:,:,:,5])) + tf.summary.image('tra_um_xy_est_1', tf_heatmap_wrapper(est_xy_angle[:,:,:,5])) + tf.summary.image('tra_um_z_est_1', tf_heatmap_wrapper(est_z_angle[:,:,:,5])) + tf.summary.image('tra_um_xy_gt_1', tf_heatmap_wrapper(gt_xy_angle[:,:,:,5])) + tf.summary.image('tra_um_z_gt_1', tf_heatmap_wrapper(gt_z_angle[:,:,:,5])) + + return total_loss + + def opt(self, lr): + '''return the optimizer of the model + ''' + return tf.train.AdamOptimizer(lr, beta1=self._adam_beta1) + + # validation and test + def test(self, dms, poses, cfgs, coms, 
reuse_variables=True): + '''the validation step to show the result from the validation set + + ''' + batch_size = dms.get_shape()[0].value + # 1st phase, gpu computation + normed_dms = data.preprocess.norm_dm(dms, coms) + end_points = self.inference(normed_dms, cfgs, coms, reuse_variables=reuse_variables, is_training=False) + + est_hms = end_points['hm_outs'][-1] + + tiny_normed_dms = tf.image.resize_images(normed_dms, (self._output_height, self._output_width), 2) + est_ums = end_points['um_outs'][-1] + est_hm3s = end_points['hm3_outs'][-1] + + est_oms = self._resume_om(est_hm3s, est_ums) + + est_normed_poses = self._xyz_estimation(est_hms, est_oms, est_hm3s, tiny_normed_dms, cfgs, coms) + est_normed_poses = tf.reshape(est_normed_poses, + (est_normed_poses.get_shape()[0].value, -1)) + xyz_pts = unnorm_xyz_pose(est_normed_poses, coms) + + def to_uvd_fn(elem): + xyz_pt, cfg = elem[0], elem[1] + return [data.util.xyz2uvd_op(xyz_pt, CameraConfig(*tf.unstack(cfg,axis=0))), cfg] + uvd_pts, _ = tf.map_fn(to_uvd_fn, [xyz_pts, cfgs]) + gt_uvd_pts, _ = tf.map_fn(to_uvd_fn, [poses, cfgs]) + + resized_hms = tf.image.resize_images(est_hms, (self._input_height, self._input_width)) + hm_uvd_pts, _ = self._uvd_estimation_op(resized_hms, tf.ones_like(resized_hms)) + + # for gt visulization + gt_normed_poses = norm_xyz_pose(poses, coms) + gt_hms = self._hm_2d(poses, cfgs, self._output_height, self._output_width) + xyzs = generate_xyzs_from_multi_cfgs(tiny_normed_dms, cfgs, coms) + xyzs = tf.tile(xyzs, [1,1,1,self._jnt_num]) + gt_oms = tf.reshape(gt_normed_poses, (-1,1,1,3*self._jnt_num)) - xyzs + gt_hm3s = self._hm_3d(gt_oms) + gt_ums = self._um(gt_oms, gt_hm3s) + gt_xy_angle = self._vis_um_xy(gt_ums) + gt_z_angle = self._vis_um_z(est_ums) + + # add summayries + est_xy_angle = self._vis_um_xy(est_ums) + est_z_angle = self._vis_um_z(est_ums) + if FLAGS.debug_level > 0: + tf.summary.image('val_pts/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(uvd_pts, (batch_size,-1,3))), + collections=['val_summaries']) + tf.summary.image('gt_pts/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(gt_uvd_pts, (batch_size,-1,3))), + collections=['val_summaries']) + if FLAGS.debug_level > 1: + tf.summary.image('gt_hm/', tf_heatmap_wrapper(gt_hms[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('gt_hm3', tf_heatmap_wrapper(gt_hm3s[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_hm/', tf_heatmap_wrapper(est_hms[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_hm3', tf_heatmap_wrapper(est_hm3s[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_dm/', dms, collections=['val_summaries']) + tf.summary.image('val_pts_hm/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(hm_uvd_pts, (batch_size,-1,3))), + collections=['val_summaries']) + + if FLAGS.debug_level > 2: + tf.summary.image('gt_xy', tf_heatmap_wrapper(gt_xy_angle[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('gt_z', tf_heatmap_wrapper(gt_z_angle[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_xy', tf_heatmap_wrapper(est_xy_angle[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_z', tf_heatmap_wrapper(est_z_angle[:,:,:,0]), + collections=['val_summaries']) + + + self.val_summary_op = tf.summary.merge_all(key='val_summaries') + + # interface to fetch output + self.uvd_pts = uvd_pts + self.xyz_pts = xyz_pts + self.val_dms = dms + self.est_hms = est_hms + self.gt_pose = poses + print('testing graph is established') + + 
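
Both the training loss and this test graph share the same per-pixel encoding: a truncated 3D distance heatmap, (max_dist_3d - ||offset||) / max_dist_3d clipped at zero, plus a unit offset vector, which _resume_om recombines into offsets. A compact numpy rendering of the idea for a single joint; this is a sketch of the round trip, not a line-by-line port of _hm_3d/_um/_resume_om:

    import numpy as np

    MAX_DIST_3D = 0.8  # 80 mm after POSE_NORM_RATIO = 100 normalisation

    def encode(offset):
        # offset: (h, w, 3) normalised xyz offsets from each pixel to the joint
        dist = np.linalg.norm(offset, axis=-1)                      # (h, w)
        hm3 = np.maximum((MAX_DIST_3D - dist) / MAX_DIST_3D, 0.0)   # 3D heatmap
        unit = offset / np.maximum(dist[..., None], 1e-8)           # unit offsets
        return hm3, unit

    def decode(hm3, unit):
        # invert the encoding: distance = MAX_DIST_3D - hm3 * MAX_DIST_3D
        dist = MAX_DIST_3D - hm3 * MAX_DIST_3D
        return unit * dist[..., None]
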
@property + def is_validate(self): + return True if self._val_dataset else False + + @property + def name(self): + return '%s_%s'%(self._model_desc, self._net_desc) + + @property + def train_dir(self): + return os.path.join(self._base_dir, self.name) + + @property + def summary_dir(self): + return os.path.join(self.train_dir, 'summary') + + def _mean_shift(self, can_pts, num_it=10, band_width=0.8): + '''mean shift over the candidate point + Args: + can_pts: candidate points, (b,j,n,3) + num_it: number of iterations + band_width: bandwidth of the kernel + Returns: + centers: the density maximal points + ''' + def joint_fn(can_pt): + '''iteration over joint + Args: + can_pt: (n,3) + Returns: + center: (3) + ''' + # initialization + num_quan = 2.0 + quan_pt = tf.clip_by_value((can_pt+1.0)*num_quan, 0, 2*num_quan-0.1) + quan_pt = tf.to_int64(quan_pt) + + quan_hm = tf.scatter_nd(quan_pt, tf.ones(num_pt,), + (int(2*num_quan),int(2*num_quan),int(2*num_quan))) + curr_pt = tf.where(tf.equal(quan_hm, tf.reduce_max(quan_hm)))[-1] + curr_pt = tf.divide(tf.to_float(curr_pt), num_quan) - 1.0 + curr_pt += 0.5/num_quan + + # iteration + for _ in range(num_it): + s = tf.reduce_sum((can_pt - curr_pt)**2, axis=-1) + s = tf.expand_dims(tf.exp(inverse_sigma*s), axis=-1) + curr_pt = tf.reduce_sum(tf.multiply(can_pt, s), axis=0) + curr_pt = tf.divide(curr_pt, tf.reduce_sum(s)) + curr_pt = tf.reshape(curr_pt, (1,3)) + + curr_pt = tf.reshape(curr_pt, (3,)) + return curr_pt + + def batch_fn(can_pt): + '''iteration over batch + Args: + can_pt: (j,n,3) + Returns: + centers: (j,3) + ''' + return tf.map_fn(joint_fn ,can_pt) + + num_jnt = can_pts.get_shape()[1].value + num_pt = can_pts.get_shape()[2].value + inverse_sigma = -1.0 / (2*band_width*band_width) + + return tf.map_fn(batch_fn, can_pts) + + def _generate_candidates(self, hms, xyzs, num_pt): + '''generate the candidates to do mean shift, from xyzs + Args: + hms: estimated heatmaps, (b,h,w,j) + xyzs: the xyz points, (b,h,w,j*3) + num_pt: the number of candidates + Returns: + can_pts: candidate points, (b,j,n,3) + ''' + def fn(elems): + hm, xyz = elems[0], elems[1] + hm = tf.reshape(hm, (-1, jnt_num)) + xyz = tf.reshape(xyz, (-1, 3*jnt_num)) + + hm_list = tf.unstack(hm, axis=-1) + xyz_list = tf.unstack(xyz, axis=-1) + can_list = [] + + for j in range(jnt_num): + weights, indices = tf.nn.top_k(hm_list[j], k=num_pt, sorted=True) + xx = tf.gather(xyz_list[j*3], indices) + yy = tf.gather(xyz_list[j*3+1], indices) + zz = tf.gather(xyz_list[j*3+2], indices) + can_list.append(tf.stack([xx,yy,zz], axis=1)) + can_pts = tf.stack(can_list, axis=0) + return [can_pts, hms] + + jnt_num = hms.get_shape()[-1].value + can_pts, _ = tf.map_fn(fn, [hms, xyzs]) + return can_pts + + def _get_candidate_weights(self, xyz_pts, coms, cfgs, hms, dms): + '''the weights measures how xyz_pts fits the 2d hms estimation and dms observation + Args: + xyz_pts: the candidate points, (b,j,n,3) + coms: centers of mass, (b,3) + cfgs: camera configurations, (b,6) + hms: estimated 2D heatmap, (b,h,w,j) + dms: depth map, (b,h,w,1) + Returns: + weights: the weights of the corresponding points, (b,j,n,1) + ''' + def fn(elems): + xyz_pt, com, cfg, hm, dm = elems[0], elems[1], elems[2], elems[3], elems[4] + + xx,yy,zz = tf.unstack(tf.reshape(xyz_pt,(-1,3)), axis=-1) + xyz_pt = tf.reshape(xyz_pt, (-1,)) + + xyz_pt = tf.multiply(xyz_pt, data.preprocess.POSE_NORM_RATIO) + tf.tile(com,[jnt_num*pnt_num]) + xyz_pt = tf.reshape(xyz_pt, (-1,3)) + + w_ratio = cfg[4] / out_w + h_ratio = cfg[5] / out_h + new_cfg = 
CameraConfig(cfg[0]/w_ratio, cfg[1]/h_ratio, + cfg[2]/w_ratio, cfg[3]/h_ratio, + out_w, out_h) + uvd_pt = xyz2uvd_op(xyz_pt, new_cfg) + uvd_pt = tf.reshape(uvd_pt, (-1, 3)) + uu, vv, dd = tf.unstack(uvd_pt, axis=-1) + uu = tf.to_int32(uu+0.5) + vv = tf.to_int32(vv+0.5) + jj = tf.tile(tf.expand_dims(tf.range(jnt_num),axis=-1), [1,pnt_num]) + jj = tf.reshape(jj, (-1,)) + + indices = tf.stack([vv,uu,jj], axis=-1) + weights = tf.gather_nd(hm, indices) + weights = tf.reshape(weights, (jnt_num, pnt_num, 1)) + + #we also clip the values of depth + dm = tf.squeeze(dm) + dm = tf.divide(dm*data.preprocess.D_RANGE - data.preprocess.D_RANGE*0.5, + data.preprocess.POSE_NORM_RATIO) + indices = tf.stack([vv,uu], axis=-1) + od = tf.gather_nd(dm, indices) + zz = tf.maximum(zz, od) + xyz_pt = tf.stack([xx,yy,zz], axis=-1) + xyz_pt = tf.reshape(xyz_pt, (jnt_num, pnt_num, 3)) + + return [weights, xyz_pt, cfg, hm, dm] + + out_h, out_w = self._output_height, self._output_width + jnt_num = xyz_pts.get_shape()[1].value + pnt_num = xyz_pts.get_shape()[2].value + weights, xyz_pts, _, _, _ = tf.map_fn(fn, [xyz_pts, coms, cfgs, hms, dms]) + return weights, xyz_pts + + def _weighted_mean_shift(self, can_pts, weights, num_it, band_width): + '''mean shift over the candidate point + Args: + can_pts: candidate points, (b,j,n,3) + weights: weights of candidate points, (b,j,n,1) + num_it: number of iterations + band_width: bandwidth of the kernel + Returns: + centers: the density maximal points + ''' + def joint_fn(elems): + '''iteration over joint + Args: + can_pt: (n,3), elems[0] + weight: (n,1), elems[1] + Returns: + center: (3) + ''' + can_pt, weight = elems[0], elems[1] + # initialization + num_quan = 2.0 + quan_pt = tf.clip_by_value((can_pt+1.0)*num_quan, 0, 2*num_quan-0.1) + quan_pt = tf.to_int64(quan_pt) + + quan_hm = tf.scatter_nd(quan_pt, tf.squeeze(weight), + (int(2*num_quan),int(2*num_quan),int(2*num_quan))) + curr_pt = tf.where(tf.equal(quan_hm, tf.reduce_max(quan_hm)))[-1] + curr_pt = tf.divide(tf.to_float(curr_pt), num_quan) - 1.0 + curr_pt += 0.5/num_quan + + # iteration + for _ in range(num_it): + s = tf.reduce_sum((can_pt - curr_pt)**2, axis=-1) + s = tf.expand_dims(tf.exp(inverse_sigma*s), axis=-1) + s = tf.multiply(s, weight) + curr_pt = tf.reduce_sum(tf.multiply(can_pt, s), axis=0) + curr_pt = tf.divide(curr_pt, tf.reduce_sum(s)) + curr_pt = tf.reshape(curr_pt, (1,3)) + + curr_pt = tf.reshape(curr_pt, (3,)) + return [curr_pt, can_pt] + + def batch_fn(elems): + '''iteration over batch + Args: + can_pt: (j,n,3), elems[0] + weights: (j,n,1), elems[1] + Returns: + centers: (j,3) + ''' + return tf.map_fn(joint_fn ,elems) + + num_jnt = can_pts.get_shape()[1].value + num_pt = can_pts.get_shape()[2].value + inverse_sigma = -1.0 / (2*band_width*band_width) + + centers, _ = tf.map_fn(batch_fn, [can_pts, weights]) + return centers + + def _xyz_estimation(self, hms, oms, hm3s, dms, cfgs, coms): + '''use meanshift to get the final estimation + Args: + hms: the heatmap returned from 2D joint detection, (b,h,w,j) + oms: the 3D offset maps, (b,h,w,3*j) + hm3s: the 3D heaetmap, (b,h,w,j) + dms: the normalized depth map, (b,h,w,1) + cfgs: camera configurations, (b,6) + Returns: + xyz_pts: the estimated 3d joint, (b,3*j) + ''' + # get dense joint estimation + jnt_num = hms.get_shape()[-1].value + xyzs = generate_xyzs_from_multi_cfgs(dms, cfgs, coms) + xyzs = tf.tile(xyzs, [1,1,1,self._jnt_num]) + orig_xyzs= xyzs + + xyzs = xyzs + oms + + # get the weight map for candidate selection + # refined_hms = tf.multiply(hms, hm3s) 
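+        # The candidate weight map below combines the 2D and 3D heatmaps.
+        # Using (hms + 1.0) rather than the plain product above presumably
+        # keeps the 3D heatmap response alive where the 2D detector output
+        # is near zero; pixels with no depth reading (normalized depth
+        # below -0.99) are masked out entirely by dms_mask.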
+ refined_hms = tf.multiply(hms+1.0, hm3s) + # refined_hms = hm3s + # refined_hms = hms + dms_mask = tf.where(tf.less(dms, -0.99), tf.zeros_like(dms), tf.ones_like(dms)) + refined_hms = tf.multiply(refined_hms, dms_mask) + + num_pt = 5 + can_pts = self._generate_candidates(refined_hms, xyzs, num_pt=num_pt) + + # weighted scheme + can_weights, _ = self._get_candidate_weights(can_pts, coms, cfgs, hms, dms) + xyz_pts = self._weighted_mean_shift(can_pts, can_weights, num_it=10, band_width=0.4) + + # unweighted scheme + # xyz_pts = self._mean_shift(can_pts, num_it=10, band_width=0.4) + + # for visualization + ori_pts = self._generate_candidates(refined_hms, orig_xyzs, num_pt=num_pt) + + self.can_pts = can_pts + self.ori_pts = ori_pts + return xyz_pts + + + def _uvd_estimation_op(self, hms, ds): + ''' find the argmax from heatmaps and corresponding depth maps, and get the final estimation + Args: + hms: the heatmap with the same size as the initial captured image by camera + ds: the depth value of the coresponding points + Returns: + the uvd points of the joint + ''' + width = hms.shape[2] + + def fn(elems): + hough_hm, hough_dm = elems[0], elems[1] + uvd_pts = [] + + hough_hm_list = tf.unstack(hough_hm, axis=-1) + hough_dm_list = tf.unstack(hough_dm, axis=-1) + for j in range(self._jnt_num): + hh = hough_hm_list[j] + hd = hough_dm_list[j] + + idx = tf.where(tf.equal(hh, tf.reduce_max(hh))) + dd = tf.gather_nd(hd, idx) + + uu, vv, dd = tf.cast(idx[0][1],tf.float32), tf.cast(idx[0][0], tf.float32), dd[0] + uvd_pts.append(tf.stack([uu,vv,dd])) + return [tf.concat(uvd_pts, axis=-1), ds] + return tf.map_fn(fn, [hms, ds]) + + def do_test(self, sess, summary_writer, step, names=None): + '''execute computation of the inference + a fast version of inference + ''' + # during training + if names is None: + f = open(self._log_path, 'a') + summary_str, gt_vals, xyz_vals = sess.run( + [self.val_summary_op, self.gt_pose, self.xyz_pts]) + summary_writer.add_summary(summary_str, step) + + maxJntError=[] + f.write('[%s] step %d\n'%(datetime.now(), step)) + for xyz_val, gt_val in zip(xyz_vals, gt_vals): + maxJntError.append(Evaluation.maxJntError(xyz_val, gt_val)) + diff = (xyz_val-gt_val).reshape(-1,3) + dist = alg.norm(diff, axis=1).reshape(-1,1) + error_mat = np.concatenate((diff, dist), axis=1) + print(error_mat) + f.write(np.array_str(error_mat)+'\n') + print('validate error:', maxJntError) + f.write('validation error: {}\n'.format(maxJntError)) + f.flush() + f.close() + return + + if step%100 == 0: + summary_str, xyz_vals, gt_vals, names = sess.run( + [self.val_summary_op, self.xyz_pts, self.gt_pose, names]) + summary_writer.add_summary(summary_str, step) + + maxJntError=[] + for xyz_val, gt_val in zip(xyz_vals, gt_vals): + maxJntError.append(Evaluation.maxJntError(xyz_val, gt_val)) + diff = (xyz_val-gt_val).reshape(-1,3) + dist = alg.norm(diff, axis=1).reshape(-1,1) + print(np.concatenate((diff, dist), axis=1)) + print('[step: %d]test error:'%step, maxJntError) + print('---\n') + return gt_vals, xyz_vals, names + + gt_vals, xyz_vals, names = sess.run([self.gt_pose, self.xyz_pts, names]) + return gt_vals, xyz_vals, names + +'''unit test +''' +def run_train(dataset, val_dataset, restore_step=None): + net_module_name = 'network.'+FLAGS.net_module + + net_module = importlib.import_module(net_module_name, package=None) + net = net_module.detect_net + net_name = net_module.TOWER_NAME + + model = JointDetectionModel(dataset, net, epoch=FLAGS.epoch, net_desc=net_name, + val_dataset = val_dataset) + train(model, 
restore_step) + +def run_test(train_dataset, test_dataset, selected_step=None): + net_module_name = 'network.'+FLAGS.net_module + + net_module = importlib.import_module(net_module_name, package=None) + net = net_module.detect_net + net_name = net_module.TOWER_NAME + + model = JointDetectionModel(train_dataset, net, epoch=FLAGS.epoch, net_desc=net_name, + val_dataset = test_dataset) + + test(model, selected_step) + +if __name__ == '__main__': + if FLAGS.dataset == 'bighand': + import data.bigHand + dataset = data.bigHand.BigHandDataset('training') + val_dataset = data.bigHand.BigHandDataset('testing') + + elif FLAGS.dataset == 'nyu': + import data.nyu + dataset = data.nyu.NyuDataset('training') + val_dataset = data.nyu.NyuDataset('testing') + + elif FLAGS.dataset == 'icvl': + import data.icvl + dataset = data.icvl.IcvlDataset('training') + val_dataset = data.icvl.IcvlDataset('testing') + + elif FLAGS.dataset == 'msra': + import data.msra + dataset = data.msra.MsraDataset('training', FLAGS.pid) + val_dataset = data.msra.MsraDataset('testing', FLAGS.pid) + + if FLAGS.is_train: + run_train(dataset, val_dataset) + else: + run_test(dataset, val_dataset, -1) diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/test_model.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/test_model.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f06073e93b67a561feee819475157b5eab28cd --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/test_model.py @@ -0,0 +1,94 @@ +from __future__ import print_function, absolute_import, division + +import gpu_config +import tensorflow as tf +import network.slim as slim +import numpy as np +import time, os +import cv2 +from datetime import datetime +from data.evaluation import Evaluation + +FLAGS = tf.app.flags.FLAGS + +def test(model, selected_step): + with tf.Graph().as_default(): + total_test_num = model.val_dataset.exact_num + + dms, poses, cfgs, coms, names = model.batch_input_test(model.val_dataset) + model.test(dms, poses, cfgs, coms, reuse_variables=None) + + # dms, poses, names = model.batch_input_test(model.val_dataset) + # model.test(dms, poses, reuse_variables=None) + + sess = tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False)) + + init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) + sess.run(init_op) + + if selected_step is not None: + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt-%d'%selected_step) + saver = tf.train.Saver(tf.global_variables()) + saver.restore(sess, checkpoint_path) + print('[test_model]model has been resotored from %s'%checkpoint_path) + + tf.train.start_queue_runners(sess=sess) + summary_writer = tf.summary.FileWriter( + model.summary_dir+'_'+model.val_dataset.subset, + graph=sess.graph) + + res_path = os.path.join(model.train_dir, '%s-%s-result'%(model.val_dataset.subset, datetime.now())) + res_path = res_path.replace(' ', '_') + + res_txt_path = res_path+'.txt' + if os.path.exists(res_txt_path): + os.remove(res_txt_path) + err_path = res_path+'_error.txt' + f = open(res_txt_path, 'w') + + # res_vid_path = res_path+'.avi' + # codec = cv2.cv.CV_FOURCC('X','V','I','D') + # the output size is defined by the visualization tool of matplotlib + # vid = cv2.VideoWriter(res_vid_path, codec, 25, (640, 480)) + + print('[test_model]begin test') + test_num = 0 + step = 0 + maxJntError = [] + while True: + start_time = time.time() + try: + gt_vals, 
xyz_vals, name_vals = model.do_test(sess, summary_writer, step, names) + except tf.errors.OutOfRangeError: + print('run out of range') + break + + duration = time.time()-start_time + + for xyz_val, gt_val, name_val in zip(xyz_vals, gt_vals, name_vals): + maxJntError.append(Evaluation.maxJntError(xyz_val, gt_val)) + + xyz_val = xyz_val.tolist() + res_str = '%s\t%s\n'%(name_val, '\t'.join(format(pt, '.4f') for pt in xyz_val)) + res_str = res_str.replace('/', '\\') + f.write(res_str) + # vid.write(vis_val) + test_num += 1 + if test_num >= total_test_num: + print('finish test') + f.close() + Evaluation.plotError(maxJntError, err_path) + return + f.flush() + + if step%101 == 0: + print('[%s]: %d/%d computed, with %.2fs'%(datetime.now(), step, model.max_steps, duration)) + + step += 1 + + + print('finish test') + f.close() + Evaluation.plotError(maxJntError, 'result.txt') diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_multi_gpu.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_multi_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..45866a8e3a72011b1001bb6b696bcc45bd5ff765 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_multi_gpu.py @@ -0,0 +1,159 @@ +'''provide a multi-thread training scheme +WARNING: this file is still under development, is not guaranteed to work. +''' +from __future__ import print_function, absolute_import, division + +import gpu_config +import tensorflow as tf +import network.slim as slim +import numpy as np +import time, os +from datetime import datetime +import model.memory_util as memory_util + +FLAGS = tf.app.flags.FLAGS + +def _average_gradients(tower_grads): + '''calcualte the average gradient for each shared variable across all towers on multi gpus + Args: + tower_grads: list of lists of (gradient, variable) tuples. 
len(tower_grads)=#tower, len(tower_grads[0])=#vars + Returns: + List of paris (gradient, variable) where the gradients has been averaged across + all towers + ''' + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # over different variables + grads = [] + for g, _ in grad_and_vars: + # over different towers + expanded_g = tf.expand_dims(g,0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + +def train(model): + '''train the provided model + model: provide several required interface to train + ''' + with tf.Graph().as_default(), tf.device('/cpu:0'): + global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), trainable=False) + lr = tf.train.exponential_decay(model.init_lr, + global_step, + model.decay_steps, + model.lr_decay_factor, + staircase=True) + opt = model.opt(lr) + + '''split the batch into num_gpus groups, + do the backpropagation on each gpu seperately, + then average the gradidents on each of which and update + ''' + assert FLAGS.batch_size % FLAGS.num_gpus == 0, ( + 'the batch_size should be divisible wrt num_gpus') + dms, poses = model.batch_input + dm_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=dms) + pose_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=poses) + + # calculate the gradients for each gpu + tower_grads = [] + reuse_variables = None + + for i in range(FLAGS.num_gpus): + # i = 1 + # with tf.device('/gpu:%d'%gpu_config.gpu_list[i]): + with tf.device('gpu:%d'%i): + with tf.name_scope('%s_%d'%(model.name, i)) as scope: + with slim.arg_scope([slim.variables.variable], device='/cpu:0'): + # with slim.arg_scope([slim.variables.variable], device='/gpu:%d'%gpu_config.gpu_list[i]): + loss = model.loss(dm_splits[i], pose_splits[i], reuse_variables) + + # tf.get_variable_scope().reuse_variables() + # reuse variables after the first tower + reuse_variables = True + # only retain the summaries for the last tower + summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) + # retain the batch-norm optimization only from the last tower + batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION, + scope) + + grads = opt.compute_gradients(loss) + tower_grads.append(grads) + print('setup %dth gpu on %d'%(i, gpu_config.gpu_list[i])) + + grads = _average_gradients(tower_grads) + + # TODO: add input summaries + # summaries.extend(input_summaries) + + summaries.append(tf.summary.scalar('learning_rate', lr)) + + for grad, var in grads: + if grad is not None: + summaries.append(tf.summary.histogram(var.op.name+'/gradients', grad)) + + apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) + + for var in tf.trainable_variables(): + summaries.append(tf.summary.histogram(var.op.name, var)) + + variable_averages = tf.train.ExponentialMovingAverage( + model.moving_average_decay, global_step) + variables_to_average = (tf.trainable_variables()+ + tf.moving_average_variables()) + variable_averages_op = variable_averages.apply(variables_to_average) + + batchnorm_update_op = tf.group(*batchnorm_updates) + # group all training operations into one + train_op = tf.group(apply_gradient_op, variable_averages_op, batchnorm_update_op) + + saver = tf.train.Saver(tf.global_variables()) + summary_op = tf.summary.merge(summaries) + init_op = tf.global_variables_initializer() + + memory_util.vlog(1) + + sess = 
tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False)) + + sess.run(init_op) + tf.train.start_queue_runners(sess=sess) + + summary_writer = tf.summary.FileWriter( + model.train_dir, + graph=sess.graph) + + # finally into the training loop + print('finally into the long long training loop') + + # for step in range(model.max_steps): + for step in range(1000): + start_time = time.time() + _, loss_value = sess.run([train_op, loss]) + duration = time.time() - start_time + + assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + + if step%10 == 0: + format_str = '[model/train_multi_gpu] %s: step %d, loss = %.2f, %.3f sec/batch, %.3f sec/sample' + print(format_str %(datetime.now(), step, loss_value, duration, duration/FLAGS.batch_size)) + + if step%100 == 0: + summary_str = sess.run(summary_op) + summary_writer.add_summary(summary_str, step) + + if step%1000 == 0 or (step+1) == model.max_steps: + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + + print('finish train') + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_single_gpu.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_single_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..1e72e3f4c260e5f1e9c1d1b7dba877867c057828 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_single_gpu.py @@ -0,0 +1,179 @@ +from __future__ import print_function, absolute_import, division + +import gpu_config +import tensorflow as tf +import network.slim as slim +import numpy as np +import time, os +from datetime import datetime + +FLAGS = tf.app.flags.FLAGS + +def _average_gradients(tower_grads): + '''calcualte the average gradient for each shared variable across all towers on multi gpus + Args: + tower_grads: list of lists of (gradient, variable) tuples. 
len(tower_grads)=#tower, len(tower_grads[0])=#vars + Returns: + List of paris (gradient, variable) where the gradients has been averaged across + all towers + ''' + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # over different variables + grads = [] + for g, _ in grad_and_vars: + # over different towers + expanded_g = tf.expand_dims(g,0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + +def train(model, restore_step=None): + '''train the provided model + model: provide several required interface to train + ''' + with tf.Graph().as_default(): + global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), trainable=False) + lr = tf.train.exponential_decay(model.init_lr, + global_step, + model.decay_steps, + model.lr_decay_factor, + staircase=True) + + print('[train] learning rate decays per %d steps with rate=%f'%( + model.decay_steps,model.lr_decay_factor)) + print('[train] initial learning_rate = %f'%model.init_lr) + tf.summary.scalar('learning_rate', lr) + opt = model.opt(lr) + + batches = model.batch_input(model.train_dataset) + + loss = model.loss(*batches) + tf.summary.scalar('loss', loss) + + if model.is_validate: + # set batch_size as 3 since tensorboard visualization + val_batches = model.batch_input(model.val_dataset, 3) + model.test(*val_batches) # don't need the name + + batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION) + + accu_steps = float(FLAGS.sub_batch) + + grads = opt.compute_gradients(loss) + accum_grads = [] + for grad, var in grads: + if grad is not None: + accum_grads.append(tf.Variable(tf.zeros_like(grad), trainable=False, + collections=[tf.GraphKeys.LOCAL_VARIABLES], + name=var.op.name+'_accu_grad')) + else: + accum_grads.append(tf.Variable(tf.zeros_like(var), trainable=False, + collections=[tf.GraphKeys.LOCAL_VARIABLES], + name=var.op.name+'_accu_grad')) + + reset_op = [grad.assign(tf.zeros_like(grad)) for grad in accum_grads] + accum_op = [accum_grads[i].assign_add(grad[0]) for i, grad in enumerate(grads)if grad[0] is not None] + + ave_grad = [(tf.clip_by_value(tf.divide(accum_grads[i], accu_steps), -0.2, 0.2), + grad[1]) for i, grad in enumerate(grads)] + apply_gradient_op = opt.apply_gradients(ave_grad, + global_step=global_step) + + for ave_grad, grad_and_var in zip(ave_grad, grads): + grad, var = grad_and_var[0], grad_and_var[1] + if grad is not None: + tf.summary.histogram(var.op.name, var) + tf.summary.histogram(var.op.name+'/gradients', ave_grad) + + # variable_averages = tf.train.ExponentialMovingAverage( + # model.moving_average_decay, global_step) + # variables_to_average = tf.trainable_variables() + # var_1, var_2 = tf.moving_average_variables()[0], tf.moving_average_variables()[1] + # variable_averages_op = variable_averages.apply(variables_to_average) + + batchnorm_update_op = tf.group(*batchnorm_updates) + # group all training operations into one + # train_op = tf.group(apply_gradient_op, variable_averages_op) + train_op = tf.group(apply_gradient_op) + + saver = tf.train.Saver(tf.global_variables()) + summary_op = tf.summary.merge_all() + + init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) + + sess = tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False)) + + sess.run(init_op) + start_step = 0 + # to resume the training + if restore_step 
is not None and restore_step>0: + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt-%d'%restore_step) + saver.restore(sess, checkpoint_path) + start_step = restore_step + + tf.train.start_queue_runners(sess=sess) + + #TODO: change to tf.train.SummaryWriter() + summary_writer = tf.summary.FileWriter( + model.summary_dir, + graph=sess.graph) + + # finally into the training loop + print('finally into the long long training loop') + + log_path = os.path.join(model.train_dir, 'training_log.txt') + f = open(log_path, 'a') + + for step in range(start_step, model.max_steps): + if f.closed: + f = open(log_path, 'a') + + start_time = time.time() + ave_loss = 0 + sess.run(reset_op) + for sub_step in range(int(accu_steps)): + _, _, loss_value = sess.run([accum_op, batchnorm_update_op, loss]) + assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + ave_loss += loss_value + + _ = sess.run([train_op]) + ave_loss /= accu_steps + duration = time.time() - start_time + + if step%5 == 0: + format_str = '[model/train_multi_gpu] %s: step %d/%d, loss = %.3f, %.3f sec/batch, %.3f sec/sample' + print(format_str%(datetime.now(), step, model.max_steps, ave_loss, duration, duration/(FLAGS.batch_size*accu_steps))) + f.write(format_str%(datetime.now(), step, model.max_steps, ave_loss, duration, duration/(FLAGS.batch_size*accu_steps))+'\n') + f.flush() + + if step%20 == 0: + summary_str = sess.run(summary_op) + summary_writer.add_summary(summary_str, step) + + + if step%40 == 0 and hasattr(model, 'do_test'): + model.do_test(sess, summary_writer, step) + + if step%100 == 0 or (step+1) == model.max_steps: + if not os.path.exists(model.train_dir): + os.makedirs(model.train_dir) + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + print('model has been saved to %s\n'%checkpoint_path) + f.write('model has been saved to %s\n'%checkpoint_path) + f.flush() + + print('finish train') + f.close() + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/__init__.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..26ca233c929c1bac6f1ee07145fe59658711c502 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TF-Slim grouped API. Please see README.md for details and usage.""" +# pylint: disable=unused-import + +# Collapse tf-slim into a single namespace. 
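+# Illustrative sketch of how the grouped modules are typically combined
+# (names such as `images` are placeholders, not part of this package):
+#
+#   import network.slim as slim
+#
+#   with slim.scopes.arg_scope([slim.ops.conv2d],
+#                              batch_norm_params={'decay': 0.999},
+#                              weight_decay=0.0005):
+#     net = slim.ops.conv2d(images, 64, [3, 3], stride=2, scope='conv1')
+#     net = slim.ops.max_pool(net, [2, 2], scope='pool1')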
+from __future__ import absolute_import + +from network.slim import losses +from network.slim import ops +from network.slim import scopes +from network.slim import variables +from network.slim.scopes import arg_scope diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/losses.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..78298d092fab3afc264e427fb060602c27ea97b0 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/losses.py @@ -0,0 +1,174 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains convenience wrappers for various Neural Network TensorFlow losses. + + All the losses defined here add themselves to the LOSSES_COLLECTION + collection. + + l1_loss: Define a L1 Loss, useful for regularization, i.e. lasso. + l2_loss: Define a L2 Loss, useful for regularization, i.e. weight decay. + cross_entropy_loss: Define a cross entropy loss using + softmax_cross_entropy_with_logits. Useful for classification. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +# In order to gather all losses in a network, the user should use this +# key for get_collection, i.e: +# losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) +LOSSES_COLLECTION = '_losses' + + +def l1_regularizer(weight=1.0, scope=None): + """Define a L1 regularizer. + + Args: + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + a regularizer function. + """ + def regularizer(tensor): + with tf.name_scope(scope, 'L1Regularizer', [tensor]): + l1_weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='weight') + return tf.multiply(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value') + return regularizer + + +def l2_regularizer(weight=1.0, scope=None): + """Define a L2 regularizer. + + Args: + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + a regularizer function. + """ + def regularizer(tensor): + with tf.name_scope(scope, 'L2Regularizer', [tensor]): + l2_weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='weight') + return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value') + return regularizer + + +def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None): + """Define a L1L2 regularizer. + + Args: + weight_l1: scale the L1 loss by this factor. + weight_l2: scale the L2 loss by this factor. + scope: Optional scope for name_scope. + + Returns: + a regularizer function. 
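+
+  Example (illustrative):
+    reg_fn = l1_l2_regularizer(weight_l1=1e-5, weight_l2=1e-4)
+    penalty = reg_fn(weights)  # weight_l1*sum(|weights|) + weight_l2*l2_loss(weights)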
+ """ + def regularizer(tensor): + with tf.name_scope(scope, 'L1L2Regularizer', [tensor]): + weight_l1_t = tf.convert_to_tensor(weight_l1, + dtype=tensor.dtype.base_dtype, + name='weight_l1') + weight_l2_t = tf.convert_to_tensor(weight_l2, + dtype=tensor.dtype.base_dtype, + name='weight_l2') + reg_l1 = tf.multiply(weight_l1_t, tf.reduce_sum(tf.abs(tensor)), + name='value_l1') + reg_l2 = tf.multiply(weight_l2_t, tf.nn.l2_loss(tensor), + name='value_l2') + return tf.add(reg_l1, reg_l2, name='value') + return regularizer + + +def l1_loss(tensor, weight=1.0, scope=None): + """Define a L1Loss, useful for regularize, i.e. lasso. + + Args: + tensor: tensor to regularize. + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + the L1 loss op. + """ + with tf.name_scope(scope, 'L1Loss', [tensor]): + weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='loss_weight') + loss = tf.multiply(weight, tf.reduce_sum(tf.abs(tensor)), name='value') + tf.add_to_collection(LOSSES_COLLECTION, loss) + return loss + + +def l2_loss(tensor, weight=1.0, scope=None): + """Define a L2Loss, useful for regularize, i.e. weight decay. + + Args: + tensor: tensor to regularize. + weight: an optional weight to modulate the loss. + scope: Optional scope for name_scope. + + Returns: + the L2 loss op. + """ + with tf.name_scope(scope, 'L2Loss', [tensor]): + weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='loss_weight') + loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value') + tf.add_to_collection(LOSSES_COLLECTION, loss) + return loss + + +def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0, + weight=1.0, scope=None): + """Define a Cross Entropy loss using softmax_cross_entropy_with_logits. + + It can scale the loss by weight factor, and smooth the labels. + + Args: + logits: [batch_size, num_classes] logits outputs of the network . + one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels. + label_smoothing: if greater than 0 then smooth the labels. + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + A tensor with the softmax_cross_entropy loss. + """ + logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape()) + with tf.name_scope(scope, 'CrossEntropyLoss', [logits, one_hot_labels]): + num_classes = one_hot_labels.get_shape()[-1].value + one_hot_labels = tf.cast(one_hot_labels, logits.dtype) + if label_smoothing > 0: + smooth_positives = 1.0 - label_smoothing + smooth_negatives = label_smoothing / num_classes + one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives + cross_entropy = tf.contrib.nn.deprecated_flipped_softmax_cross_entropy_with_logits( + logits, one_hot_labels, name='xentropy') + + weight = tf.convert_to_tensor(weight, + dtype=logits.dtype.base_dtype, + name='loss_weight') + loss = tf.multiply(weight, tf.reduce_mean(cross_entropy), name='value') + tf.add_to_collection(LOSSES_COLLECTION, loss) + return loss diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/ops.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/ops.py new file mode 100644 index 0000000000000000000000000000000000000000..3e047cc85351b96e98af9a2ae13a526ea40e0f76 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/ops.py @@ -0,0 +1,781 @@ +# Copyright 2016 Google Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains convenience wrappers for typical Neural Network TensorFlow layers. + + Additionally it maintains a collection with update_ops that need to be + updated after the ops have been computed, for example to update moving means + and moving variances of batch_norm. + + Ops that have different behavior during training or eval have an is_training + parameter. Additionally Ops that contain variables.variable have a trainable + parameter, which control if the ops variables are trainable or not. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import tensorflow as tf + +from tensorflow.python.training import moving_averages + +from network.slim import losses +from network.slim import scopes +from network.slim import variables + +# Used to keep the update ops done by batch_norm. +UPDATE_OPS_COLLECTION = '_update_ops_' + + +# the batch_norm here is batch_renorm implementation instead of batch norm +@scopes.add_arg_scope +def batch_norm(inputs, + decay=0.999, + center=True, + scale=False, + epsilon=0.001, + moving_vars='moving_vars', + activation=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a Batch ReNormalization layer. + + Args: + inputs: a tensor of size [batch_size, height, width, channels] + or [batch_size, channels]. + decay: decay for the moving average. + center: If True, subtract beta. If False, beta is not created and ignored. + scale: If True, multiply by gamma. If False, gamma is + not used. When the next layer is linear (also e.g. ReLU), this can be + disabled since the scaling can be done by the next layer. + epsilon: small float added to variance to avoid dividing by zero. + moving_vars: collection to store the moving_mean and moving_variance. + activation: activation function. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + + Returns: + a tensor representing the output of the operation. + + """ + inputs_shape = inputs.get_shape() + with tf.variable_scope(scope, 'BatchReNorm', [inputs], reuse=reuse): + axis = list(range(len(inputs_shape) - 1)) + params_shape = inputs_shape[-1:] + # Allocate parameters for the beta and gamma of the normalization. 
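+    # (Batch renormalization, cf. Ioffe 2017: during training the batch
+    # statistics are corrected by r = batch_std / moving_std and
+    # d = (batch_mean - moving_mean) / moving_std, both clipped via r_max
+    # and d_max; r_max and d_max are annealed from ~1 and ~0 towards 3 and
+    # 5 through the curr_t counter updated below.)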
+ beta, gamma = None, None + if center: + beta = variables.variable('beta', + params_shape, + initializer=tf.zeros_initializer(), + trainable=trainable, + restore=restore) + if scale: + gamma = variables.variable('gamma', + params_shape, + initializer=tf.ones_initializer(), + trainable=trainable, + restore=restore) + # Create moving_mean and moving_variance add them to + # GraphKeys.MOVING_AVERAGE_VARIABLES collections. + moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES] + moving_mean = variables.variable('moving_mean', + params_shape, + initializer=tf.zeros_initializer(), + trainable=False, + restore=restore, + collections=moving_collections) + moving_variance = variables.variable('moving_variance', + params_shape, + initializer=tf.ones_initializer(), + trainable=False, + restore=restore, + collections=moving_collections) + + r_max = variables.variable('r_max', + (1,), + initializer=tf.ones_initializer(), + trainable=False, + restore=restore) + d_max = variables.variable('d_max', + (1,), + initializer=tf.zeros_initializer(), + trainable=False, + restore=restore) + curr_t = variables.variable('curr_t', + (1,), + initializer=tf.zeros_initializer(), + trainable=False, + restore=restore) + + if is_training: + # Calculate the moments based on the individual batch. + mean, variance = tf.nn.moments(inputs, axis) + + update_moving_mean = moving_averages.assign_moving_average( + moving_mean, mean, decay) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean) + update_moving_variance = moving_averages.assign_moving_average( + moving_variance, variance, decay) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance) + + r_max_val = 3.0 + new_r = tf.divide(r_max_val, 1.0+(r_max_val-1.0)*tf.exp(-curr_t)) + update_r = r_max.assign(new_r) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_r) + + d_max_val = 5.0 + new_d = tf.divide(d_max_val, (1.0+(d_max_val/1e-3)-1.0)*tf.exp(-2.0*curr_t)) + update_d = d_max.assign(new_d) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_d) + + new_t = curr_t+1e-5 + update_t = curr_t.assign(new_t) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_t) + + # batch renorm + std = tf.sqrt(variance+epsilon) + moving_std = tf.sqrt(moving_variance+epsilon) + r = tf.divide(std, moving_std) + r = tf.stop_gradient(tf.clip_by_value(r, 1.0/r_max, r_max)) + + d = tf.divide(mean - moving_mean, moving_std) + d = tf.stop_gradient(tf.clip_by_value(d, -d_max, d_max)) + + outputs = tf.nn.batch_normalization( + inputs, mean, variance, None, None, epsilon) + outputs = tf.multiply(outputs, r) + d + + if scale: + outputs = tf.multiply(outputs, gamma) + if center: + outputs += beta + + else: + # Just use the moving_mean and moving_variance. + mean = moving_mean + variance = moving_variance + + # Normalize the activations. + outputs = tf.nn.batch_normalization( + inputs, mean, variance, beta, gamma, epsilon) + + outputs.set_shape(inputs.get_shape()) + if activation: + outputs = activation(outputs) + return outputs + + +def _two_element_tuple(int_or_tuple): + """Converts `int_or_tuple` to height, width. + + Several of the functions that follow accept arguments as either + a tuple of 2 integers or a single integer. A single integer + indicates that the 2 values of the tuple are the same. + + This functions normalizes the input value by always returning a tuple. + + Args: + int_or_tuple: A list of 2 ints, a single int or a tf.TensorShape. + + Returns: + A tuple with 2 values. + + Raises: + ValueError: If `int_or_tuple` it not well formed. 
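+
+  Example (illustrative):
+    _two_element_tuple(3)       # -> (3, 3)
+    _two_element_tuple([2, 3])  # -> (2, 3)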
+ """ + if isinstance(int_or_tuple, (list, tuple)): + if len(int_or_tuple) != 2: + raise ValueError('Must be a list with 2 elements: %s' % int_or_tuple) + return int(int_or_tuple[0]), int(int_or_tuple[1]) + if isinstance(int_or_tuple, int): + return int(int_or_tuple), int(int_or_tuple) + if isinstance(int_or_tuple, tf.TensorShape): + if len(int_or_tuple) == 2: + return int_or_tuple[0], int_or_tuple[1] + raise ValueError('Must be an int, a list with 2 elements or a TensorShape of ' + 'length 2') + + +@scopes.add_arg_scope +def conv2d(inputs, + num_filters_out, + kernel_size, + stride=1, + padding='SAME', + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + batch_norm_params=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a 2D convolution followed by an optional batch_norm layer. + + conv2d creates a variable called 'weights', representing the convolutional + kernel, that is convolved with the input. If `batch_norm_params` is None, a + second variable called 'biases' is added to the result of the convolution + operation. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_filters_out: the number of output filters. + kernel_size: a list of length 2: [kernel_height, kernel_width] of + of the filters. Can be an int if both values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: one of 'VALID' or 'SAME'. + activation: activation function. + stddev: standard deviation of the truncated guassian weight distribution. + bias: the initial value of the biases. + weight_decay: the weight decay. + batch_norm_params: parameters for the batch_norm. If is None don't use it. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + Returns: + a tensor representing the output of the operation. 
+ + """ + with tf.variable_scope(scope, 'Conv', [inputs], reuse=reuse): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + num_filters_in = inputs.get_shape()[-1] + weights_shape = [kernel_h, kernel_w, + num_filters_in, num_filters_out] + weights_initializer = tf.truncated_normal_initializer(stddev=stddev) + l2_regularizer = None + if weight_decay and weight_decay > 0: + l2_regularizer = losses.l2_regularizer(weight_decay) + weights = variables.variable('weights', + shape=weights_shape, + initializer=weights_initializer, + regularizer=l2_regularizer, + trainable=trainable, + restore=restore) + conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], + padding=padding) + if batch_norm_params is not None: + with scopes.arg_scope([batch_norm], is_training=is_training, + trainable=trainable, restore=restore): + outputs = batch_norm(conv, **batch_norm_params) + else: + bias_shape = [num_filters_out,] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.bias_add(conv, biases) + if activation: + outputs = activation(outputs) + return outputs + +@scopes.add_arg_scope +def depthwise_conv2d(inputs, + num_filters_out, + kernel_size, + stride=1, + padding='VALID', + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + is_norm=False, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a 2D depth wise convolution followed by an optional batch_norm layer. + this applies channels differnt filters to each channel independently + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_filters_out: the number of output filters. + kernel_size: a list of length 2: [kernel_height, kernel_width] of + of the filters. Can be an int if both values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: one of 'VALID' or 'SAME'. + activation: activation function. + stddev: standard deviation of the truncated guassian weight distribution. + bias: the initial value of the biases. + weight_decay: the weight decay. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + Returns: + a tensor representing the output of the operation. 
+ + """ + with tf.variable_scope(scope, 'ConvDepthWise', [inputs], reuse=reuse): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + num_filters_in = inputs.get_shape()[-1].value + weights_shape = [kernel_h, kernel_w, + num_filters_in, num_filters_out] + weights_initializer = tf.truncated_normal_initializer(stddev=stddev) + l2_regularizer = None + if weight_decay and weight_decay > 0: + l2_regularizer = losses.l2_regularizer(weight_decay) + weights = variables.variable('weights', + shape=weights_shape, + initializer=weights_initializer, + regularizer=l2_regularizer, + trainable=trainable, + restore=restore) + + batch_size = inputs.get_shape()[0].value + num_pt = inputs.get_shape()[1].value + + conv = tf.nn.depthwise_conv2d(inputs, weights, [1, stride_h, stride_w, 1], + padding=padding) + + if is_norm: + outputs = tf.reshape(conv, (batch_size*num_pt, num_filters_out, num_filters_in)) + outputs = batch_norm(outputs, decay=0.999) + outputs = tf.reshape(conv, (batch_size, num_pt, num_filters_out, num_filters_in)) + else: + bias_shape = [conv.get_shape()[-1],] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.bias_add(conv, biases) + outputs = tf.reshape(outputs, (batch_size,num_pt,num_filters_out,num_filters_in)) + + if activation: + outputs = activation(outputs) + return outputs + +@scopes.add_arg_scope +def depthwise_conv2d_v1(inputs, + num_filters_out, + kernel_size, + stride=1, + padding='VALID', + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + batch_norm_params=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a 2D depth wise convolution followed by an optional batch_norm layer. + this applies channels differnt filters to each channel independently + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_filters_out: the number of output filters. + kernel_size: a list of length 2: [kernel_height, kernel_width] of + of the filters. Can be an int if both values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: one of 'VALID' or 'SAME'. + activation: activation function. + stddev: standard deviation of the truncated guassian weight distribution. + bias: the initial value of the biases. + weight_decay: the weight decay. + batch_norm_params: parameters for the batch_norm. If is None don't use it. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + Returns: + a tensor representing the output of the operation. 
+
+  """
+  with tf.variable_scope(scope, 'ConvDepthWise', [inputs], reuse=reuse):
+    kernel_h, kernel_w = _two_element_tuple(kernel_size)
+    stride_h, stride_w = _two_element_tuple(stride)
+    num_filters_in = inputs.get_shape()[-1]
+    weights_shape = [kernel_h, kernel_w,
+                     num_filters_in, num_filters_out]
+    weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
+    l2_regularizer = None
+    if weight_decay and weight_decay > 0:
+      l2_regularizer = losses.l2_regularizer(weight_decay)
+    weights = variables.variable('weights',
+                                 shape=weights_shape,
+                                 initializer=weights_initializer,
+                                 regularizer=l2_regularizer,
+                                 trainable=trainable,
+                                 restore=restore)
+    conv = tf.nn.depthwise_conv2d(inputs, weights, [1, stride_h, stride_w, 1],
+                                  padding=padding)
+    if batch_norm_params is not None:
+      with scopes.arg_scope([batch_norm], is_training=is_training,
+                            trainable=trainable, restore=restore):
+        outputs = batch_norm(conv, **batch_norm_params)
+    else:
+      bias_shape = [conv.get_shape()[-1],]
+      bias_initializer = tf.constant_initializer(bias)
+      biases = variables.variable('biases',
+                                  shape=bias_shape,
+                                  initializer=bias_initializer,
+                                  trainable=trainable,
+                                  restore=restore)
+      outputs = tf.nn.bias_add(conv, biases)
+    if activation:
+      outputs = activation(outputs)
+    return outputs
+
+def _deconv_output_length(input_length, filter_size, padding, stride):
+  """Determines the output length of a transposed convolution given the
+  input length, kernel size and stride.
+  Args:
+    padding: 'SAME', 'VALID' or 'FULL'
+  Returns:
+    output length
+  """
+  # normalize the padding string so the comparisons below match
+  padding = padding.upper()
+  if input_length is None:
+    return None
+  input_length *= stride
+  if padding == 'VALID':
+    input_length += max(filter_size-stride, 0)
+  elif padding == 'FULL':
+    input_length -= (stride + filter_size - 2)
+  return input_length
+
+@scopes.add_arg_scope
+def deconv(inputs,
+           num_filters_out,
+           kernel_size,
+           stride,
+           padding='SAME',
+           activation=tf.nn.relu,
+           stddev=0.01,
+           bias=0.0,
+           weight_decay=0,
+           batch_norm_params=None,
+           is_training=True,
+           trainable=True,
+           restore=True,
+           scope=None,
+           reuse=None):
+  """Adds a 2D deconvolution operator followed by an optional batch_norm layer.
+  Args:
+    inputs: a tensor of size [batch_size, height, width, channels]
+    num_filters_out: number of output feature channels
+    padding: one of 'VALID', 'SAME' or 'FULL'
+  Returns:
+    a tensor representing the output of the operation
+  """
+  with tf.variable_scope(scope, 'Deconv', [inputs], reuse=reuse):
+    batch_size = inputs.get_shape()[0]
+    height, width = inputs.get_shape()[1], inputs.get_shape()[2]
+    num_filters_in = inputs.get_shape()[-1]
+
+    kernel_h, kernel_w = _two_element_tuple(kernel_size)
+    stride_h, stride_w = _two_element_tuple(stride)
+
+    weights_shape = [kernel_h, kernel_w,
+                     num_filters_out, num_filters_in]
+    weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
+    l2_regularizer = None
+    if weight_decay and weight_decay > 0:
+      l2_regularizer = losses.l2_regularizer(weight_decay)
+    weights = variables.variable('weights',
+                                 shape=weights_shape,
+                                 initializer=weights_initializer,
+                                 regularizer=l2_regularizer,
+                                 trainable=trainable,
+                                 restore=restore)
+
+    out_height = _deconv_output_length(height, kernel_h, padding, stride_h)
+    out_width = _deconv_output_length(width, kernel_w, padding, stride_w)
+    output_shape = tf.stack([batch_size, out_height, out_width, num_filters_out])
+    deconv = tf.nn.conv2d_transpose(inputs, weights, output_shape,
+                                    [1, stride_h, stride_w, 1], padding=padding)
+
+    if batch_norm_params is not None:
+      with scopes.arg_scope([batch_norm],
is_training=is_training, + trainable=trainable, restore=restore): + outputs = batch_norm(deconv, **batch_norm_params) + else: + bias_shape = [num_filters_out,] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.bias_add(deconv, biases) + if activation: + outputs = activation(outputs) + return outputs + + +@scopes.add_arg_scope +def fc(inputs, + num_units_out, + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + batch_norm_params=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a fully connected layer followed by an optional batch_norm layer. + + FC creates a variable called 'weights', representing the fully connected + weight matrix, that is multiplied by the input. If `batch_norm` is None, a + second variable called 'biases' is added to the result of the initial + vector-matrix multiplication. + + Args: + inputs: a [B x N] tensor where B is the batch size and N is the number of + input units in the layer. + num_units_out: the number of output units in the layer. + activation: activation function. + stddev: the standard deviation for the weights. + bias: the initial value of the biases. + weight_decay: the weight decay. + batch_norm_params: parameters for the batch_norm. If is None don't use it. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + + Returns: + the tensor variable representing the result of the series of operations. + """ + with tf.variable_scope(scope, 'FC', [inputs], reuse=reuse): + num_units_in = inputs.get_shape()[1] + weights_shape = [num_units_in, num_units_out] + weights_initializer = tf.truncated_normal_initializer(stddev=stddev) + l2_regularizer = None + if weight_decay and weight_decay > 0: + l2_regularizer = losses.l2_regularizer(weight_decay) + weights = variables.variable('weights', + shape=weights_shape, + initializer=weights_initializer, + regularizer=l2_regularizer, + trainable=trainable, + restore=restore) + if batch_norm_params is not None: + outputs = tf.matmul(inputs, weights) + with scopes.arg_scope([batch_norm], is_training=is_training, + trainable=trainable, restore=restore): + outputs = batch_norm(outputs, **batch_norm_params) + else: + bias_shape = [num_units_out,] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.xw_plus_b(inputs, weights, biases) + if activation: + outputs = activation(outputs) + return outputs + + +def one_hot_encoding(labels, num_classes, scope=None): + """Transform numeric labels into onehot_labels. + + Args: + labels: [batch_size] target labels. + num_classes: total number of classes. + scope: Optional scope for name_scope. + Returns: + one hot encoding of the labels. 
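+
+  Example (illustrative):
+    labels [1, 0] with num_classes=3 map to [[0., 1., 0.], [1., 0., 0.]].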
+ """ + with tf.name_scope(scope, 'OneHotEncoding', [labels]): + batch_size = labels.get_shape()[0] + indices = tf.expand_dims(tf.range(0, batch_size), 1) + labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype) + concated = tf.concat(axis=1, values=[indices, labels]) + onehot_labels = tf.sparse_to_dense( + concated, tf.stack([batch_size, num_classes]), 1.0, 0.0) + onehot_labels.set_shape([batch_size, num_classes]) + return onehot_labels + + +@scopes.add_arg_scope +def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): + """Adds a Max Pooling layer. + + It is assumed by the wrapper that the pooling is only done per image and not + in depth or batch. + + Args: + inputs: a tensor of size [batch_size, height, width, depth]. + kernel_size: a list of length 2: [kernel_height, kernel_width] of the + pooling kernel over which the op is computed. Can be an int if both + values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: the padding method, either 'VALID' or 'SAME'. + scope: Optional scope for name_scope. + + Returns: + a tensor representing the results of the pooling operation. + Raises: + ValueError: if 'kernel_size' is not a 2-D list + """ + with tf.name_scope(scope, 'MaxPool', [inputs]): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + return tf.nn.max_pool(inputs, + ksize=[1, kernel_h, kernel_w, 1], + strides=[1, stride_h, stride_w, 1], + padding=padding) + +@scopes.add_arg_scope +def upsampling_nearest(inputs, scale): + assert scale>1, 'scale of upsampling should be larger then 1' + new_h = int(inputs.shape[1]*scale) + new_w = int(inputs.shape[2]*scale) + return tf.image.resize_images(inputs, [new_h, new_w], + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + + +@scopes.add_arg_scope +def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): + """Adds a Avg Pooling layer. + + It is assumed by the wrapper that the pooling is only done per image and not + in depth or batch. + + Args: + inputs: a tensor of size [batch_size, height, width, depth]. + kernel_size: a list of length 2: [kernel_height, kernel_width] of the + pooling kernel over which the op is computed. Can be an int if both + values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: the padding method, either 'VALID' or 'SAME'. + scope: Optional scope for name_scope. + + Returns: + a tensor representing the results of the pooling operation. + """ + with tf.name_scope(scope, 'AvgPool', [inputs]): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + return tf.nn.avg_pool(inputs, + ksize=[1, kernel_h, kernel_w, 1], + strides=[1, stride_h, stride_w, 1], + padding=padding) + + +@scopes.add_arg_scope +def dropout(inputs, keep_prob=0.5, is_training=True, scope=None): + """Returns a dropout layer applied to the input. + + Args: + inputs: the tensor to pass to the Dropout layer. + keep_prob: the probability of keeping each input unit. + is_training: whether or not the model is in training mode. If so, dropout is + applied and values scaled. Otherwise, inputs is returned. + scope: Optional scope for name_scope. + + Returns: + a tensor representing the output of the operation. 
+ """ + if is_training and keep_prob > 0: + with tf.name_scope(scope, 'Dropout', [inputs]): + return tf.nn.dropout(inputs, keep_prob) + else: + return inputs + + +def flatten(inputs, scope=None): + """Flattens the input while maintaining the batch_size. + + Assumes that the first dimension represents the batch. + + Args: + inputs: a tensor of size [batch_size, ...]. + scope: Optional scope for name_scope. + + Returns: + a flattened tensor with shape [batch_size, k]. + Raises: + ValueError: if inputs.shape is wrong. + """ + if len(inputs.get_shape()) < 2: + raise ValueError('Inputs must be have a least 2 dimensions') + dims = inputs.get_shape()[1:] + k = dims.num_elements() + with tf.name_scope(scope, 'Flatten', [inputs]): + return tf.reshape(inputs, [-1, k]) + + +def repeat_op(repetitions, inputs, op, *args, **kwargs): + """Build a sequential Tower starting from inputs by using an op repeatedly. + + It creates new scopes for each operation by increasing the counter. + Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1') + it will repeat the given op under the following variable_scopes: + conv1/Conv + conv1/Conv_1 + conv1/Conv_2 + + Args: + repetitions: number or repetitions. + inputs: a tensor of size [batch_size, height, width, channels]. + op: an operation. + *args: args for the op. + **kwargs: kwargs for the op. + + Returns: + a tensor result of applying the operation op, num times. + Raises: + ValueError: if the op is unknown or wrong. + """ + scope = kwargs.pop('scope', None) + with tf.variable_scope(scope, 'RepeatOp', [inputs]): + tower = inputs + for _ in range(repetitions): + tower = op(tower, *args, **kwargs) + return tower + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/scopes.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/scopes.py new file mode 100644 index 0000000000000000000000000000000000000000..2c2fb0a2efa7d30eaddb36fc30265f30cbaeb9ef --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/scopes.py @@ -0,0 +1,170 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the new arg_scope used for TF-Slim ops. + + Allows one to define models much more compactly by eliminating boilerplate + code. This is accomplished through the use of argument scoping (arg_scope). 
+
+  Example of how to use scopes.arg_scope:
+
+  with scopes.arg_scope([ops.conv2d], padding='SAME',
+                        stddev=0.01, weight_decay=0.0005):
+    net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
+    net = ops.conv2d(net, 256, [5, 5], scope='conv2')
+
+  The first call to conv2d will overwrite padding:
+    ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
+               stddev=0.01, weight_decay=0.0005, scope='conv1')
+
+  The second call to Conv will use predefined args:
+    ops.conv2d(inputs, 256, [5, 5], padding='SAME',
+               stddev=0.01, weight_decay=0.0005, scope='conv2')
+
+  Example of how to reuse an arg_scope:
+  with scopes.arg_scope([ops.conv2d], padding='SAME',
+                        stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope:
+    net = ops.conv2d(net, 256, [5, 5], scope='conv1')
+    ....
+
+  with scopes.arg_scope(conv2d_arg_scope):
+    net = ops.conv2d(net, 256, [5, 5], scope='conv2')
+
+  Example of how to use scopes.add_arg_scope:
+
+  @scopes.add_arg_scope
+  def conv2d(*args, **kwargs)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import functools
+
+from tensorflow.python.framework import ops
+
+_ARGSTACK_KEY = ("__arg_stack",)
+
+_DECORATED_OPS = set()
+
+
+def _get_arg_stack():
+  stack = ops.get_collection(_ARGSTACK_KEY)
+  if stack:
+    return stack[0]
+  else:
+    stack = [{}]
+    ops.add_to_collection(_ARGSTACK_KEY, stack)
+    return stack
+
+
+def _current_arg_scope():
+  stack = _get_arg_stack()
+  return stack[-1]
+
+
+def _add_op(op):
+  key_op = (op.__module__, op.__name__)
+  if key_op not in _DECORATED_OPS:
+    _DECORATED_OPS.add(key_op)
+
+
+@contextlib.contextmanager
+def arg_scope(list_ops_or_scope, **kwargs):
+  """Stores the default arguments for the given set of list_ops.
+
+  For usage, please see examples at top of the file.
+
+  Args:
+    list_ops_or_scope: List or tuple of operations to set argument scope for or
+      a dictionary containing the current scope. When list_ops_or_scope is a dict,
+      kwargs must be empty. When list_ops_or_scope is a list or tuple, then
+      every op in it needs to be decorated with @add_arg_scope to work.
+    **kwargs: keyword=value that will define the defaults for each op in
+      list_ops. All the ops need to accept the given set of arguments.
+
+  Yields:
+    the current_scope, which is a dictionary of {op: {arg: value}}
+  Raises:
+    TypeError: if list_ops is not a list or a tuple.
+    ValueError: if any op in list_ops has not been decorated with @add_arg_scope.
+  """
+  if isinstance(list_ops_or_scope, dict):
+    # Assumes that list_ops_or_scope is a scope that is being reused.
+    if kwargs:
+      raise ValueError("When attempting to re-use a scope by supplying a "
+                       "dictionary, kwargs must be empty.")
+    current_scope = list_ops_or_scope.copy()
+    try:
+      _get_arg_stack().append(current_scope)
+      yield current_scope
+    finally:
+      _get_arg_stack().pop()
+  else:
+    # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs.
+    if not isinstance(list_ops_or_scope, (list, tuple)):
+      raise TypeError("list_ops_or_scope must either be a list/tuple or reused "
+                      "scope (i.e. 
dict)") + try: + current_scope = _current_arg_scope().copy() + for op in list_ops_or_scope: + key_op = (op.__module__, op.__name__) + if not has_arg_scope(op): + raise ValueError("%s is not decorated with @add_arg_scope", key_op) + if key_op in current_scope: + current_kwargs = current_scope[key_op].copy() + current_kwargs.update(kwargs) + current_scope[key_op] = current_kwargs + else: + current_scope[key_op] = kwargs.copy() + _get_arg_stack().append(current_scope) + yield current_scope + finally: + _get_arg_stack().pop() + + +def add_arg_scope(func): + """Decorates a function with args so it can be used within an arg_scope. + + Args: + func: function to decorate. + + Returns: + A tuple with the decorated function func_with_args(). + """ + @functools.wraps(func) + def func_with_args(*args, **kwargs): + current_scope = _current_arg_scope() + current_args = kwargs + key_func = (func.__module__, func.__name__) + if key_func in current_scope: + current_args = current_scope[key_func].copy() + current_args.update(kwargs) + return func(*args, **current_args) + _add_op(func) + return func_with_args + + +def has_arg_scope(func): + """Checks whether a func has been decorated with @add_arg_scope or not. + + Args: + func: function to check. + + Returns: + a boolean. + """ + key_op = (func.__module__, func.__name__) + return key_op in _DECORATED_OPS diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/variables.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/variables.py new file mode 100644 index 0000000000000000000000000000000000000000..fcd6e55d1bb8df016b1dd0591a650b0c93d3a530 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/variables.py @@ -0,0 +1,289 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains convenience wrappers for creating variables in TF-Slim. + +The variables module is typically used for defining model variables from the +ops routines (see slim.ops). Such variables are used for training, evaluation +and inference of models. + +All the variables created through this module would be added to the +MODEL_VARIABLES collection, if you create a model variable outside slim, it can +be added with slim.variables.add_variable(external_variable, reuse). + +Usage: + weights_initializer = tf.truncated_normal_initializer(stddev=0.01) + l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005) + weights = variables.variable('weights', + shape=[100, 100], + initializer=weights_initializer, + regularizer=l2_regularizer, + device='/cpu:0') + + biases = variables.variable('biases', + shape=[100], + initializer=tf.zeros_initializer(), + device='/cpu:0') + + # More complex example. 
+ + net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1') + net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2') + with slim.arg_scope([variables.variable], restore=False): + net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3') + + # Get all model variables from all the layers. + model_variables = slim.variables.get_variables() + + # Get all model variables from a specific the layer, i.e 'conv1'. + conv1_variables = slim.variables.get_variables('conv1') + + # Get all weights from all the layers. + weights = slim.variables.get_variables_by_name('weights') + + # Get all bias from all the layers. + biases = slim.variables.get_variables_by_name('biases') + + # Get all variables to restore. + # (i.e. only those created by 'conv1' and 'conv2') + variables_to_restore = slim.variables.get_variables_to_restore() + +************************************************ +* Initializing model variables from a checkpoint +************************************************ + +# Create some variables. +v1 = slim.variables.variable(name="v1", ..., restore=False) +v2 = slim.variables.variable(name="v2", ...) # By default restore=True +... +# The list of variables to restore should only contain 'v2'. +variables_to_restore = slim.variables.get_variables_to_restore() +restorer = tf.train.Saver(variables_to_restore) +with tf.Session() as sess: + # Restore variables from disk. + restorer.restore(sess, "/tmp/model.ckpt") + print("Model restored.") + # Do some work with the model + ... + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from network.slim import scopes + +# Collection containing all the variables created using slim.variables +MODEL_VARIABLES = '_model_variables_' + +# Collection containing the slim.variables that are created with restore=True. +VARIABLES_TO_RESTORE = '_variables_to_restore_' + + +def add_variable(var, restore=True): + """Adds a variable to the MODEL_VARIABLES collection. + + Optionally it will add the variable to the VARIABLES_TO_RESTORE collection. + Args: + var: a variable. + restore: whether the variable should be added to the + VARIABLES_TO_RESTORE collection. + + """ + collections = [MODEL_VARIABLES] + if restore: + collections.append(VARIABLES_TO_RESTORE) + for collection in collections: + if var not in tf.get_collection(collection): + tf.add_to_collection(collection, var) + + +def get_variables(scope=None, suffix=None): + """Gets the list of variables, filtered by scope and/or suffix. + + Args: + scope: an optional scope for filtering the variables to return. + suffix: an optional suffix for filtering the variables to return. + + Returns: + a copied list of variables with scope and suffix. + """ + candidates = tf.get_collection(MODEL_VARIABLES, scope)[:] + if suffix is not None: + candidates = [var for var in candidates if var.op.name.endswith(suffix)] + return candidates + + +def get_variables_to_restore(): + """Gets the list of variables to restore. + + Returns: + a copied list of variables. + """ + return tf.get_collection(VARIABLES_TO_RESTORE)[:] + + +def get_variables_by_name(given_name, scope=None): + """Gets the list of variables that were given that name. + + Args: + given_name: name given to the variable without scope. + scope: an optional scope for filtering the variables to return. + + Returns: + a copied list of variables with the given name and prefix. 
+ """ + return get_variables(scope=scope, suffix=given_name) + + +def get_unique_variable(name): + """Gets the variable uniquely identified by that name. + + Args: + name: a name that uniquely identifies the variable. + + Returns: + a tensorflow variable. + + Raises: + ValueError: if no variable uniquely identified by the name exists. + """ + candidates = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name) + if not candidates: + raise ValueError('Couldnt find variable %s' % name) + + for candidate in candidates: + if candidate.op.name == name: + return candidate + raise ValueError('Variable %s does not uniquely identify a variable', name) + + +class VariableDeviceChooser(object): + """Slim device chooser for variables. + + When using a parameter server it will assign them in a round-robin fashion. + When not using a parameter server it allows GPU:0 placement otherwise CPU:0. + """ + + def __init__(self, + num_parameter_servers=0, + ps_device='/job:ps', + placement='CPU:0'): + """Initialize VariableDeviceChooser. + + Args: + num_parameter_servers: number of parameter servers. + ps_device: string representing the parameter server device. + placement: string representing the placement of the variable either CPU:0 + or GPU:0. When using parameter servers forced to CPU:0. + """ + self._num_ps = num_parameter_servers + self._ps_device = ps_device + self._placement = placement if num_parameter_servers == 0 else 'CPU:0' + self._next_task_id = 0 + + def __call__(self, op): + device_string = '' + if self._num_ps > 0: + task_id = self._next_task_id + self._next_task_id = (self._next_task_id + 1) % self._num_ps + device_string = '%s/task:%d' % (self._ps_device, task_id) + device_string += '/%s' % self._placement + return device_string + + +# TODO(sguada) Remove once get_variable is able to colocate op.devices. +def variable_device(device, name): + """Fix the variable device to colocate its ops.""" + if callable(device): + var_name = tf.get_variable_scope().name + '/' + name + var_def = tf.NodeDef(name=var_name, op='Variable') + device = device(var_def) + if device is None: + device = '' + return device + + +@scopes.add_arg_scope +def global_step(device=''): + """Returns the global step variable. + + Args: + device: Optional device to place the variable. It can be an string or a + function that is called to get the device for the variable. + + Returns: + the tensor representing the global step variable. + """ + global_step_ref = tf.get_collection(tf.GraphKeys.GLOBAL_STEP) + if global_step_ref: + return global_step_ref[0] + else: + collections = [ + VARIABLES_TO_RESTORE, + tf.GraphKeys.GLOBAL_VARIABLES, + tf.GraphKeys.GLOBAL_STEP, + ] + # Get the device for the variable. + with tf.device(variable_device(device, 'global_step')): + return tf.get_variable('global_step', shape=[], dtype=tf.int64, + initializer=tf.zeros_initializer(), + trainable=False, collections=collections) + + +@scopes.add_arg_scope +def variable(name, shape=None, dtype=tf.float32, initializer=None, + regularizer=None, trainable=True, collections=None, device='', + restore=True): + """Gets an existing variable with these parameters or creates a new one. + + It also add itself to a group with its name. + + Args: + name: the name of the new or existing variable. + shape: shape of the new or existing variable. + dtype: type of the new or existing variable (defaults to `DT_FLOAT`). + initializer: initializer for the variable if one is created. 
+
+    regularizer: a (Tensor -> Tensor or None) function; the result of
+      applying it on a newly created variable will be added to the collection
+      GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
+    trainable: If `True` also add the variable to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
+    collections: A list of collection names to which the Variable will be added.
+      Note that the variable is always also added to the tf.GraphKeys.GLOBAL_VARIABLES
+      and MODEL_VARIABLES collections.
+    device: Optional device to place the variable. It can be a string or a
+      function that is called to get the device for the variable.
+    restore: whether the variable should be added to the
+      VARIABLES_TO_RESTORE collection.
+
+  Returns:
+    The created or existing variable.
+  """
+  collections = list(collections or [])
+
+  # Make sure variables are added to tf.GraphKeys.GLOBAL_VARIABLES and MODEL_VARIABLES
+  collections += [tf.GraphKeys.GLOBAL_VARIABLES, MODEL_VARIABLES]
+  # Add to VARIABLES_TO_RESTORE if necessary
+  if restore:
+    collections.append(VARIABLES_TO_RESTORE)
+  # Remove duplicates
+  collections = set(collections)
+  # Get the device for the variable.
+  with tf.device(variable_device(device, name)):
+    return tf.get_variable(name, shape=shape, dtype=dtype,
+                           initializer=initializer, regularizer=regularizer,
+                           trainable=trainable, collections=collections)
diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/readme.md b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..7cf15b11cfdbc91262fb733922ba320d5257f774
--- /dev/null
+++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/readme.md
@@ -0,0 +1,40 @@
+# Dense 3D Regression for Hand Pose Estimation
+
+This repository contains a TensorFlow implementation of the [paper](https://arxiv.org/abs/1711.08996). It was developed and tested on Debian GNU/Linux 8 64-bit.
+
+## Requirements:
+- python 2.7
+- tensorflow == 1.3
+- [tfplot](https://github.com/wookayin/tensorflow-plot) (for visualization on tf summary files)
+- matplotlib >= 2.0.2
+- numpy
+- opencv >= 2.4 (optional, for cpu visualization)
+
+## Data Preparations:
+Download the datasets, create soft links for them to [exp/data](./exp/data) and run `python data/${dataset}.py` to create the TFRecord files. Details are [here](./exp/data).
+
+## Usage:
+Both training and testing functions are provided by `model/hourglass_um_crop_tiny.py`. Here is an example:
+```bash
+python model/hourglass_um_crop_tiny.py --dataset 'icvl' --batch_size 40 --num_stack 2 --fea_num 128 --debug_level 2 --is_train True
+```
+where the hyperparameter configuration is explained in the source Python files.
+
+## Results:
+We provide the estimation results of the proposed method for [ICVL](./exp/result/icvl.txt), [NYU](./exp/result/nyu.txt) and [MSRA15](./exp/result/msra.txt). They are xyz coordinates in mm; the 2D projection method is the function _xyz2uvd_ from [here](data/util.py#L23). Check [here](https://github.com/xinghaochen/awesome-hand-pose-estimation/tree/master/evaluation) for a comparison to other methods. Thanks @xinghaochen for providing the comparison.
+
+## Pretrained Models:
+Run the scripts below to download and install the trained model for the corresponding dataset; $ROOT denotes the root path of this project.
+```bash
+cd $ROOT
+./exp/scripts/fetch_icvl_models.sh
+./exp/scripts/fetch_msra_models.sh
+./exp/scripts/fetch_nyu_models.sh
+```
+To perform testing, simply run
+```bash
+python model/hourglass_um_crop_tiny.py --dataset 'icvl' --batch_size 3 --num_stack 2 --num_fea 128 --debug_level 2 --is_train False
+python model/hourglass_um_crop_tiny.py --dataset 'nyu' --batch_size 3 --num_stack 2 --num_fea 128 --debug_level 2 --is_train False
+python model/hourglass_um_crop_tiny.py --dataset 'msra' --pid 0 --batch_size 3 --num_stack 2 --num_fea 128 --debug_level 2 --is_train False
+```
+where the msra dataset additionally requires `--pid` to indicate which person to test on. In the [testing function](data/hourglass_um_crop_tiny.py#L23), the third argument indicates the training step of the model checkpoint to restore; a step of -1 restores our pre-trained model.
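
For reference, the slim-style layer wrappers defined in this change (`deconv`, `fc`, `max_pool`, `dropout`, `flatten`, together with `scopes.arg_scope`) are intended to be composed roughly as in the minimal sketch below. The sketch is illustrative only and is not part of the repository; the import path `network.ops` and the helper name `tiny_head` are assumptions made for this example.

```python
# Minimal illustrative sketch; the `network.ops` import path and `tiny_head`
# are assumptions for this example, not code from the repository.
import tensorflow as tf

from network import ops, scopes


def tiny_head(inputs, num_joints=14, is_training=True):
  """Example head on a feature map `inputs` of shape [batch, h, w, c].

  Assumes a statically known batch size, since deconv builds its
  output_shape from inputs.get_shape()[0].
  """
  # arg_scope pushes shared defaults onto every decorated op in the list,
  # so weight_decay/stddev need not be repeated at each call site.
  with scopes.arg_scope([ops.deconv, ops.fc], weight_decay=0.0005, stddev=0.01):
    net = ops.deconv(inputs, 64, kernel_size=4, stride=2, scope='up1')  # 2x upsample
    net = ops.max_pool(net, kernel_size=2, stride=2, scope='pool1')
    net = ops.flatten(net)
    net = ops.fc(net, 256, scope='fc1')
    net = ops.dropout(net, keep_prob=0.5, is_training=is_training)
    # Linear regression output: no activation on the last layer.
    return ops.fc(net, num_joints * 3, activation=None, scope='fc_out')
```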