diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/__init__.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/__init__.py @@ -0,0 +1 @@ + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/dataset_base.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/dataset_base.py new file mode 100644 index 0000000000000000000000000000000000000000..c7ed20d802ca7c5e8bc3976e74d9186c29f07511 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/dataset_base.py @@ -0,0 +1,240 @@ +# the base class of dataset + +from __future__ import print_function, division, absolute_import + +import gpu_config +import tensorflow as tf + +from collections import namedtuple +import time, os, cPickle, sys, threading, glob +from datetime import datetime +import time + +import numpy as np +import cv2 + +from data.util import * +Annotation = namedtuple('Annotation', 'name,pose') + +def _float_feature(value): + if isinstance(value, np.ndarray): + value = value + elif not isinstance(value, list): + value = [value] + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + +class BaseDataset(object): + '''provide basic utilities to convert the initial dataset to TFRecord files + and the interface to define readdata on the graph + ''' + def __init__(self, subset): + '''subset: e.g., train, validation, test, or train_1 in the nyu (with view 1) + ''' + self.subset = subset + + def loadAnnotation(self): + '''the annotation is a sequential list of Annotation namedtuple + ''' + raise NotImplementedError + + @property + def annotations(self): + raise NotImplementedError + + def convert_to_example(self, label): + '''load the image corresponding to the label.name, + then serialize the structure to tf.train.Example + ''' + raise NotImplementedError + + def saveSampleToRecord(self, idx_list, tar_file_path): + curr_list = [self.annotations[idx] for idx in idx_list] + # if os.path.exists(tar_file_path): + # print('%s alread written'%tar_file_path) + # sys.stdout.flush() + # return + + writer = tf.python_io.TFRecordWriter(tar_file_path) + for label in curr_list: + example = self.convert_to_example(label) + writer.write(example.SerializeToString()) + writer.close() + + def write_TFRecord_single_thread(self, thread_idx, thread_range, num_shards_per_thread): + print('Launching thread %d, with files from %d to %d'%(thread_idx, thread_range[0], thread_range[1])) + sys.stdout.flush() + spacing = np.linspace(thread_range[0], thread_range[1], num_shards_per_thread+1).astype(np.int) + + shard_range = [] + for idx in range(num_shards_per_thread): + 
shard_range.append((spacing[idx], spacing[idx+1])) + + if not hasattr(self, 'num_shards'): + '''in case of single thread + ''' + self.num_shards = num_shards_per_thread + + for curr_shard_idx, shard in enumerate(shard_range): + file_idx = thread_idx*num_shards_per_thread + curr_shard_idx + file_name = '%s-%d-of-%d'%(self.subset, file_idx, self.num_shards) + file_path = os.path.join(self.tf_dir, file_name) + print('[Thread %d] begin processing %d - %d images, to %s'%( + thread_idx,shard[0],shard[1],file_path)) + t1 = time.time() + sys.stdout.flush() + self.saveSampleToRecord(range(shard[0], shard[1]), file_path) + t2 = time.time() + print('[Thread {}]end at ={}, with {}s'.format(thread_idx, datetime.now(), t2-t1)) + + def write_TFRecord_multi_thread(self, num_threads, num_shards): + '''convert all the dataset to several file shards + num_threads: number of threads to load and save the data + num_shards: number of file segment on the harddisk + ''' + if not os.path.exists(self.tf_dir): + os.mkdir(self.tf_dir) + + assert not num_shards % num_threads, ( + 'please make the num_threads commensurate with file_shards') + self.num_shards = num_shards + self.num_threads = num_threads + num_shards_per_thread = int(num_shards/num_threads) + + self.loadAnnotation() + + spacing = np.linspace(0, len(self.annotations), num_threads+1).astype(np.int) + thread_range = [] + for idx in range(num_threads): + thread_range.append((spacing[idx], spacing[idx+1])) + + coord = tf.train.Coordinator() + threads = [] + print('begin writing at ', datetime.now()) + sys.stdout.flush() + for thread_idx in range(len(thread_range)): + args = (thread_idx, + thread_range[thread_idx], + num_shards_per_thread) + + t = threading.Thread(target=self.write_TFRecord_single_thread, args=args) + t.start() + threads.append(t) + + # wait all thread end + coord.join(threads) + + # interface to the batch iteration + @property + def filenames(self): + if self.subset == 'testing': + pattern = os.path.join(self.tf_dir, '%s-*'%'testing') + else: + pattern = os.path.join(self.tf_dir, '%s-*'%'training') + files = glob.glob(pattern) + print('[data.dataset_base]total file found = %d'%(len(files))) + return files + + @property + def is_train(self): + raise NotImplementedError + + @property + def approximate_num(self): + '''return: + the approximate total number of training set + ''' + raise NotImplementedError + + def get_batch_op(self, + batch_size, num_readers=1, num_preprocess_threads=1, + preprocess_op=None, + is_train=None): + '''return the operation on tf graph of + iteration over the given dataset + ''' + if is_train == None: + is_train = self.is_train + + with tf.name_scope('batch_processing'): + min_queue_examples = batch_size*1 + + if is_train: + assert num_readers >1, 'during training, num_readers should be greater than 1, to shuffle the input' + filename_queue = tf.train.string_input_producer( + self.filenames, capacity=32, shuffle=True) + + example_queue = tf.RandomShuffleQueue( + capacity=self.approximate_num_per_file*8 + 3*batch_size, + min_after_dequeue=self.approximate_num_per_file*8, + dtypes=[tf.string]) + + else: + filename_queue = tf.train.string_input_producer( + self.filenames, capacity=1, shuffle=False) + example_queue = tf.FIFOQueue( + capacity=min_queue_examples+batch_size, + dtypes=[tf.string]) + + if num_readers > 1: + enqueue_ops = [] + for _ in range(num_readers): + reader = tf.TFRecordReader() + _, value = reader.read(filename_queue) + enqueue_ops.append(example_queue.enqueue([value])) + + 
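
The reader/queue pipeline assembled in get_batch_op only yields data once its queue runners have been started inside a session. A minimal sketch of the driving loop, assuming an already-constructed dataset instance; the function and variable names below are illustrative and not part of this file:

    import tensorflow as tf

    def fetch_one_batch(dataset, batch_size=32):
        # build the graph-side batch op defined by get_batch_op above
        batch_op = dataset.get_batch_op(batch_size=batch_size,
                                        num_readers=2,
                                        num_preprocess_threads=2)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            # start the runners registered by string_input_producer and
            # add_queue_runner so the example queue is actually filled
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                batch = sess.run(batch_op)
            finally:
                coord.request_stop()
                coord.join(threads)
        return batch
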
tf.train.queue_runner.add_queue_runner( + tf.train.queue_runner.QueueRunner(example_queue, enqueue_ops)) + example_serialized = example_queue.dequeue() + else: + reader = tf.TFRecordReader() + _, example_serialized = reader.read(filename_queue) + + results = [] + for thread_idx in range(num_preprocess_threads): + dm, pose, name = self.parse_example(example_serialized) + if preprocess_op != None: + result = preprocess_op(dm, pose, self.cfg) + results.append(list(result)) + else: + results.append([dm, pose]) + + batch = tf.train.batch_join( + results, batch_size=batch_size, capacity=2*num_preprocess_threads*batch_size) + + return batch + + # TODO: merge this function to get_batch_op + def get_batch_op_test(self, batch_size, preprocess_op=None): + '''return the operation on tf graph of + iteration over the given dataset + ''' + with tf.name_scope('batch_processing'): + min_queue_examples = 1 + + filename_queue = tf.train.string_input_producer( + self.filenames, num_epochs=1, capacity=1, shuffle=False) + example_queue = tf.FIFOQueue( + capacity=10, + dtypes=[tf.string]) + + reader = tf.TFRecordReader() + _, example_serialized = reader.read(filename_queue) + + results = [] + + dm, pose, name = self.parse_example(example_serialized) + if preprocess_op != None: + result = preprocess_op(dm, pose, self.cfg) + results.append(list(result)+[name]) + else: + results.append([dm, pose, name]) + + batch = tf.train.batch_join( + results, batch_size=batch_size, capacity=2) + return batch + + def parse_example(self, example_serialized): + raise NotImplementedError diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/evaluation.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ab7958393f39c03db7312ffa85394e8c61cfc088 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/evaluation.py @@ -0,0 +1,109 @@ +import matplotlib.pyplot as plt +import numpy.linalg as alg + +class Evaluation(object): + def __init__(self): + pass + + @classmethod + def maxJntError(cls_obj, skel1, skel2): + diff = skel1.reshape(-1,3) - skel2.reshape(-1,3) + diff = alg.norm(diff, axis=1) + return diff.max() + + @classmethod + def meanJntError(cls_obj, skel1, skel2): + diff = skel1.reshape(-1,3) - skel2.reshape(-1,3) + diff = alg.norm(diff, axis=1) + return diff.mean() + + + @classmethod + def averageMaxJntError(cls_obj, score_list): + score_list = sorted(score_list) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=10.5): + th_idx += 1 + print '10mm percentage: %f'%(float(th_idx)/len(score_list)) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=20.5): + th_idx += 1 + print '20mm percentage: %f'%(float(th_idx)/len(score_list)) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=30.5): + th_idx += 1 + print '30mm percentage: %f'%(float(th_idx)/len(score_list)) + + th_idx = 0 + for i in range(0, len(score_list)): + if(score_list[i]<=40.5): + th_idx += 1 + print '40mm percentage: %f'%(float(th_idx)/len(score_list)) + + thresh_list = [thresh*5.0+0.5 for thresh in range(0, 17)] + precent_list = [1]*len(thresh_list) + + cur_score_idx = 0 + for i in range(0, len(thresh_list)): + th_idx = 0 + for j in range(0, len(score_list)): + if(score_list[j] 1: + enqueue_ops = [] + for _ in range(num_readers): + reader = tf.TFRecordReader() + _, value = reader.read(filename_queue) + 
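
The Evaluation helpers in evaluation.py above expect flattened (num_joints*3,) skeletons in millimetres. A small usage sketch with synthetic poses; the joint count and noise level are made up for illustration:

    import numpy as np
    from data.evaluation import Evaluation

    gt = np.zeros((14, 3), dtype=np.float32)           # ground-truth joints, mm
    pred = gt + np.random.normal(0.0, 5.0, gt.shape)   # noisy estimate

    worst = Evaluation.maxJntError(pred.ravel(), gt.ravel())
    mean = Evaluation.meanJntError(pred.ravel(), gt.ravel())
    print('max error = %.1f mm, mean error = %.1f mm' % (worst, mean))
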
enqueue_ops.append(example_queue.enqueue([value])) + + tf.train.queue_runner.add_queue_runner( + tf.train.queue_runner.QueueRunner(example_queue, enqueue_ops)) + example_serialized = example_queue.dequeue() + else: + reader = tf.TFRecordReader() + _, example_serialized = reader.read(filename_queue) + + results = [] + for thread_idx in range(num_preprocess_threads): + dm, pose, bbx, name = self.parse_example_test(example_serialized) + if preprocess_op != None: + result = preprocess_op(dm, pose, bbx, self.cfg) + results.append(list(result)) + else: + results.append([dm, pose]) + batch = tf.train.batch_join( + results, batch_size=batch_size, capacity=2*num_preprocess_threads*batch_size) + return batch + else: + return super(NyuDataset, self).get_batch_op(batch_size, + num_readers, + num_preprocess_threads, + preprocess_op, + is_train) + +def saveTFRecord(): + # reader = NyuDataset('training') + # reader.write_TFRecord_multi_thread(num_threads=30, num_shards=300) + + reader = NyuDataset('testing') + reader.write_TFRecord_multi_thread(num_threads=16, num_shards=16) + +if __name__ == '__main__': + saveTFRecord() diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/nyu_bbx.pkl b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/nyu_bbx.pkl new file mode 100644 index 0000000000000000000000000000000000000000..944d67e56d04480da5065707b7a53e4d63583f23 Binary files /dev/null and b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/nyu_bbx.pkl differ diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/preprocess.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..46425e94c2f59cb9fb2c11fb3a102872ec6dd4de --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/preprocess.py @@ -0,0 +1,268 @@ +from __future__ import print_function, absolute_import, division + +import gpu_config +import numpy as np +import tensorflow as tf +import data.util +from data.util import xyz2uvd_op, uvd2xyz_op, heatmap_from_xyz_op, CameraConfig +FLAGS = tf.app.flags.FLAGS + +def crop_from_xyz_pose(dm, pose, cfg, out_w, out_h, pad=20.0): + '''crop depth map by generate the bounding box according to the pose + Args: + dms: depth map + poses: either estimated or groundtruth in xyz coordinate + cfg: the initial camera configuration + out_w: output width + out_h: output height + Returns: + crop_dm: the cropped depth map + new_cfg: the new camera configuration for the cropped depth map + ''' + with tf.name_scope('crop'): + # determine bouding box from pose + in_h, in_w = dm.get_shape()[0].value, dm.get_shape()[1].value + uvd_pose = tf.reshape(xyz2uvd_op(pose,cfg), (-1,3)) + min_coor = tf.reduce_min(uvd_pose, axis=0) + max_coor = tf.reduce_max(uvd_pose, axis=0) + + top = tf.minimum(tf.maximum(min_coor[1]-pad, 0.0), cfg.h-2*pad) + left = tf.minimum(tf.maximum(min_coor[0]-pad, 0.0), cfg.w-2*pad) + bottom = tf.maximum(tf.minimum(max_coor[1]+pad, cfg.h), tf.cast(top, tf.float32)+2*pad-1) + right = tf.maximum(tf.minimum(max_coor[0]+pad, cfg.w), tf.cast(left, tf.float32)+2*pad-1) + + top = tf.cast(top, tf.int32) + left = tf.cast(left, tf.int32) + bottom = tf.cast(bottom, tf.int32) + right = tf.cast(right, tf.int32) + + cropped_dm = tf.image.crop_to_bounding_box(dm, + offset_height=top, + offset_width=left, + target_height=bottom-top, + target_width=right-left) + + longer_edge = tf.maximum(bottom-top, 
right-left) + offset_height = tf.to_int32(tf.divide(longer_edge-bottom+top, 2)) + offset_width = tf.to_int32(tf.divide(longer_edge-right+left, 2)) + cropped_dm = tf.image.pad_to_bounding_box(cropped_dm, + offset_height=offset_height, + offset_width=offset_width, + target_height=longer_edge, + target_width=longer_edge) + cropped_dm = tf.image.resize_images(cropped_dm, (out_h, out_w)) + + # to further earse the background + uvd_list = tf.unstack(uvd_pose, axis=-1) + + uu = tf.clip_by_value(tf.to_int32(uvd_list[0]), 0, in_w-1) + vv = tf.clip_by_value(tf.to_int32(uvd_list[1]), 0, in_h-1) + + dd = tf.gather_nd(dm, tf.stack([vv,uu], axis=-1)) + dd = tf.boolean_mask(dd, tf.greater(dd, 100)) + d_th = tf.reduce_min(dd) + 250.0 + if FLAGS.dataset == 'icvl': + cropped_dm = tf.where(tf.less(cropped_dm,500.0), cropped_dm, tf.zeros_like(cropped_dm)) + else: + cropped_dm = tf.where(tf.less(cropped_dm,d_th), cropped_dm, tf.zeros_like(cropped_dm)) + + with tf.name_scope('cfg'): + ratio_x = tf.cast(longer_edge/out_w, tf.float32) + ratio_y = tf.cast(longer_edge/out_h, tf.float32) + top = tf.cast(top, tf.float32) + left = tf.cast(left, tf.float32) + + new_cfg = tf.stack([cfg.fx/ratio_x, cfg.fy/ratio_y, + (cfg.cx-left+tf.to_float(offset_width))/ratio_x, + (cfg.cy-top+tf.to_float(offset_height))/ratio_y, + tf.cast(out_w,tf.float32), tf.cast(out_h,tf.float32)], axis=0) + return [cropped_dm, pose, new_cfg] + +def crop_from_bbx(dm, pose, bbx, cfg, out_w, out_h): + '''crop depth map by generate the bounding box according to the pose + Args: + dms: depth map + pose: groundtruth pose for further error evaluation + bbx: bounding box + cfg: the initial camera configuration + out_w: output width + out_h: output height + Returns: + crop_dm: the cropped depth map + new_cfg: the new camera configuration for the cropped depth map + ''' + with tf.name_scope('crop'): + top, left, bottom, right, d_th = bbx[0], bbx[1], bbx[2], bbx[3], bbx[4] + + top = tf.cast(top, tf.int32) + left = tf.cast(left, tf.int32) + bottom = tf.cast(bottom, tf.int32) + right = tf.cast(right, tf.int32) + + cropped_dm = tf.image.crop_to_bounding_box(dm, + offset_height=top, + offset_width=left, + target_height=bottom-top, + target_width=right-left) + + longer_edge = tf.maximum(bottom-top, right-left) + offset_height = tf.to_int32(tf.divide(longer_edge-bottom+top, 2)) + offset_width = tf.to_int32(tf.divide(longer_edge-right+left, 2)) + cropped_dm = tf.image.pad_to_bounding_box(cropped_dm, + offset_height=offset_height, + offset_width=offset_width, + target_height=longer_edge, + target_width=longer_edge) + cropped_dm = tf.image.resize_images(cropped_dm, (out_h, out_w)) + cropped_dm = tf.where(tf.less(cropped_dm,d_th), cropped_dm, tf.zeros_like(cropped_dm)) + + with tf.name_scope('cfg'): + ratio_x = tf.cast(longer_edge/out_w, tf.float32) + ratio_y = tf.cast(longer_edge/out_h, tf.float32) + top = tf.cast(top, tf.float32) + left = tf.cast(left, tf.float32) + + new_cfg = tf.stack([cfg.fx/ratio_x, cfg.fy/ratio_y, + (cfg.cx-left+tf.to_float(offset_width))/ratio_x, + (cfg.cy-top+tf.to_float(offset_height))/ratio_y, + tf.cast(out_w,tf.float32), tf.cast(out_h,tf.float32)], axis=0) + return [cropped_dm, pose, new_cfg] + +def center_of_mass(dm, cfg): + shape = tf.shape(dm) + c_h, c_w = shape[0], shape[1] + ave_u, ave_v = tf.cast(c_w/2, tf.float32), tf.cast(c_h/2, tf.float32) + ave_d = tf.reduce_mean(tf.boolean_mask(dm, tf.greater(dm,0))) + + ave_d = tf.maximum(ave_d, 200.0) + + ave_x = (ave_u-cfg[2])*ave_d/cfg[0] + ave_y = (ave_v-cfg[3])*ave_d/cfg[1] + 
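
center_of_mass back-projects the patch centre through the pinhole model, the same (u - cx) * d / fx relation used by _bpro in data/util.py. A plain numpy version of that relation; the intrinsics in the call are invented for the example:

    import numpy as np

    def backproject_uvd(u, v, d, cfg):
        # cfg follows CameraConfig order: (fx, fy, cx, cy, w, h)
        x = (u - cfg[2]) * d / cfg[0]
        y = (v - cfg[3]) * d / cfg[1]
        return np.array([x, y, d])

    # a pixel at the principal point maps to x = y = 0 at any depth
    print(backproject_uvd(320.0, 240.0, 500.0, (588.0, 587.0, 320.0, 240.0, 640.0, 480.0)))
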
ave_xyz=tf.stack([ave_x,ave_y,ave_d],axis=0) + return ave_xyz + +def norm_xyz_pose(xyz_poses, coms, pca_para=None): + jnt_num = int(xyz_poses.shape[1].value/3) + def fn(elems): + xyz_pose, com = elems[0], elems[1] + norm_xyz_pose = tf.divide(xyz_pose - tf.tile(com,[jnt_num]), POSE_NORM_RATIO) + return [norm_xyz_pose, com] + + norm_xyz_poses, _ = tf.map_fn(fn, [xyz_poses,coms]) + if pca_para is not None: + norm_xyz_poses = tf.nn.xw_plus_b(norm_xyz_poses, tf.transpose(pca_para[0]), pca_para[2]) + norm_xyz_poses = tf.divide(norm_xyz_poses, PCA_NORM_RATIO) + return norm_xyz_poses + +def unnorm_xyz_pose(norm_xyz_poses, coms, pca_para=None): + if pca_para is not None: + norm_xyz_poses = tf.multiply(norm_xyz_poses, PCA_NORM_RATIO) + norm_xyz_poses = tf.nn.xw_plus_b(norm_xyz_poses, pca_para[0], pca_para[1]) + # norm_xyz_poses = tf.matmul(norm_xyz_poses, pca_para[0]) + + jnt_num = int(norm_xyz_poses.shape[1].value/3) + def fn(elems): + norm_xyz_pose, com = elems[0], elems[1] + xyz_pose = tf.multiply(norm_xyz_pose, POSE_NORM_RATIO) + tf.tile(com,[jnt_num]) + return [xyz_pose, com] + + xyz_poses, _ = tf.map_fn(fn, [norm_xyz_poses,coms]) + return xyz_poses + +D_RANGE=300.0 +POSE_NORM_RATIO = 100.0 +PCA_NORM_RATIO = 5.0 + +def norm_dm(dms, coms): + def fn(elems): + dm, com = elems[0], elems[1] + max_depth = com[2]+D_RANGE*0.5 + min_depth = com[2]-D_RANGE*0.5 + mask = tf.logical_and(tf.less(dm, max_depth), tf.greater(dm, min_depth-D_RANGE*0.5)) + normed_dm = tf.where(mask, tf.divide(dm-min_depth, D_RANGE), -1.0*tf.ones_like(dm)) + return [normed_dm, com] + + norm_dms, _ = tf.map_fn(fn, [dms, coms]) + + return norm_dms + +def generate_xyzs_from_multi_cfgs(dms, cfgs, coms): + '''generate the point cloud from depth map + Args: + dms: the normalized depth map, (b,h,w,1) + cfgs: the corresponding camera configurations, (b, 6) + coms: the corresponding center of mass, (b, 3) + Returns: + xyzs: the normalized xyz point cloud, (b, h, w, 3) + ''' + + def fn(elem): + dm, cfg, com = elem[0], elem[1], elem[2] + + zz = tf.squeeze(dm, axis=-1) + min_depth = com[2]-D_RANGE*0.5 + max_depth = com[2]+D_RANGE*0.5 + zz = tf.where(tf.less(zz, -0.99), + tf.ones_like(zz)*max_depth, + tf.multiply(zz, D_RANGE)+min_depth) + + xx, yy = tf.meshgrid(tf.range(h), tf.range(w)) + xx = tf.to_float(xx) + yy = tf.to_float(yy) + + w_ratio = cfg[4]/w + h_ratio = cfg[5]/h + new_cfg = CameraConfig(cfg[0]/w_ratio, cfg[1]/h_ratio, + cfg[2]/w_ratio, cfg[3]/h_ratio, + w, h) + + xx = tf.multiply(xx-new_cfg[2], tf.divide(zz, new_cfg[0])) + yy = tf.multiply(yy-new_cfg[3], tf.divide(zz, new_cfg[1])) + + # renormalize the points as normalizing the pose + xx = tf.divide(xx-com[0], POSE_NORM_RATIO) + yy = tf.divide(yy-com[1], POSE_NORM_RATIO) + zz = tf.divide(zz-com[2], POSE_NORM_RATIO) + + xyz = tf.stack([xx,yy,zz], axis=-1) + return [xyz, cfg, com] + + h, w = dms.get_shape()[1].value, dms.get_shape()[2].value + xyzs, _, _ = tf.map_fn(fn, [dms, cfgs, coms]) + return xyzs + +def data_aug(dms, poses, cfgs, coms): + def fn(elems): + dm, pose, cfg, com = elems[0], elems[1], elems[2], elems[3] + # random rotation + angle = tf.random_uniform((1,),-1*np.pi,np.pi) + rot_dm = tf.contrib.image.rotate(dm,angle) + + uv_com = xyz2uvd_op(com, cfg) + uvd_pt = xyz2uvd_op(pose, cfg) - tf.tile(uv_com,[jnt_num]) + cost, sint = tf.cos(angle)[0], tf.sin(angle)[0] + rot_mat = tf.stack([cost,-sint,0, sint,cost,0, 0.0,0.0,1.0], axis=0) + rot_mat = tf.reshape(rot_mat, (3,3)) + + uvd_pt = tf.reshape(uvd_pt, (-1,3)) + rot_pose = tf.reshape(tf.matmul(uvd_pt, rot_mat), (-1,)) + + 
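
The rotation applied to the pose in data_aug mirrors the image rotation: the uvd joints are first centred on the hand centre, then multiplied by an in-plane rotation matrix. The same transform in plain numpy, as a sketch for checking the convention rather than part of the pipeline:

    import numpy as np

    def rotate_uvd_about_center(uvd_pts, center_uv, angle):
        # uvd_pts: (N, 3) joints in pixel/depth coordinates
        cost, sint = np.cos(angle), np.sin(angle)
        rot = np.array([[cost, -sint, 0.0],
                        [sint,  cost, 0.0],
                        [0.0,   0.0,  1.0]])
        offset = np.array([center_uv[0], center_uv[1], 0.0])
        return (uvd_pts - offset).dot(rot) + offset
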
# random elongate x,y edge + edge_ratio = tf.clip_by_value(tf.random_normal((2,), 1.0, 0.2), 0.9, 1.1) + target_height = tf.to_int32(tf.to_float(tf.shape(dm)[0])*edge_ratio[0]) + target_width = tf.to_int32(tf.to_float(tf.shape(dm)[1])*edge_ratio[1]) + # 1 stands for nearest neighour interpolation + rot_dm = tf.image.resize_images(rot_dm, (target_height, target_width), 1) + rot_dm = tf.image.resize_image_with_crop_or_pad(rot_dm, tf.shape(dm)[0], tf.shape(dm)[1]) + rot_pose = tf.multiply(rot_pose, tf.tile([edge_ratio[1],edge_ratio[0],1.0], [jnt_num])) + + rot_pose = rot_pose + tf.tile(uv_com, [jnt_num]) + rot_pose = uvd2xyz_op(rot_pose, cfg) + rot_pose = tf.reshape(rot_pose, (-1,)) + return [rot_dm, rot_pose, cfgs, coms] + + jnt_num = tf.to_int32(tf.shape(poses)[1]/3) + aug_dms, aug_poses, _, _ = tf.map_fn(fn, [dms, poses, cfgs, coms]) + aug_dms = tf.reshape(aug_dms, tf.shape(dms)) + return aug_dms, aug_poses + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/util.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/util.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b5a81dc1087983d159763b2243b11fc55970c7 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/util.py @@ -0,0 +1,188 @@ +from __future__ import print_function, division, absolute_import +import collections +import cv2, numpy as np +import scipy.stats as st + +import gpu_config +import tensorflow as tf + +CameraConfig = collections.namedtuple('CameraConfig', 'fx,fy,cx,cy,w,h') + +'''utilities for 2D-3D conversions +function with _op suffix returns a tf operation +''' + +'''_pro: perspective transformation + _bpro: back perspective transformation +''' +# fx, fy, cx, cy, w, h +# 0, 1, 2, 3, 4, 5 +_pro = lambda pt3, cfg: [pt3[0]*cfg[0]/pt3[2]+cfg[2], pt3[1]*cfg[1]/pt3[2]+cfg[3], pt3[2]] +_bpro = lambda pt2, cfg: [(pt2[0]-cfg[2])*pt2[2]/cfg[0], (pt2[1]-cfg[3])*pt2[2]/cfg[1], pt2[2]] + +def xyz2uvd(xyz, cfg): + '''xyz: list of xyz points + cfg: camera configuration + ''' + xyz = xyz.reshape((-1,3)) + # perspective projection function + uvd = [_pro(pt3, cfg) for pt3 in xyz] + return np.array(uvd) + +def uvd2xyz(uvd, cfg): + '''uvd: list of uvd points + cfg: camera configuration + ''' + uvd = uvd.reshape((-1,3)) + # backprojection + xyz = [_bpro(pt2, cfg) for pt2 in uvd] + return np.array(xyz) + +def xyz2uvd_op(xyz_pts, cfg): + '''xyz_pts: tensor of xyz points + camera_cfg: constant tensor of camera configuration + ''' + xyz_pts = tf.reshape(xyz_pts, (-1,3)) + xyz_list = tf.unstack(xyz_pts) + uvd_list = [_pro(pt, cfg) for pt in xyz_list] + uvd_pts = tf.stack(uvd_list) + return tf.reshape(uvd_pts, shape=(-1,)) + +def uvd2xyz_op(uvd_pts, cfg): + uvd_pts = tf.reshape(uvd_pts, (-1,3)) + uvd_list = tf.unstack(uvd_pts) + xyz_list = [_bpro(pt, cfg) for pt in uvd_list] + xyz_pts = tf.stack(xyz_list) + return tf.reshape(xyz_pts, (-1,)) + +'''as a pre-processing step +''' +def _gaussian_kern(filter_size=10, sigma=3): + ''' + return an np array of a Gaussian kernel + ''' + interval = (2*sigma+1.0)/(filter_size) + x = np.linspace(-sigma-interval/2., sigma+interval/2., filter_size+1) + kern1d = np.diff(st.norm.cdf(x)) + kernel_raw = np.sqrt(np.outer(kern1d, kern1d)) + kernel = kernel_raw/kernel_raw.sum() + return kernel + +def gaussian_filter(filter_size=10, sigma=3): + gau_init = tf.constant(_gaussian_kern(filter_size,sigma), tf.float32) + with tf.variable_scope('preprocess') as scope: + try: + gaussian_filter = 
tf.get_variable('gaussian_filter', + initializer=gau_init, trainable=False) + gaussian_filter = tf.reshape(gaussian_filter, (filter_size,filter_size,1,1)) + except ValueError: + scope.reuse_variables() + gaussian_filter = tf.get_variable('gaussian_filter', + initializer=gau_init, trainable=False) + gaussian_filter = tf.reshape(gaussian_filter, (filter_size,filter_size,1,1)) + return gaussian_filter + +def heatmap_from_uvd_op(uvd_pts, cfg, gaussian_filter): + '''we firstly construct a sparse tensor from the coordinate + val: the value at the center of corresponding point + ''' + with tf.name_scope('preprocess'): + uvd_pts = tf.reshape(uvd_pts, (-1,3)) + num_pt = uvd_pts.shape[0] + num_pt_op = tf.to_int64(num_pt) + + nn = tf.range(num_pt, dtype=tf.int64) + nn = tf.reshape(nn, (-1,1)) + + xx = uvd_pts[:,0] + xx = tf.clip_by_value(xx, 0, cfg.w-1) + xx = tf.to_int64(xx) + xx = tf.reshape(xx, (-1,1)) + + yy = uvd_pts[:,1] + yy = tf.clip_by_value(yy, 0, cfg.h-1) + yy = tf.to_int64(yy) + yy = tf.reshape(yy, (-1,1)) + indices = tf.concat([nn,yy,xx], axis=1) + + val = 1.0 + raw_hm = tf.sparse_to_dense(sparse_indices=indices, + output_shape=[num_pt_op,cfg.h,cfg.w], + sparse_values=val) + raw_hm = tf.expand_dims(raw_hm, axis=[-1]) + raw_hm = tf.cast(raw_hm, tf.float32) + + hm = tf.nn.conv2d(raw_hm, gaussian_filter, strides=[1,1,1,1], + padding='SAME', data_format='NHWC') + hm = tf.nn.conv2d(hm, gaussian_filter, strides=[1,1,1,1], + padding='SAME', data_format='NHWC') + hm = tf.divide(hm, tf.reduce_max(hm)) + + # shuffle dimensions of hm + hm_list = tf.unstack(hm, axis=0) + hm = tf.concat(hm_list, axis=2) + return hm + +def heatmap_from_xyz_op(xyz_pts, cfg, gaussian_filter): + return heatmap_from_uvd_op(xyz2uvd_op(xyz_pts, cfg), cfg, gaussian_filter) + + +'''utilities for visualization +''' +def visHeatMap(dm, pose, ch_flag=None): + raise NotImplementedError + +def visDepthMap(dm, thresh=750, isHeatmap=True): + dm[dm>thresh] = 0 + ratio = 255/thresh + dm = dm*ratio + if False: + dm = dm/dm.max() + dm_color = cv2.applyColorMap(dm, cv2.COLORMAP_JET) + dm = dm_color + else: + dm = cv2.cvtColor(dm.astype('uint8'), cv2.COLOR_GRAY2BGR) + return dm + +def visAnnotatedDepthMap(dm, pose, cfg, thresh=750): + dm = visDepthMap(dm, thresh) + pose = xyz2uvd(pose,cfg) + for pt2 in pose: + cv2.circle(dm, (int(pt2[0]), int(pt2[1])), 3, (0,0,255), -1) + return dm + +def visAnnotatedDepthMap_uvd(dm, pose, thresh=750): + dm = visDepthMap(dm, thresh) + for pt2 in pose: + cv2.circle(dm, (int(pt2[0]), int(pt2[1])), 3, (0,0,255), -1) + return dm + +'''unit test +''' +def run_heatmap_from_xyz(): + from data.bigHand import BigHandDataset + + pts = np.array([-67.4598, 5.3851, 584.7425, -55.6470, 8.8958, 587.4889, -35.5874, -54.6665, 583.3420, -54.7895, -53.8799, 577.8048, -71.0328, -51.3926, 573.4493, -88.8696, -46.2022, 569.1099, -32.8905, -20.8474, 553.7415, -18.7491, -39.3305, 532.7702, -19.8893, -56.4645, 516.0034, -35.5810, -69.2128, 545.6373, -35.5768, -78.8591, 520.6336, -35.2772, -75.8186, 501.8809, -52.5099, -66.7139, 535.8283, -51.0812, -74.7579, 509.5187, -51.7939, -78.6711, 488.8988, -72.3119, -85.2855, 549.0604, -73.1781, -108.2356, 532.5458, -69.9800, -125.8427, 521.5565, -101.7839, -74.5066, 557.4333, -110.1215, -92.7800, 549.8948, -117.0142, -109.9064, 545.4029 +]) + pts = pts.reshape((-1,)).astype(np.float32) + + tf.reset_default_graph() + xyz_pts = tf.placeholder(tf.float32,(BigHandDataset.pose_dim,)) + cfg = BigHandDataset.cfg + heatmap_op = heatmap_from_xyz_op(xyz_pts, cfg) + + with tf.Session() as sess: + 
(heatmap,) = sess.run([heatmap_op], {xyz_pts:pts}) + print('gaussian blurred') + summap = np.zeros((BigHandDataset.cfg.h, BigHandDataset.cfg.w)) + print(heatmap.shape) + for hm in heatmap: + summap += hm + + summap /= summap.max() + import matplotlib.pyplot as plt + plt.imshow(summap, interpolation='none') + plt.show() + +if __name__ == '__main__': + run_heatmap_from_xyz() diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/visualization.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..adc8217cdf7403774a2e3dc62f644ab7e340b71d --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/data/visualization.py @@ -0,0 +1,137 @@ +# for matplotlib wrapper to tf summary +import tensorflow as tf +import tfplot, matplotlib +import matplotlib.lines as lines + +FLAGS = tf.app.flags.FLAGS + +def figure_heatmap(hm): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + im = ax.imshow(hm, cmap=matplotlib.cm.jet) + fig.colorbar(im) + return fig + +def figure_joint(dm, uvd_pt): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + ax.imshow(dm, cmap=matplotlib.cm.Greys) + + if FLAGS.dataset == 'bighand': + ax.scatter(uvd_pt[0,0], uvd_pt[0,1], s=200, c='w') + ax.scatter(uvd_pt[1:6,0], uvd_pt[1:6,1], s=100, c='w') + ax.scatter(uvd_pt[6:9,0], uvd_pt[6:9,1], s=60, c='c') + ax.scatter(uvd_pt[9:12,0], uvd_pt[9:12,1], s=60, c='m') + ax.scatter(uvd_pt[12:15,0], uvd_pt[12:15,1], s=60, c='y') + ax.scatter(uvd_pt[15:18,0], uvd_pt[15:18,1], s=60, c='g') + ax.scatter(uvd_pt[18:,0], uvd_pt[18:,1], s=60, c='r') + elif FLAGS.dataset == 'nyu': + ax.scatter(uvd_pt[10:,0], uvd_pt[10:,1], s=200, c='w') + ax.scatter(uvd_pt[0,0], uvd_pt[0,1], s=60, c='c') + ax.scatter(uvd_pt[1,0], uvd_pt[1,1], s=90, c='c') + ax.scatter(uvd_pt[2,0], uvd_pt[2,1], s=60, c='m') + ax.scatter(uvd_pt[3,0], uvd_pt[3,1], s=90, c='m') + ax.scatter(uvd_pt[4,0], uvd_pt[4,1], s=60, c='y') + ax.scatter(uvd_pt[5,0], uvd_pt[5,1], s=90, c='y') + ax.scatter(uvd_pt[6,0], uvd_pt[6,1], s=60, c='g') + ax.scatter(uvd_pt[7,0], uvd_pt[7,1], s=90, c='g') + ax.scatter(uvd_pt[8,0], uvd_pt[8,1], s=60, c='r') + ax.scatter(uvd_pt[9,0], uvd_pt[9,1], s=90, c='r') + elif FLAGS.dataset == 'msra': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*4+1,0], uvd_pt[f*4+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+2,0], uvd_pt[f*4+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+3,0], uvd_pt[f*4+3,1], s=70, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+4,0], uvd_pt[f*4+4,1], s=60, c=fig_color[f]) + + elif FLAGS.dataset == 'icvl': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*3+1,0], uvd_pt[f*3+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+2,0], uvd_pt[f*3+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+3,0], uvd_pt[f*3+3,1], s=60, c=fig_color[f]) + return fig + +def figure_joint_skeleton(dm, uvd_pt): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + ax.imshow(dm, cmap=matplotlib.cm.Greys) + + if FLAGS.dataset == 'bighand': + ax.scatter(uvd_pt[0,0], uvd_pt[0,1], s=200, c='w') + ax.scatter(uvd_pt[1:6,0], uvd_pt[1:6,1], s=100, c='w') + ax.scatter(uvd_pt[6:9,0], uvd_pt[6:9,1], s=60, c='c') + ax.scatter(uvd_pt[9:12,0], uvd_pt[9:12,1], s=60, c='m') + ax.scatter(uvd_pt[12:15,0], 
uvd_pt[12:15,1], s=60, c='y') + ax.scatter(uvd_pt[15:18,0], uvd_pt[15:18,1], s=60, c='g') + ax.scatter(uvd_pt[18:,0], uvd_pt[18:,1], s=60, c='r') + elif FLAGS.dataset == 'nyu': + fig_color = ['c', 'm', 'y', 'g', 'r'] + for f in range(5): + ax.plot([uvd_pt[f*2,0], uvd_pt[f*2+1,0]], + [uvd_pt[f*2,1], uvd_pt[f*2+1,1]], color=fig_color[f], linewidth=3) + ax.scatter(uvd_pt[f*2,0],uvd_pt[f*2,1],s=60,c=fig_color[f]) + ax.scatter(uvd_pt[f*2+1,0],uvd_pt[f*2+1,1],s=60,c=fig_color[f]) + if f<4: + ax.plot([uvd_pt[13,0], uvd_pt[f*2+1,0]], + [uvd_pt[13,1], uvd_pt[f*2+1,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[9,0], uvd_pt[10,0]], + [uvd_pt[9,1], uvd_pt[10,1]], color='r', linewidth=3) + + ax.scatter(uvd_pt[13,0], uvd_pt[13,1], s=200, c='w') + ax.scatter(uvd_pt[11,0], uvd_pt[11,1], s=100, c='b') + ax.scatter(uvd_pt[12,0], uvd_pt[12,1], s=100, c='b') + + ax.plot([uvd_pt[13,0], uvd_pt[11,0]], + [uvd_pt[13,1], uvd_pt[11,1]], color='b', linewidth=3) + ax.plot([uvd_pt[13,0], uvd_pt[12,0]], + [uvd_pt[13,1], uvd_pt[12,1]], color='b', linewidth=3) + ax.plot([uvd_pt[13,0], uvd_pt[10,0]], + [uvd_pt[13,1], uvd_pt[10,1]], color='r', linewidth=3) + + elif FLAGS.dataset == 'msra': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*4+1,0], uvd_pt[f*4+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+2,0], uvd_pt[f*4+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+3,0], uvd_pt[f*4+3,1], s=70, c=fig_color[f]) + ax.scatter(uvd_pt[f*4+4,0], uvd_pt[f*4+4,1], s=60, c=fig_color[f]) + ax.plot([uvd_pt[f*4+1,0], uvd_pt[f*4+2,0]], + [uvd_pt[f*4+1,1], uvd_pt[f*4+2,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[f*4+2,0], uvd_pt[f*4+3,0]], + [uvd_pt[f*4+2,1], uvd_pt[f*4+3,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[f*4+3,0], uvd_pt[f*4+4,0]], + [uvd_pt[f*4+3,1], uvd_pt[f*4+4,1]], color=fig_color[f], linewidth=3) + elif FLAGS.dataset == 'icvl': + fig_color = ['c', 'm', 'y', 'g', 'r'] + ax.scatter(uvd_pt[0:,0], uvd_pt[0:,1], s=200, c='w') + for f in range(5): + ax.scatter(uvd_pt[f*3+1,0], uvd_pt[f*3+1,1], s=90, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+2,0], uvd_pt[f*3+2,1], s=80, c=fig_color[f]) + ax.scatter(uvd_pt[f*3+3,0], uvd_pt[f*3+3,1], s=60, c=fig_color[f]) + ax.plot([uvd_pt[f*3+1,0], uvd_pt[f*3+2,0]], + [uvd_pt[f*3+1,1], uvd_pt[f*3+2,1]], color=fig_color[f], linewidth=3) + ax.plot([uvd_pt[f*3+2,0], uvd_pt[f*3+3,0]], + [uvd_pt[f*3+2,1], uvd_pt[f*3+3,1]], color=fig_color[f], linewidth=3) + + return fig + +def figure_smp_pts(dm, pts1, pts2): + fig = matplotlib.figure.Figure() + ax = fig.add_subplot(1,1,1) + ax.imshow(dm, cmap=matplotlib.cm.jet) + + for pt1, pt2 in zip(pts1, pts2): + ax.plot([pt1[0], pt2[0]], [pt1[1], pt2[1]]) + ax.scatter(pt1[0], pt1[1], s=60, c='w') + ax.scatter(pt2[0], pt2[1], s=60, c='m') + return fig + +tf_heatmap_wrapper = tfplot.wrap(figure_heatmap, batch=True, name='hm_summary') +tf_jointplot_wrapper = tfplot.wrap(figure_joint_skeleton, batch=True, name='pt_summary') +tf_smppt_wrapper = tfplot.wrap(figure_smp_pts, batch=True, name='smppt_summary') + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_icvl_model.sh 
b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_icvl_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..84406e467eedf320e07ec2e12285b73da48c3ce4 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_icvl_model.sh @@ -0,0 +1,25 @@ +cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" +cd $cur_dir +model_dir=${cur_dir}/train_cache/icvl_training_s2_f128_daug_um_v1/ +if ! [ -d $model_dir ]; then + mkdir -p $model_dir +fi + +cd $model_dir +url=https://polybox.ethz.ch/index.php/s/f9EWUGSpTeKmFDo/download +fname=icvl.tar.gz + +if [ -f $fname ]; then + echo "file already exists, no need to download again" +else + echo "downloading the pretrained model(62M)..." + wget $url + mv download $fname +fi + +echo "unzipping..." +tar xvzf $fname +mv icvl/*.* ./ +rmdir icvl/ + +echo "done." \ No newline at end of file diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_msra_model.sh b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_msra_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..e1db07d0ebcde7e9bd8cb867dc3560ce42459eaa --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_msra_model.sh @@ -0,0 +1,31 @@ +cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" +cd $cur_dir + +cache_dir=${cur_dir}/msra_model +if ! [ -d $cache_dir ]; then + mkdir $cache_dir +fi +cd $cache_dir + +fname=msra.tar.gz +url=https://polybox.ethz.ch/index.php/s/B2W1ngyUAitsv2e/download +if [ -f $fname ]; then + echo "file already exists, no need to download again" +else + echo "downloading the pretrained model(566M)..." + wget $url + mv download $fname +fi +echo "unzipping..." +tar xvzf $fname + + +cd $cur_dir +for pid in {0..8}; do + tar_dir=${cur_dir}/train_cache/msra_P${pid}_training_s2_f128_daug_um_v1/ + src_dir=${cache_dir}/msra/P${pid}/ + mv $src_dir $tar_dir +done + +rmdir ${cache_dir}/msra +echo "done." \ No newline at end of file diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_nyu_model.sh b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_nyu_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..9654cd227a627a9d7019d55e908da428a68553d2 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/exp/fetch_nyu_model.sh @@ -0,0 +1,25 @@ +cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" +cd $cur_dir +model_dir=${cur_dir}/train_cache/nyu_training_s2_f128_daug_um_v1/ +if ! [ -d $model_dir ]; then + mkdir -p $model_dir +fi + +cd $model_dir +url=https://polybox.ethz.ch/index.php/s/Q4GS7bgRRM3zK5J/download +fname=nyu.tar.gz + +if [ -f $fname ]; then + echo "file already exists, no need to download again" +else + echo "downloading the pretrained model(61M)..." + wget $url + mv download $fname +fi + +echo "unzipping..." +tar xvzf $fname +mv nyu/*.* ./ +rmdir nyu/ + +echo "done." 
\ No newline at end of file diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/gpu_config.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/gpu_config.py new file mode 100644 index 0000000000000000000000000000000000000000..0e3d0280cfd0cc2e20c64a882803965182bc605f --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/gpu_config.py @@ -0,0 +1,41 @@ +from __future__ import print_function, division, absolute_import +import os +import commands + +# check the job id +gpu_lock_path = '/tmp/lock-gpu*/info.txt' +lock_str = commands.getstatusoutput('cat %s'%gpu_lock_path) +lock_str = lock_str[1] +lock_str = lock_str.split('\n') + + +# on gpu server, use the gpu for tensorflow +if 'SGE_GPU' in os.environ: + gpulist = [] + for line in lock_str: + if line.find('wanc') == -1: + continue + line = line.split(' ') + job_idx = int(line[7]) + gpu_idx = int(line[1]) + gpulist.append((gpu_idx, job_idx)) + gpulist = sorted(gpulist, key=lambda x:x[1]) + gpu_idx,job_idx = gpulist[-1] + + gpu_list = [gpu_idx] + os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(gpu) for gpu in gpu_list) + print('use GPU for tensorflow') +else: + os.environ['CUDA_VISIBLE_DEVICES'] = '' + gpu_list = [] + print('\x1b[0;31;47m use CPU for tensorflow \x1b[0m') + +num_gpus = len(gpu_list) +print('available gpu list, ', gpu_list) + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +import tensorflow as tf +config = tf.ConfigProto() +config.allow_soft_placement = True +config.gpu_options.allow_growth = True diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/__init__.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/hourglass_um_crop_tiny.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/hourglass_um_crop_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..b9f6cc47d62850594eef9962512c5879c1f336f0 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/hourglass_um_crop_tiny.py @@ -0,0 +1,909 @@ +'''to simultaneously regress the 3D joint offset and the 2D joint heatmap +''' +from __future__ import print_function,absolute_import, division + +import time, os +import numpy as np +from datetime import datetime +import importlib + +import gpu_config +import tensorflow as tf +import data.util +from data.util import heatmap_from_xyz_op, CameraConfig, xyz2uvd_op, uvd2xyz_op +import data.util +import data.preprocess +import numpy as np, numpy.linalg as alg + +# from model_new.train_single_gpu import train +from model.train_single_gpu import train +from model.test_model import test +import network.slim as slim + +from data.preprocess import generate_xyzs_from_multi_cfgs, crop_from_xyz_pose, crop_from_bbx, center_of_mass, norm_xyz_pose, unnorm_xyz_pose + +from data.visualization import tf_heatmap_wrapper, tf_jointplot_wrapper, tf_smppt_wrapper +from data.evaluation import Evaluation + +# implementation setting +tf.app.flags.DEFINE_integer('num_gpus', 1, 
#gpu_config.num_gpus, + """how many gpu to be used""") +# use cpu instead if no gpu is available +tf.app.flags.DEFINE_integer('batch_size', 40, + '''batch size''') +tf.app.flags.DEFINE_integer('debug_level', 1, + '''the higher, the more saved to summary''') +tf.app.flags.DEFINE_integer('sub_batch', 5, + '''batch size''') +tf.app.flags.DEFINE_integer('pid', 0, + '''for msra person id''') +tf.app.flags.DEFINE_boolean('is_train', True, + '''True for traning, False for testing''') + +# the network architecture to be used +tf.app.flags.DEFINE_string('net_module', 'um_v1', + '''the module containing the network architecture''') +tf.app.flags.DEFINE_boolean('is_aug', True, + '''whether to augment data''') +tf.app.flags.DEFINE_string('dataset', 'nyu', + '''the dataset to conduct experiments''') +# epoch +tf.app.flags.DEFINE_integer('epoch', 80, + '''number of epoches''') + +# network specification +tf.app.flags.DEFINE_integer('num_stack', 2, + 'number of stacked hourglass') +tf.app.flags.DEFINE_integer('num_fea', 128, + 'number of feature maps in hourglass') +tf.app.flags.DEFINE_integer('kernel_size', 3, + 'kernel size for the residual module') + +FLAGS = tf.app.flags.FLAGS + +MAXIMUM_DEPTH = 600.0 + +class JointDetectionModel(object): + _moving_average_decay = 0.9999 + _batchnorm_moving_average_decay = 0.9997 + _init_lr = 0.001 + if FLAGS.dataset == 'nyu': + _num_epochs_per_decay = 10 + elif FLAGS.dataset == 'msra': + _num_epochs_per_decay = 20 + _lr_decay_factor = 0.1 + + _adam_beta1 = 0.5 + + # maximum allowed depth + _max_depth = 600.0 + + # input size: the input of the network + _input_height = 128 + _input_width = 128 + + # output size: the output size of network, as well as the largest size of hourglass model + _output_height = 32 + _output_width = 32 + + _gau_sigma = 3.0 + _gau_filter_size = 10 + + _base_dir = './exp/train_cache/' + + + def __init__(self, dataset, detect_net, epoch, net_desc='dummy', val_dataset=None): + ''' + args: + dataset: data.xxxdataset isinstance + detect_net: funtional input of the net + desc: string, the name of the corresponding cache folder + notice: + any tf operations on the graph cannot be defined here, + they can only be defined after the graph is initialized by the training module + ''' + self._dataset = dataset + self._jnt_num = int(dataset.jnt_num) + self._cfg = self._dataset.cfg + + self._num_batches_per_epoch = dataset.approximate_num / (FLAGS.batch_size*FLAGS.sub_batch) + self._net_desc = net_desc + self._net = detect_net + self._max_steps = int(epoch*self._num_batches_per_epoch) + + self._val_dataset = val_dataset + self._model_desc = '%s_%s_s%d_f%d'%(dataset.name, dataset.subset, FLAGS.num_stack, FLAGS.num_fea) + if FLAGS.is_aug: + self._model_desc += '_daug' + + if self._val_dataset: + assert self._jnt_num == self._val_dataset.jnt_num, ( + 'the validation dataset should be with the same number of joints to the traning dataset') + + if not os.path.exists(self._base_dir): + os.makedirs(self._base_dir) + + self._log_path = os.path.join(self._base_dir, self.name, 'validation_log.txt') + + '''data interface + 1. initialize the dataset + 2. the global setting of the batch_size + 3. 
total number of steps + ''' + def batch_input(self, dataset, batch_size=None): + if batch_size is None: + batch_size = FLAGS.batch_size + dm_batch, pose_batch, cfg_batch, com_batch = dataset.get_batch_op( + batch_size=batch_size, + num_readers = 2, + num_preprocess_threads = 2, + preprocess_op=dataset.preprocess_op(self._input_width, self._input_height)) + return [dm_batch, pose_batch, cfg_batch, com_batch] + + def batch_input_test(self, dataset): + dm_batch, pose_batch, cfg_batch, com_batch, name_batch = dataset.get_batch_op_test( + batch_size = FLAGS.batch_size, + preprocess_op = dataset.preprocess_op(self._input_width, self._input_height)) + return [dm_batch, pose_batch, cfg_batch, com_batch, name_batch] + + @property + def train_dataset(self): + return self._dataset + + @property + def val_dataset(self): + return self._val_dataset + + '''hyper parameters + ''' + @property + def init_lr(self): + '''the initial learning rate + ''' + return self._init_lr + @property + def lr_decay_factor(self): + '''the rate of exponential decay of learning rate + ''' + return self._lr_decay_factor + + @property + def decay_steps(self): + '''lr does not decay when global_step < decay_steps + ''' + return self._num_batches_per_epoch * self._num_epochs_per_decay + + @property + def moving_average_decay(self): + return self._moving_average_decay + + @property + def max_steps(self): + return self._max_steps + + '''training operation + ''' + def inference(self, normed_dms, cfgs, coms, reuse_variables, is_training=True): + with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables): + # resize the image to fit the network input + # during training, inference is called by loss function, where dms are resized + end_points = self._net(normed_dms, cfgs, coms, self._jnt_num, is_training) + return end_points + + max_dist_2d = 4.0 # 4 pixels + max_dist_3d = 0.8 # 80mm 3d distance + def _hm_3d(self, oms): + '''generate 3D distance heatmap according to the offset map + Args: + oms: the normalized xyz offset maps, (b,h,w,3*j) + Returns: + hms: the 3D heatmap, (b,h,w,j) + ''' + om_list = tf.unstack(oms, axis=-1) + hm_list = [] + for j in range(self._jnt_num): + xx,yy,zz = om_list[j*3], om_list[j*3+1], om_list[j*3+2] + hm = tf.sqrt(xx**2+yy**2+zz**2) + hm = tf.divide(self.max_dist_3d-hm, self.max_dist_3d) + hm = tf.maximum(hm, tf.zeros_like(hm)) + hm_list.append(hm) + hms = tf.stack(hm_list, axis=-1) + return hms + + def _hm_2d(self, poses, cfgs, out_h, out_w): + '''synthesize the 2d heatmap + Args: + poses: unnormed xyz pose, (b,j*3) + cfgs: camera configuration, (b, 6) + out_h, out_w: output of heatmap size + Returns: + hm2: 2D heatmap, (b, out_h, out_w, j) + ''' + def fn(elems): + xyz_pose, cfg = elems[0], elems[1] + + w_ratio = cfg[4] / out_w + h_ratio = cfg[5] / out_h + new_cfg = CameraConfig(cfg[0]/w_ratio, cfg[1]/h_ratio, + cfg[2]/w_ratio, cfg[3]/h_ratio, + out_w, out_h) + + xx, yy = tf.meshgrid(tf.range(out_h), tf.range(out_w)) + xx, yy = tf.cast(xx, tf.float32), tf.cast(yy, tf.float32) + xx = tf.tile(tf.expand_dims(xx, axis=-1), [1, 1, self._jnt_num]) + yy = tf.tile(tf.expand_dims(yy, axis=-1), [1, 1, self._jnt_num]) + + uvd_pose = tf.reshape(data.util.xyz2uvd_op(xyz_pose, new_cfg), (-1,3)) + [uu,vv,dd] = tf.unstack(uvd_pose, axis=-1) + uu = tf.reshape(uu, (1,1,-1)) + vv = tf.reshape(vv, (1,1,-1)) + + hm = tf.maximum(self.max_dist_2d-tf.sqrt(tf.square(xx-uu)+tf.square(yy-vv)), + tf.zeros_like(xx))/self.max_dist_2d + return [hm, cfg] + + with tf.name_scope('pose_sync'): + hms, _ = tf.map_fn(fn, [poses, 
cfgs]) + return hms + + def _um(self, om, hm_3d): + '''get the unit offset vector map from offset maps + Args: + om: the offset map, (b,h,w,j*3) + hm_3d: the offset norm, (b,h,w,j) + Returns: + um: the unit offset map, (b,h,w,j*3) + ''' + om_list = tf.unstack(om, axis=-1) + + dm_3d = self.max_dist_3d - tf.multiply(hm_3d, self.max_dist_3d) + dm_list = tf.unstack(dm_3d, axis=-1) + + um_list = [] + + for j in range(self._jnt_num): + x,y,z = om_list[j*3], om_list[j*3+1], om_list[j*3+2] + d = dm_list[j] + + mask = tf.less(d, self.max_dist_3d-1e-2) + + x = tf.where(mask, tf.divide(x, d), tf.zeros_like(x)) + y = tf.where(mask, tf.divide(y, d), tf.zeros_like(y)) + z = tf.where(mask, tf.divide(z, d), tf.zeros_like(z)) + um_list += [x,y,z] + return tf.stack(um_list, axis=-1) + + def _resume_om(self, hm_3d, um): + '''resume the offset map from the 3d heatmap and unit offset vector + Args: + hm_3d: the 3D heatmap, (b,h,w,j) + um: the 3D unit offset vector, (b,h,w,j*3) + Returns: + om: the 3D offset vector, (b,h,w,j) + ''' + # um = tf.clip_by_value(um, -1.0, 1.0) + um_list = tf.unstack(um, axis=-1) + + dm_3d = self.max_dist_3d - tf.multiply(hm_3d, self.max_dist_3d) + dm_list = tf.unstack(dm_3d, axis=-1) + + om_list = [] + + for j in range(self._jnt_num): + x,y,z = um_list[j*3], um_list[j*3+1], um_list[j*3+2] + d = dm_list[j] + x = tf.multiply(x,d) + y = tf.multiply(y,d) + z = tf.multiply(z,d) + om_list += [x,y,z] + return tf.stack(om_list, axis=-1) + + def _vis_um_xy(self, ums): + '''visualize the xy plane angle of ums + ''' + um_list = tf.unstack(ums, axis=-1) + angle_list = [] + for j in range(self._jnt_num): + x,y,z = um_list[j*3], um_list[j*3+1], um_list[j*3+2] + d = tf.sqrt(x**2+y**2) + sin = tf.where(tf.less(d**2+z**2, 0.1), tf.ones_like(d), tf.sin(tf.divide(x,d))) + angle_list.append(sin) + return tf.stack(angle_list, axis=-1) + + def _vis_um_z(self, ums): + '''visuzlie the z plane angle of ums + ''' + um_list = tf.unstack(ums, axis=-1) + angle_list = [] + for j in range(self._jnt_num): + angle_list.append(um_list[j*3+2]) + return tf.stack(angle_list, axis=-1) + + # training + def loss(self, dms, poses, cfgs, coms): + ''' the losses for the training + Args: + dms: + poses: + reuse_variables: + Returns: + the total loss + ''' + if FLAGS.is_aug: + dms, poses = data.preprocess.data_aug(dms, poses, cfgs, coms) + + # generate ground truth + gt_hms = self._hm_2d(poses, cfgs, self._output_height, self._output_width) + + gt_normed_poses = norm_xyz_pose(poses, coms) + normed_dms = data.preprocess.norm_dm(dms, coms) + tiny_normed_dms = tf.image.resize_images(normed_dms, (self._output_height, self._output_width), 2) + xyzs = generate_xyzs_from_multi_cfgs(tiny_normed_dms, cfgs, coms) + xyzs = tf.tile(xyzs, [1,1,1,self._jnt_num]) + gt_oms = tf.reshape(gt_normed_poses, (-1,1,1,3*self._jnt_num)) - xyzs + + gt_hm3s = self._hm_3d(gt_oms) + gt_ums = self._um(gt_oms, gt_hm3s) + + # generate estimation + end_points = self.inference(normed_dms, cfgs, coms, reuse_variables=None, is_training=True) + + # heatmap loss + est_hm_list = end_points['hm_outs'] + hm_losses = [tf.nn.l2_loss(est_hms-gt_hms) for est_hms in est_hm_list] + + # 3D heatmap loss + est_hm3_list = end_points['hm3_outs'] + hm3_losses = [tf.nn.l2_loss(est_hm3-gt_hm3s) for est_hm3 in est_hm3_list] + + # offsetmap loss + # we only consider the nearby point offset maps + # in order to make the oms loss on the same scale w.r.t. 
hms loss + est_um_list = end_points['um_outs'] + um_losses = [tf.nn.l2_loss(est_ums-gt_ums) for est_ums in est_um_list] + + # add the weight decay loss + reg_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), 'reg_loss') + hm_loss = tf.add_n(hm_losses, 'hm_loss') + um_loss = tf.add_n(um_losses, 'um_loss') + hm3_loss = tf.add_n(hm3_losses, 'hm3_loss') + + total_loss = reg_loss+hm_loss+um_loss+hm3_loss + + tf.summary.scalar('tra/um_loss', um_loss) + tf.summary.scalar('tra/hm_loss', hm_loss) + tf.summary.scalar('tra/hm3_loss', hm3_loss) + + # to visualize the training error, + # only pick the first three for tensorboard visualization + est_hms = est_hm_list[-1][0:3,:,:,:] + est_ums = est_um_list[-1][0:3,:,:,:] + est_hm3s = est_hm3_list[-1][0:3,:,:,:] + tiny_normed_dms = tiny_normed_dms[0:3,:,:,:] + cfgs = cfgs[0:3,:] + coms = coms[0:3,:] + dms = dms[0:3,:,:,:] + est_oms = self._resume_om(est_hm3s, est_ums) + + # get point estimation + est_normed_poses = self._xyz_estimation(est_hms, est_oms, est_hm3s, tiny_normed_dms, cfgs, coms) + est_normed_poses = tf.reshape(est_normed_poses, + (est_normed_poses.get_shape()[0].value, -1)) + xyz_pts = unnorm_xyz_pose(est_normed_poses, coms) + + # 2d joint detection + def to_uvd_fn(elem): + xyz_pt, cfg = elem[0], elem[1] + return [data.util.xyz2uvd_op(xyz_pt, cfg), cfg] + uvd_pts, _ = tf.map_fn(to_uvd_fn, [xyz_pts, cfgs]) + resized_hms = tf.image.resize_images(est_hms, (self._input_height, self._input_width), 2) + hm_uvd_pts, _ = self._uvd_estimation_op(resized_hms, tf.ones_like(resized_hms)) + + # for visualization + gt_xy_angle = self._vis_um_xy(gt_ums) + gt_z_angle = self._vis_um_z(gt_ums) + est_xy_angle = self._vis_um_xy(est_ums) + est_z_angle = self._vis_um_z(est_ums) + + if FLAGS.debug_level > 0: + tf.summary.image('tra_dm/', dms) + tf.summary.image('tra_pts/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(uvd_pts, (3,-1,3)))) + tf.summary.image('tra_pt_hm/', tf_jointplot_wrapper(tf.squeeze(dms, axis=-1), + tf.reshape(hm_uvd_pts, (3,-1,3)))) + if FLAGS.debug_level > 1: + tf.summary.image('tra_hm_est_0/', tf_heatmap_wrapper(est_hms[:,:,:,0])) + tf.summary.image('tra_hm_gt_0/', tf_heatmap_wrapper(gt_hms[:,:,:,0])) + tf.summary.image('tra_3d_hm_est_0/', tf_heatmap_wrapper(est_hm3s[:,:,:,0])) + tf.summary.image('tra_3d_hm_gt_0/', tf_heatmap_wrapper(gt_hm3s[:,:,:,0])) + tf.summary.image('tra_um_xy_gt_0', tf_heatmap_wrapper(gt_xy_angle[:,:,:,0])) + tf.summary.image('tra_um_z_gt_0', tf_heatmap_wrapper(gt_z_angle[:,:,:,0])) + tf.summary.image('tra_um_xy_est_0', tf_heatmap_wrapper(est_xy_angle[:,:,:,0])) + tf.summary.image('tra_um_z_est_0', tf_heatmap_wrapper(est_z_angle[:,:,:,0])) + + if FLAGS.debug_level > 2: + tf.summary.image('tra_hm_gt_1/', tf_heatmap_wrapper(gt_hms[:,:,:,5])) + tf.summary.image('tra_hm_est_1/', tf_heatmap_wrapper(est_hms[:,:,:,5])) + tf.summary.image('tra_3d_hm_est_1/', tf_heatmap_wrapper(est_hm3s[:,:,:,5])) + tf.summary.image('tra_3d_hm_gt_1/', tf_heatmap_wrapper(gt_hm3s[:,:,:,5])) + tf.summary.image('tra_um_xy_est_1', tf_heatmap_wrapper(est_xy_angle[:,:,:,5])) + tf.summary.image('tra_um_z_est_1', tf_heatmap_wrapper(est_z_angle[:,:,:,5])) + tf.summary.image('tra_um_xy_gt_1', tf_heatmap_wrapper(gt_xy_angle[:,:,:,5])) + tf.summary.image('tra_um_z_gt_1', tf_heatmap_wrapper(gt_z_angle[:,:,:,5])) + + return total_loss + + def opt(self, lr): + '''return the optimizer of the model + ''' + return tf.train.AdamOptimizer(lr, beta1=self._adam_beta1) + + # validation and test + def test(self, dms, poses, cfgs, coms, 
reuse_variables=True): + '''the validation step to show the result from the validation set + + ''' + batch_size = dms.get_shape()[0].value + # 1st phase, gpu computation + normed_dms = data.preprocess.norm_dm(dms, coms) + end_points = self.inference(normed_dms, cfgs, coms, reuse_variables=reuse_variables, is_training=False) + + est_hms = end_points['hm_outs'][-1] + + tiny_normed_dms = tf.image.resize_images(normed_dms, (self._output_height, self._output_width), 2) + est_ums = end_points['um_outs'][-1] + est_hm3s = end_points['hm3_outs'][-1] + + est_oms = self._resume_om(est_hm3s, est_ums) + + est_normed_poses = self._xyz_estimation(est_hms, est_oms, est_hm3s, tiny_normed_dms, cfgs, coms) + est_normed_poses = tf.reshape(est_normed_poses, + (est_normed_poses.get_shape()[0].value, -1)) + xyz_pts = unnorm_xyz_pose(est_normed_poses, coms) + + def to_uvd_fn(elem): + xyz_pt, cfg = elem[0], elem[1] + return [data.util.xyz2uvd_op(xyz_pt, CameraConfig(*tf.unstack(cfg,axis=0))), cfg] + uvd_pts, _ = tf.map_fn(to_uvd_fn, [xyz_pts, cfgs]) + gt_uvd_pts, _ = tf.map_fn(to_uvd_fn, [poses, cfgs]) + + resized_hms = tf.image.resize_images(est_hms, (self._input_height, self._input_width)) + hm_uvd_pts, _ = self._uvd_estimation_op(resized_hms, tf.ones_like(resized_hms)) + + # for gt visulization + gt_normed_poses = norm_xyz_pose(poses, coms) + gt_hms = self._hm_2d(poses, cfgs, self._output_height, self._output_width) + xyzs = generate_xyzs_from_multi_cfgs(tiny_normed_dms, cfgs, coms) + xyzs = tf.tile(xyzs, [1,1,1,self._jnt_num]) + gt_oms = tf.reshape(gt_normed_poses, (-1,1,1,3*self._jnt_num)) - xyzs + gt_hm3s = self._hm_3d(gt_oms) + gt_ums = self._um(gt_oms, gt_hm3s) + gt_xy_angle = self._vis_um_xy(gt_ums) + gt_z_angle = self._vis_um_z(est_ums) + + # add summayries + est_xy_angle = self._vis_um_xy(est_ums) + est_z_angle = self._vis_um_z(est_ums) + if FLAGS.debug_level > 0: + tf.summary.image('val_pts/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(uvd_pts, (batch_size,-1,3))), + collections=['val_summaries']) + tf.summary.image('gt_pts/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(gt_uvd_pts, (batch_size,-1,3))), + collections=['val_summaries']) + if FLAGS.debug_level > 1: + tf.summary.image('gt_hm/', tf_heatmap_wrapper(gt_hms[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('gt_hm3', tf_heatmap_wrapper(gt_hm3s[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_hm/', tf_heatmap_wrapper(est_hms[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_hm3', tf_heatmap_wrapper(est_hm3s[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_dm/', dms, collections=['val_summaries']) + tf.summary.image('val_pts_hm/', tf_jointplot_wrapper(tf.squeeze(dms,axis=-1), + tf.reshape(hm_uvd_pts, (batch_size,-1,3))), + collections=['val_summaries']) + + if FLAGS.debug_level > 2: + tf.summary.image('gt_xy', tf_heatmap_wrapper(gt_xy_angle[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('gt_z', tf_heatmap_wrapper(gt_z_angle[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_xy', tf_heatmap_wrapper(est_xy_angle[:,:,:,0]), + collections=['val_summaries']) + tf.summary.image('val_z', tf_heatmap_wrapper(est_z_angle[:,:,:,0]), + collections=['val_summaries']) + + + self.val_summary_op = tf.summary.merge_all(key='val_summaries') + + # interface to fetch output + self.uvd_pts = uvd_pts + self.xyz_pts = xyz_pts + self.val_dms = dms + self.est_hms = est_hms + self.gt_pose = poses + print('testing graph is established') + + 
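
Both the training loss and this test graph share the same per-pixel encoding: a truncated 3D distance heatmap, (max_dist_3d - ||offset||) / max_dist_3d clipped at zero, plus a unit offset vector, which _resume_om recombines into offsets. A compact numpy rendering of the idea for a single joint; this is a sketch of the round trip, not a line-by-line port of _hm_3d/_um/_resume_om:

    import numpy as np

    MAX_DIST_3D = 0.8  # 80 mm after POSE_NORM_RATIO = 100 normalisation

    def encode(offset):
        # offset: (h, w, 3) normalised xyz offsets from each pixel to the joint
        dist = np.linalg.norm(offset, axis=-1)                      # (h, w)
        hm3 = np.maximum((MAX_DIST_3D - dist) / MAX_DIST_3D, 0.0)   # 3D heatmap
        unit = offset / np.maximum(dist[..., None], 1e-8)           # unit offsets
        return hm3, unit

    def decode(hm3, unit):
        # invert the encoding: distance = MAX_DIST_3D - hm3 * MAX_DIST_3D
        dist = MAX_DIST_3D - hm3 * MAX_DIST_3D
        return unit * dist[..., None]
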
@property + def is_validate(self): + return True if self._val_dataset else False + + @property + def name(self): + return '%s_%s'%(self._model_desc, self._net_desc) + + @property + def train_dir(self): + return os.path.join(self._base_dir, self.name) + + @property + def summary_dir(self): + return os.path.join(self.train_dir, 'summary') + + def _mean_shift(self, can_pts, num_it=10, band_width=0.8): + '''mean shift over the candidate point + Args: + can_pts: candidate points, (b,j,n,3) + num_it: number of iterations + band_width: bandwidth of the kernel + Returns: + centers: the density maximal points + ''' + def joint_fn(can_pt): + '''iteration over joint + Args: + can_pt: (n,3) + Returns: + center: (3) + ''' + # initialization + num_quan = 2.0 + quan_pt = tf.clip_by_value((can_pt+1.0)*num_quan, 0, 2*num_quan-0.1) + quan_pt = tf.to_int64(quan_pt) + + quan_hm = tf.scatter_nd(quan_pt, tf.ones(num_pt,), + (int(2*num_quan),int(2*num_quan),int(2*num_quan))) + curr_pt = tf.where(tf.equal(quan_hm, tf.reduce_max(quan_hm)))[-1] + curr_pt = tf.divide(tf.to_float(curr_pt), num_quan) - 1.0 + curr_pt += 0.5/num_quan + + # iteration + for _ in range(num_it): + s = tf.reduce_sum((can_pt - curr_pt)**2, axis=-1) + s = tf.expand_dims(tf.exp(inverse_sigma*s), axis=-1) + curr_pt = tf.reduce_sum(tf.multiply(can_pt, s), axis=0) + curr_pt = tf.divide(curr_pt, tf.reduce_sum(s)) + curr_pt = tf.reshape(curr_pt, (1,3)) + + curr_pt = tf.reshape(curr_pt, (3,)) + return curr_pt + + def batch_fn(can_pt): + '''iteration over batch + Args: + can_pt: (j,n,3) + Returns: + centers: (j,3) + ''' + return tf.map_fn(joint_fn ,can_pt) + + num_jnt = can_pts.get_shape()[1].value + num_pt = can_pts.get_shape()[2].value + inverse_sigma = -1.0 / (2*band_width*band_width) + + return tf.map_fn(batch_fn, can_pts) + + def _generate_candidates(self, hms, xyzs, num_pt): + '''generate the candidates to do mean shift, from xyzs + Args: + hms: estimated heatmaps, (b,h,w,j) + xyzs: the xyz points, (b,h,w,j*3) + num_pt: the number of candidates + Returns: + can_pts: candidate points, (b,j,n,3) + ''' + def fn(elems): + hm, xyz = elems[0], elems[1] + hm = tf.reshape(hm, (-1, jnt_num)) + xyz = tf.reshape(xyz, (-1, 3*jnt_num)) + + hm_list = tf.unstack(hm, axis=-1) + xyz_list = tf.unstack(xyz, axis=-1) + can_list = [] + + for j in range(jnt_num): + weights, indices = tf.nn.top_k(hm_list[j], k=num_pt, sorted=True) + xx = tf.gather(xyz_list[j*3], indices) + yy = tf.gather(xyz_list[j*3+1], indices) + zz = tf.gather(xyz_list[j*3+2], indices) + can_list.append(tf.stack([xx,yy,zz], axis=1)) + can_pts = tf.stack(can_list, axis=0) + return [can_pts, hms] + + jnt_num = hms.get_shape()[-1].value + can_pts, _ = tf.map_fn(fn, [hms, xyzs]) + return can_pts + + def _get_candidate_weights(self, xyz_pts, coms, cfgs, hms, dms): + '''the weights measures how xyz_pts fits the 2d hms estimation and dms observation + Args: + xyz_pts: the candidate points, (b,j,n,3) + coms: centers of mass, (b,3) + cfgs: camera configurations, (b,6) + hms: estimated 2D heatmap, (b,h,w,j) + dms: depth map, (b,h,w,1) + Returns: + weights: the weights of the corresponding points, (b,j,n,1) + ''' + def fn(elems): + xyz_pt, com, cfg, hm, dm = elems[0], elems[1], elems[2], elems[3], elems[4] + + xx,yy,zz = tf.unstack(tf.reshape(xyz_pt,(-1,3)), axis=-1) + xyz_pt = tf.reshape(xyz_pt, (-1,)) + + xyz_pt = tf.multiply(xyz_pt, data.preprocess.POSE_NORM_RATIO) + tf.tile(com,[jnt_num*pnt_num]) + xyz_pt = tf.reshape(xyz_pt, (-1,3)) + + w_ratio = cfg[4] / out_w + h_ratio = cfg[5] / out_h + new_cfg = 
CameraConfig(cfg[0]/w_ratio, cfg[1]/h_ratio, + cfg[2]/w_ratio, cfg[3]/h_ratio, + out_w, out_h) + uvd_pt = xyz2uvd_op(xyz_pt, new_cfg) + uvd_pt = tf.reshape(uvd_pt, (-1, 3)) + uu, vv, dd = tf.unstack(uvd_pt, axis=-1) + uu = tf.to_int32(uu+0.5) + vv = tf.to_int32(vv+0.5) + jj = tf.tile(tf.expand_dims(tf.range(jnt_num),axis=-1), [1,pnt_num]) + jj = tf.reshape(jj, (-1,)) + + indices = tf.stack([vv,uu,jj], axis=-1) + weights = tf.gather_nd(hm, indices) + weights = tf.reshape(weights, (jnt_num, pnt_num, 1)) + + #we also clip the values of depth + dm = tf.squeeze(dm) + dm = tf.divide(dm*data.preprocess.D_RANGE - data.preprocess.D_RANGE*0.5, + data.preprocess.POSE_NORM_RATIO) + indices = tf.stack([vv,uu], axis=-1) + od = tf.gather_nd(dm, indices) + zz = tf.maximum(zz, od) + xyz_pt = tf.stack([xx,yy,zz], axis=-1) + xyz_pt = tf.reshape(xyz_pt, (jnt_num, pnt_num, 3)) + + return [weights, xyz_pt, cfg, hm, dm] + + out_h, out_w = self._output_height, self._output_width + jnt_num = xyz_pts.get_shape()[1].value + pnt_num = xyz_pts.get_shape()[2].value + weights, xyz_pts, _, _, _ = tf.map_fn(fn, [xyz_pts, coms, cfgs, hms, dms]) + return weights, xyz_pts + + def _weighted_mean_shift(self, can_pts, weights, num_it, band_width): + '''mean shift over the candidate point + Args: + can_pts: candidate points, (b,j,n,3) + weights: weights of candidate points, (b,j,n,1) + num_it: number of iterations + band_width: bandwidth of the kernel + Returns: + centers: the density maximal points + ''' + def joint_fn(elems): + '''iteration over joint + Args: + can_pt: (n,3), elems[0] + weight: (n,1), elems[1] + Returns: + center: (3) + ''' + can_pt, weight = elems[0], elems[1] + # initialization + num_quan = 2.0 + quan_pt = tf.clip_by_value((can_pt+1.0)*num_quan, 0, 2*num_quan-0.1) + quan_pt = tf.to_int64(quan_pt) + + quan_hm = tf.scatter_nd(quan_pt, tf.squeeze(weight), + (int(2*num_quan),int(2*num_quan),int(2*num_quan))) + curr_pt = tf.where(tf.equal(quan_hm, tf.reduce_max(quan_hm)))[-1] + curr_pt = tf.divide(tf.to_float(curr_pt), num_quan) - 1.0 + curr_pt += 0.5/num_quan + + # iteration + for _ in range(num_it): + s = tf.reduce_sum((can_pt - curr_pt)**2, axis=-1) + s = tf.expand_dims(tf.exp(inverse_sigma*s), axis=-1) + s = tf.multiply(s, weight) + curr_pt = tf.reduce_sum(tf.multiply(can_pt, s), axis=0) + curr_pt = tf.divide(curr_pt, tf.reduce_sum(s)) + curr_pt = tf.reshape(curr_pt, (1,3)) + + curr_pt = tf.reshape(curr_pt, (3,)) + return [curr_pt, can_pt] + + def batch_fn(elems): + '''iteration over batch + Args: + can_pt: (j,n,3), elems[0] + weights: (j,n,1), elems[1] + Returns: + centers: (j,3) + ''' + return tf.map_fn(joint_fn ,elems) + + num_jnt = can_pts.get_shape()[1].value + num_pt = can_pts.get_shape()[2].value + inverse_sigma = -1.0 / (2*band_width*band_width) + + centers, _ = tf.map_fn(batch_fn, [can_pts, weights]) + return centers + + def _xyz_estimation(self, hms, oms, hm3s, dms, cfgs, coms): + '''use meanshift to get the final estimation + Args: + hms: the heatmap returned from 2D joint detection, (b,h,w,j) + oms: the 3D offset maps, (b,h,w,3*j) + hm3s: the 3D heaetmap, (b,h,w,j) + dms: the normalized depth map, (b,h,w,1) + cfgs: camera configurations, (b,6) + Returns: + xyz_pts: the estimated 3d joint, (b,3*j) + ''' + # get dense joint estimation + jnt_num = hms.get_shape()[-1].value + xyzs = generate_xyzs_from_multi_cfgs(dms, cfgs, coms) + xyzs = tf.tile(xyzs, [1,1,1,self._jnt_num]) + orig_xyzs= xyzs + + xyzs = xyzs + oms + + # get the weight map for candidate selection + # refined_hms = tf.multiply(hms, hm3s) 
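+        # The candidate weight map below combines the 2D and 3D heatmaps.
+        # Using (hms + 1.0) rather than the plain product above presumably
+        # keeps the 3D heatmap response alive where the 2D detector output
+        # is near zero; pixels with no depth reading (normalized depth
+        # below -0.99) are masked out entirely by dms_mask.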
+ refined_hms = tf.multiply(hms+1.0, hm3s) + # refined_hms = hm3s + # refined_hms = hms + dms_mask = tf.where(tf.less(dms, -0.99), tf.zeros_like(dms), tf.ones_like(dms)) + refined_hms = tf.multiply(refined_hms, dms_mask) + + num_pt = 5 + can_pts = self._generate_candidates(refined_hms, xyzs, num_pt=num_pt) + + # weighted scheme + can_weights, _ = self._get_candidate_weights(can_pts, coms, cfgs, hms, dms) + xyz_pts = self._weighted_mean_shift(can_pts, can_weights, num_it=10, band_width=0.4) + + # unweighted scheme + # xyz_pts = self._mean_shift(can_pts, num_it=10, band_width=0.4) + + # for visualization + ori_pts = self._generate_candidates(refined_hms, orig_xyzs, num_pt=num_pt) + + self.can_pts = can_pts + self.ori_pts = ori_pts + return xyz_pts + + + def _uvd_estimation_op(self, hms, ds): + ''' find the argmax from heatmaps and corresponding depth maps, and get the final estimation + Args: + hms: the heatmap with the same size as the initial captured image by camera + ds: the depth value of the coresponding points + Returns: + the uvd points of the joint + ''' + width = hms.shape[2] + + def fn(elems): + hough_hm, hough_dm = elems[0], elems[1] + uvd_pts = [] + + hough_hm_list = tf.unstack(hough_hm, axis=-1) + hough_dm_list = tf.unstack(hough_dm, axis=-1) + for j in range(self._jnt_num): + hh = hough_hm_list[j] + hd = hough_dm_list[j] + + idx = tf.where(tf.equal(hh, tf.reduce_max(hh))) + dd = tf.gather_nd(hd, idx) + + uu, vv, dd = tf.cast(idx[0][1],tf.float32), tf.cast(idx[0][0], tf.float32), dd[0] + uvd_pts.append(tf.stack([uu,vv,dd])) + return [tf.concat(uvd_pts, axis=-1), ds] + return tf.map_fn(fn, [hms, ds]) + + def do_test(self, sess, summary_writer, step, names=None): + '''execute computation of the inference + a fast version of inference + ''' + # during training + if names is None: + f = open(self._log_path, 'a') + summary_str, gt_vals, xyz_vals = sess.run( + [self.val_summary_op, self.gt_pose, self.xyz_pts]) + summary_writer.add_summary(summary_str, step) + + maxJntError=[] + f.write('[%s] step %d\n'%(datetime.now(), step)) + for xyz_val, gt_val in zip(xyz_vals, gt_vals): + maxJntError.append(Evaluation.maxJntError(xyz_val, gt_val)) + diff = (xyz_val-gt_val).reshape(-1,3) + dist = alg.norm(diff, axis=1).reshape(-1,1) + error_mat = np.concatenate((diff, dist), axis=1) + print(error_mat) + f.write(np.array_str(error_mat)+'\n') + print('validate error:', maxJntError) + f.write('validation error: {}\n'.format(maxJntError)) + f.flush() + f.close() + return + + if step%100 == 0: + summary_str, xyz_vals, gt_vals, names = sess.run( + [self.val_summary_op, self.xyz_pts, self.gt_pose, names]) + summary_writer.add_summary(summary_str, step) + + maxJntError=[] + for xyz_val, gt_val in zip(xyz_vals, gt_vals): + maxJntError.append(Evaluation.maxJntError(xyz_val, gt_val)) + diff = (xyz_val-gt_val).reshape(-1,3) + dist = alg.norm(diff, axis=1).reshape(-1,1) + print(np.concatenate((diff, dist), axis=1)) + print('[step: %d]test error:'%step, maxJntError) + print('---\n') + return gt_vals, xyz_vals, names + + gt_vals, xyz_vals, names = sess.run([self.gt_pose, self.xyz_pts, names]) + return gt_vals, xyz_vals, names + +'''unit test +''' +def run_train(dataset, val_dataset, restore_step=None): + net_module_name = 'network.'+FLAGS.net_module + + net_module = importlib.import_module(net_module_name, package=None) + net = net_module.detect_net + net_name = net_module.TOWER_NAME + + model = JointDetectionModel(dataset, net, epoch=FLAGS.epoch, net_desc=net_name, + val_dataset = val_dataset) + train(model, 
restore_step) + +def run_test(train_dataset, test_dataset, selected_step=None): + net_module_name = 'network.'+FLAGS.net_module + + net_module = importlib.import_module(net_module_name, package=None) + net = net_module.detect_net + net_name = net_module.TOWER_NAME + + model = JointDetectionModel(train_dataset, net, epoch=FLAGS.epoch, net_desc=net_name, + val_dataset = test_dataset) + + test(model, selected_step) + +if __name__ == '__main__': + if FLAGS.dataset == 'bighand': + import data.bigHand + dataset = data.bigHand.BigHandDataset('training') + val_dataset = data.bigHand.BigHandDataset('testing') + + elif FLAGS.dataset == 'nyu': + import data.nyu + dataset = data.nyu.NyuDataset('training') + val_dataset = data.nyu.NyuDataset('testing') + + elif FLAGS.dataset == 'icvl': + import data.icvl + dataset = data.icvl.IcvlDataset('training') + val_dataset = data.icvl.IcvlDataset('testing') + + elif FLAGS.dataset == 'msra': + import data.msra + dataset = data.msra.MsraDataset('training', FLAGS.pid) + val_dataset = data.msra.MsraDataset('testing', FLAGS.pid) + + if FLAGS.is_train: + run_train(dataset, val_dataset) + else: + run_test(dataset, val_dataset, -1) diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/test_model.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/test_model.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f06073e93b67a561feee819475157b5eab28cd --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/test_model.py @@ -0,0 +1,94 @@ +from __future__ import print_function, absolute_import, division + +import gpu_config +import tensorflow as tf +import network.slim as slim +import numpy as np +import time, os +import cv2 +from datetime import datetime +from data.evaluation import Evaluation + +FLAGS = tf.app.flags.FLAGS + +def test(model, selected_step): + with tf.Graph().as_default(): + total_test_num = model.val_dataset.exact_num + + dms, poses, cfgs, coms, names = model.batch_input_test(model.val_dataset) + model.test(dms, poses, cfgs, coms, reuse_variables=None) + + # dms, poses, names = model.batch_input_test(model.val_dataset) + # model.test(dms, poses, reuse_variables=None) + + sess = tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False)) + + init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) + sess.run(init_op) + + if selected_step is not None: + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt-%d'%selected_step) + saver = tf.train.Saver(tf.global_variables()) + saver.restore(sess, checkpoint_path) + print('[test_model]model has been resotored from %s'%checkpoint_path) + + tf.train.start_queue_runners(sess=sess) + summary_writer = tf.summary.FileWriter( + model.summary_dir+'_'+model.val_dataset.subset, + graph=sess.graph) + + res_path = os.path.join(model.train_dir, '%s-%s-result'%(model.val_dataset.subset, datetime.now())) + res_path = res_path.replace(' ', '_') + + res_txt_path = res_path+'.txt' + if os.path.exists(res_txt_path): + os.remove(res_txt_path) + err_path = res_path+'_error.txt' + f = open(res_txt_path, 'w') + + # res_vid_path = res_path+'.avi' + # codec = cv2.cv.CV_FOURCC('X','V','I','D') + # the output size is defined by the visualization tool of matplotlib + # vid = cv2.VideoWriter(res_vid_path, codec, 25, (640, 480)) + + print('[test_model]begin test') + test_num = 0 + step = 0 + maxJntError = [] + while True: + start_time = time.time() + try: + gt_vals, 
xyz_vals, name_vals = model.do_test(sess, summary_writer, step, names) + except tf.errors.OutOfRangeError: + print('run out of range') + break + + duration = time.time()-start_time + + for xyz_val, gt_val, name_val in zip(xyz_vals, gt_vals, name_vals): + maxJntError.append(Evaluation.maxJntError(xyz_val, gt_val)) + + xyz_val = xyz_val.tolist() + res_str = '%s\t%s\n'%(name_val, '\t'.join(format(pt, '.4f') for pt in xyz_val)) + res_str = res_str.replace('/', '\\') + f.write(res_str) + # vid.write(vis_val) + test_num += 1 + if test_num >= total_test_num: + print('finish test') + f.close() + Evaluation.plotError(maxJntError, err_path) + return + f.flush() + + if step%101 == 0: + print('[%s]: %d/%d computed, with %.2fs'%(datetime.now(), step, model.max_steps, duration)) + + step += 1 + + + print('finish test') + f.close() + Evaluation.plotError(maxJntError, 'result.txt') diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_multi_gpu.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_multi_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..45866a8e3a72011b1001bb6b696bcc45bd5ff765 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_multi_gpu.py @@ -0,0 +1,159 @@ +'''provide a multi-thread training scheme +WARNING: this file is still under development, is not guaranteed to work. +''' +from __future__ import print_function, absolute_import, division + +import gpu_config +import tensorflow as tf +import network.slim as slim +import numpy as np +import time, os +from datetime import datetime +import model.memory_util as memory_util + +FLAGS = tf.app.flags.FLAGS + +def _average_gradients(tower_grads): + '''calcualte the average gradient for each shared variable across all towers on multi gpus + Args: + tower_grads: list of lists of (gradient, variable) tuples. 
len(tower_grads)=#tower, len(tower_grads[0])=#vars + Returns: + List of paris (gradient, variable) where the gradients has been averaged across + all towers + ''' + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # over different variables + grads = [] + for g, _ in grad_and_vars: + # over different towers + expanded_g = tf.expand_dims(g,0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + +def train(model): + '''train the provided model + model: provide several required interface to train + ''' + with tf.Graph().as_default(), tf.device('/cpu:0'): + global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), trainable=False) + lr = tf.train.exponential_decay(model.init_lr, + global_step, + model.decay_steps, + model.lr_decay_factor, + staircase=True) + opt = model.opt(lr) + + '''split the batch into num_gpus groups, + do the backpropagation on each gpu seperately, + then average the gradidents on each of which and update + ''' + assert FLAGS.batch_size % FLAGS.num_gpus == 0, ( + 'the batch_size should be divisible wrt num_gpus') + dms, poses = model.batch_input + dm_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=dms) + pose_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=poses) + + # calculate the gradients for each gpu + tower_grads = [] + reuse_variables = None + + for i in range(FLAGS.num_gpus): + # i = 1 + # with tf.device('/gpu:%d'%gpu_config.gpu_list[i]): + with tf.device('gpu:%d'%i): + with tf.name_scope('%s_%d'%(model.name, i)) as scope: + with slim.arg_scope([slim.variables.variable], device='/cpu:0'): + # with slim.arg_scope([slim.variables.variable], device='/gpu:%d'%gpu_config.gpu_list[i]): + loss = model.loss(dm_splits[i], pose_splits[i], reuse_variables) + + # tf.get_variable_scope().reuse_variables() + # reuse variables after the first tower + reuse_variables = True + # only retain the summaries for the last tower + summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) + # retain the batch-norm optimization only from the last tower + batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION, + scope) + + grads = opt.compute_gradients(loss) + tower_grads.append(grads) + print('setup %dth gpu on %d'%(i, gpu_config.gpu_list[i])) + + grads = _average_gradients(tower_grads) + + # TODO: add input summaries + # summaries.extend(input_summaries) + + summaries.append(tf.summary.scalar('learning_rate', lr)) + + for grad, var in grads: + if grad is not None: + summaries.append(tf.summary.histogram(var.op.name+'/gradients', grad)) + + apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) + + for var in tf.trainable_variables(): + summaries.append(tf.summary.histogram(var.op.name, var)) + + variable_averages = tf.train.ExponentialMovingAverage( + model.moving_average_decay, global_step) + variables_to_average = (tf.trainable_variables()+ + tf.moving_average_variables()) + variable_averages_op = variable_averages.apply(variables_to_average) + + batchnorm_update_op = tf.group(*batchnorm_updates) + # group all training operations into one + train_op = tf.group(apply_gradient_op, variable_averages_op, batchnorm_update_op) + + saver = tf.train.Saver(tf.global_variables()) + summary_op = tf.summary.merge(summaries) + init_op = tf.global_variables_initializer() + + memory_util.vlog(1) + + sess = 
tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False)) + + sess.run(init_op) + tf.train.start_queue_runners(sess=sess) + + summary_writer = tf.summary.FileWriter( + model.train_dir, + graph=sess.graph) + + # finally into the training loop + print('finally into the long long training loop') + + # for step in range(model.max_steps): + for step in range(1000): + start_time = time.time() + _, loss_value = sess.run([train_op, loss]) + duration = time.time() - start_time + + assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + + if step%10 == 0: + format_str = '[model/train_multi_gpu] %s: step %d, loss = %.2f, %.3f sec/batch, %.3f sec/sample' + print(format_str %(datetime.now(), step, loss_value, duration, duration/FLAGS.batch_size)) + + if step%100 == 0: + summary_str = sess.run(summary_op) + summary_writer.add_summary(summary_str, step) + + if step%1000 == 0 or (step+1) == model.max_steps: + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + + print('finish train') + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_single_gpu.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_single_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..1e72e3f4c260e5f1e9c1d1b7dba877867c057828 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/model/train_single_gpu.py @@ -0,0 +1,179 @@ +from __future__ import print_function, absolute_import, division + +import gpu_config +import tensorflow as tf +import network.slim as slim +import numpy as np +import time, os +from datetime import datetime + +FLAGS = tf.app.flags.FLAGS + +def _average_gradients(tower_grads): + '''calcualte the average gradient for each shared variable across all towers on multi gpus + Args: + tower_grads: list of lists of (gradient, variable) tuples. 
len(tower_grads)=#tower, len(tower_grads[0])=#vars + Returns: + List of paris (gradient, variable) where the gradients has been averaged across + all towers + ''' + average_grads = [] + for grad_and_vars in zip(*tower_grads): + # over different variables + grads = [] + for g, _ in grad_and_vars: + # over different towers + expanded_g = tf.expand_dims(g,0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + +def train(model, restore_step=None): + '''train the provided model + model: provide several required interface to train + ''' + with tf.Graph().as_default(): + global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), trainable=False) + lr = tf.train.exponential_decay(model.init_lr, + global_step, + model.decay_steps, + model.lr_decay_factor, + staircase=True) + + print('[train] learning rate decays per %d steps with rate=%f'%( + model.decay_steps,model.lr_decay_factor)) + print('[train] initial learning_rate = %f'%model.init_lr) + tf.summary.scalar('learning_rate', lr) + opt = model.opt(lr) + + batches = model.batch_input(model.train_dataset) + + loss = model.loss(*batches) + tf.summary.scalar('loss', loss) + + if model.is_validate: + # set batch_size as 3 since tensorboard visualization + val_batches = model.batch_input(model.val_dataset, 3) + model.test(*val_batches) # don't need the name + + batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION) + + accu_steps = float(FLAGS.sub_batch) + + grads = opt.compute_gradients(loss) + accum_grads = [] + for grad, var in grads: + if grad is not None: + accum_grads.append(tf.Variable(tf.zeros_like(grad), trainable=False, + collections=[tf.GraphKeys.LOCAL_VARIABLES], + name=var.op.name+'_accu_grad')) + else: + accum_grads.append(tf.Variable(tf.zeros_like(var), trainable=False, + collections=[tf.GraphKeys.LOCAL_VARIABLES], + name=var.op.name+'_accu_grad')) + + reset_op = [grad.assign(tf.zeros_like(grad)) for grad in accum_grads] + accum_op = [accum_grads[i].assign_add(grad[0]) for i, grad in enumerate(grads)if grad[0] is not None] + + ave_grad = [(tf.clip_by_value(tf.divide(accum_grads[i], accu_steps), -0.2, 0.2), + grad[1]) for i, grad in enumerate(grads)] + apply_gradient_op = opt.apply_gradients(ave_grad, + global_step=global_step) + + for ave_grad, grad_and_var in zip(ave_grad, grads): + grad, var = grad_and_var[0], grad_and_var[1] + if grad is not None: + tf.summary.histogram(var.op.name, var) + tf.summary.histogram(var.op.name+'/gradients', ave_grad) + + # variable_averages = tf.train.ExponentialMovingAverage( + # model.moving_average_decay, global_step) + # variables_to_average = tf.trainable_variables() + # var_1, var_2 = tf.moving_average_variables()[0], tf.moving_average_variables()[1] + # variable_averages_op = variable_averages.apply(variables_to_average) + + batchnorm_update_op = tf.group(*batchnorm_updates) + # group all training operations into one + # train_op = tf.group(apply_gradient_op, variable_averages_op) + train_op = tf.group(apply_gradient_op) + + saver = tf.train.Saver(tf.global_variables()) + summary_op = tf.summary.merge_all() + + init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) + + sess = tf.Session(config=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False)) + + sess.run(init_op) + start_step = 0 + # to resume the training + if restore_step 
is not None and restore_step>0: + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt-%d'%restore_step) + saver.restore(sess, checkpoint_path) + start_step = restore_step + + tf.train.start_queue_runners(sess=sess) + + #TODO: change to tf.train.SummaryWriter() + summary_writer = tf.summary.FileWriter( + model.summary_dir, + graph=sess.graph) + + # finally into the training loop + print('finally into the long long training loop') + + log_path = os.path.join(model.train_dir, 'training_log.txt') + f = open(log_path, 'a') + + for step in range(start_step, model.max_steps): + if f.closed: + f = open(log_path, 'a') + + start_time = time.time() + ave_loss = 0 + sess.run(reset_op) + for sub_step in range(int(accu_steps)): + _, _, loss_value = sess.run([accum_op, batchnorm_update_op, loss]) + assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + ave_loss += loss_value + + _ = sess.run([train_op]) + ave_loss /= accu_steps + duration = time.time() - start_time + + if step%5 == 0: + format_str = '[model/train_multi_gpu] %s: step %d/%d, loss = %.3f, %.3f sec/batch, %.3f sec/sample' + print(format_str%(datetime.now(), step, model.max_steps, ave_loss, duration, duration/(FLAGS.batch_size*accu_steps))) + f.write(format_str%(datetime.now(), step, model.max_steps, ave_loss, duration, duration/(FLAGS.batch_size*accu_steps))+'\n') + f.flush() + + if step%20 == 0: + summary_str = sess.run(summary_op) + summary_writer.add_summary(summary_str, step) + + + if step%40 == 0 and hasattr(model, 'do_test'): + model.do_test(sess, summary_writer, step) + + if step%100 == 0 or (step+1) == model.max_steps: + if not os.path.exists(model.train_dir): + os.makedirs(model.train_dir) + checkpoint_path = os.path.join(model.train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + print('model has been saved to %s\n'%checkpoint_path) + f.write('model has been saved to %s\n'%checkpoint_path) + f.flush() + + print('finish train') + f.close() + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/.keep b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/__init__.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..26ca233c929c1bac6f1ee07145fe59658711c502 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TF-Slim grouped API. Please see README.md for details and usage.""" +# pylint: disable=unused-import + +# Collapse tf-slim into a single namespace. 
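+# Illustrative sketch of how the grouped modules are typically combined
+# (names such as `images` are placeholders, not part of this package):
+#
+#   import network.slim as slim
+#
+#   with slim.scopes.arg_scope([slim.ops.conv2d],
+#                              batch_norm_params={'decay': 0.999},
+#                              weight_decay=0.0005):
+#     net = slim.ops.conv2d(images, 64, [3, 3], stride=2, scope='conv1')
+#     net = slim.ops.max_pool(net, [2, 2], scope='pool1')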
+from __future__ import absolute_import + +from network.slim import losses +from network.slim import ops +from network.slim import scopes +from network.slim import variables +from network.slim.scopes import arg_scope diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/losses.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..78298d092fab3afc264e427fb060602c27ea97b0 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/losses.py @@ -0,0 +1,174 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains convenience wrappers for various Neural Network TensorFlow losses. + + All the losses defined here add themselves to the LOSSES_COLLECTION + collection. + + l1_loss: Define a L1 Loss, useful for regularization, i.e. lasso. + l2_loss: Define a L2 Loss, useful for regularization, i.e. weight decay. + cross_entropy_loss: Define a cross entropy loss using + softmax_cross_entropy_with_logits. Useful for classification. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +# In order to gather all losses in a network, the user should use this +# key for get_collection, i.e: +# losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) +LOSSES_COLLECTION = '_losses' + + +def l1_regularizer(weight=1.0, scope=None): + """Define a L1 regularizer. + + Args: + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + a regularizer function. + """ + def regularizer(tensor): + with tf.name_scope(scope, 'L1Regularizer', [tensor]): + l1_weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='weight') + return tf.multiply(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value') + return regularizer + + +def l2_regularizer(weight=1.0, scope=None): + """Define a L2 regularizer. + + Args: + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + a regularizer function. + """ + def regularizer(tensor): + with tf.name_scope(scope, 'L2Regularizer', [tensor]): + l2_weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='weight') + return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value') + return regularizer + + +def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None): + """Define a L1L2 regularizer. + + Args: + weight_l1: scale the L1 loss by this factor. + weight_l2: scale the L2 loss by this factor. + scope: Optional scope for name_scope. + + Returns: + a regularizer function. 
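+
+  Example (illustrative):
+    reg_fn = l1_l2_regularizer(weight_l1=1e-5, weight_l2=1e-4)
+    penalty = reg_fn(weights)  # weight_l1*sum(|weights|) + weight_l2*l2_loss(weights)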
+ """ + def regularizer(tensor): + with tf.name_scope(scope, 'L1L2Regularizer', [tensor]): + weight_l1_t = tf.convert_to_tensor(weight_l1, + dtype=tensor.dtype.base_dtype, + name='weight_l1') + weight_l2_t = tf.convert_to_tensor(weight_l2, + dtype=tensor.dtype.base_dtype, + name='weight_l2') + reg_l1 = tf.multiply(weight_l1_t, tf.reduce_sum(tf.abs(tensor)), + name='value_l1') + reg_l2 = tf.multiply(weight_l2_t, tf.nn.l2_loss(tensor), + name='value_l2') + return tf.add(reg_l1, reg_l2, name='value') + return regularizer + + +def l1_loss(tensor, weight=1.0, scope=None): + """Define a L1Loss, useful for regularize, i.e. lasso. + + Args: + tensor: tensor to regularize. + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + the L1 loss op. + """ + with tf.name_scope(scope, 'L1Loss', [tensor]): + weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='loss_weight') + loss = tf.multiply(weight, tf.reduce_sum(tf.abs(tensor)), name='value') + tf.add_to_collection(LOSSES_COLLECTION, loss) + return loss + + +def l2_loss(tensor, weight=1.0, scope=None): + """Define a L2Loss, useful for regularize, i.e. weight decay. + + Args: + tensor: tensor to regularize. + weight: an optional weight to modulate the loss. + scope: Optional scope for name_scope. + + Returns: + the L2 loss op. + """ + with tf.name_scope(scope, 'L2Loss', [tensor]): + weight = tf.convert_to_tensor(weight, + dtype=tensor.dtype.base_dtype, + name='loss_weight') + loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value') + tf.add_to_collection(LOSSES_COLLECTION, loss) + return loss + + +def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0, + weight=1.0, scope=None): + """Define a Cross Entropy loss using softmax_cross_entropy_with_logits. + + It can scale the loss by weight factor, and smooth the labels. + + Args: + logits: [batch_size, num_classes] logits outputs of the network . + one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels. + label_smoothing: if greater than 0 then smooth the labels. + weight: scale the loss by this factor. + scope: Optional scope for name_scope. + + Returns: + A tensor with the softmax_cross_entropy loss. + """ + logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape()) + with tf.name_scope(scope, 'CrossEntropyLoss', [logits, one_hot_labels]): + num_classes = one_hot_labels.get_shape()[-1].value + one_hot_labels = tf.cast(one_hot_labels, logits.dtype) + if label_smoothing > 0: + smooth_positives = 1.0 - label_smoothing + smooth_negatives = label_smoothing / num_classes + one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives + cross_entropy = tf.contrib.nn.deprecated_flipped_softmax_cross_entropy_with_logits( + logits, one_hot_labels, name='xentropy') + + weight = tf.convert_to_tensor(weight, + dtype=logits.dtype.base_dtype, + name='loss_weight') + loss = tf.multiply(weight, tf.reduce_mean(cross_entropy), name='value') + tf.add_to_collection(LOSSES_COLLECTION, loss) + return loss diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/ops.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/ops.py new file mode 100644 index 0000000000000000000000000000000000000000..3e047cc85351b96e98af9a2ae13a526ea40e0f76 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/ops.py @@ -0,0 +1,781 @@ +# Copyright 2016 Google Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains convenience wrappers for typical Neural Network TensorFlow layers. + + Additionally it maintains a collection with update_ops that need to be + updated after the ops have been computed, for example to update moving means + and moving variances of batch_norm. + + Ops that have different behavior during training or eval have an is_training + parameter. Additionally Ops that contain variables.variable have a trainable + parameter, which control if the ops variables are trainable or not. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import tensorflow as tf + +from tensorflow.python.training import moving_averages + +from network.slim import losses +from network.slim import scopes +from network.slim import variables + +# Used to keep the update ops done by batch_norm. +UPDATE_OPS_COLLECTION = '_update_ops_' + + +# the batch_norm here is batch_renorm implementation instead of batch norm +@scopes.add_arg_scope +def batch_norm(inputs, + decay=0.999, + center=True, + scale=False, + epsilon=0.001, + moving_vars='moving_vars', + activation=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a Batch ReNormalization layer. + + Args: + inputs: a tensor of size [batch_size, height, width, channels] + or [batch_size, channels]. + decay: decay for the moving average. + center: If True, subtract beta. If False, beta is not created and ignored. + scale: If True, multiply by gamma. If False, gamma is + not used. When the next layer is linear (also e.g. ReLU), this can be + disabled since the scaling can be done by the next layer. + epsilon: small float added to variance to avoid dividing by zero. + moving_vars: collection to store the moving_mean and moving_variance. + activation: activation function. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + + Returns: + a tensor representing the output of the operation. + + """ + inputs_shape = inputs.get_shape() + with tf.variable_scope(scope, 'BatchReNorm', [inputs], reuse=reuse): + axis = list(range(len(inputs_shape) - 1)) + params_shape = inputs_shape[-1:] + # Allocate parameters for the beta and gamma of the normalization. 
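+    # (Batch renormalization, cf. Ioffe 2017: during training the batch
+    # statistics are corrected by r = batch_std / moving_std and
+    # d = (batch_mean - moving_mean) / moving_std, both clipped via r_max
+    # and d_max; r_max and d_max are annealed from ~1 and ~0 towards 3 and
+    # 5 through the curr_t counter updated below.)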
+ beta, gamma = None, None + if center: + beta = variables.variable('beta', + params_shape, + initializer=tf.zeros_initializer(), + trainable=trainable, + restore=restore) + if scale: + gamma = variables.variable('gamma', + params_shape, + initializer=tf.ones_initializer(), + trainable=trainable, + restore=restore) + # Create moving_mean and moving_variance add them to + # GraphKeys.MOVING_AVERAGE_VARIABLES collections. + moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES] + moving_mean = variables.variable('moving_mean', + params_shape, + initializer=tf.zeros_initializer(), + trainable=False, + restore=restore, + collections=moving_collections) + moving_variance = variables.variable('moving_variance', + params_shape, + initializer=tf.ones_initializer(), + trainable=False, + restore=restore, + collections=moving_collections) + + r_max = variables.variable('r_max', + (1,), + initializer=tf.ones_initializer(), + trainable=False, + restore=restore) + d_max = variables.variable('d_max', + (1,), + initializer=tf.zeros_initializer(), + trainable=False, + restore=restore) + curr_t = variables.variable('curr_t', + (1,), + initializer=tf.zeros_initializer(), + trainable=False, + restore=restore) + + if is_training: + # Calculate the moments based on the individual batch. + mean, variance = tf.nn.moments(inputs, axis) + + update_moving_mean = moving_averages.assign_moving_average( + moving_mean, mean, decay) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean) + update_moving_variance = moving_averages.assign_moving_average( + moving_variance, variance, decay) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance) + + r_max_val = 3.0 + new_r = tf.divide(r_max_val, 1.0+(r_max_val-1.0)*tf.exp(-curr_t)) + update_r = r_max.assign(new_r) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_r) + + d_max_val = 5.0 + new_d = tf.divide(d_max_val, (1.0+(d_max_val/1e-3)-1.0)*tf.exp(-2.0*curr_t)) + update_d = d_max.assign(new_d) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_d) + + new_t = curr_t+1e-5 + update_t = curr_t.assign(new_t) + tf.add_to_collection(UPDATE_OPS_COLLECTION, update_t) + + # batch renorm + std = tf.sqrt(variance+epsilon) + moving_std = tf.sqrt(moving_variance+epsilon) + r = tf.divide(std, moving_std) + r = tf.stop_gradient(tf.clip_by_value(r, 1.0/r_max, r_max)) + + d = tf.divide(mean - moving_mean, moving_std) + d = tf.stop_gradient(tf.clip_by_value(d, -d_max, d_max)) + + outputs = tf.nn.batch_normalization( + inputs, mean, variance, None, None, epsilon) + outputs = tf.multiply(outputs, r) + d + + if scale: + outputs = tf.multiply(outputs, gamma) + if center: + outputs += beta + + else: + # Just use the moving_mean and moving_variance. + mean = moving_mean + variance = moving_variance + + # Normalize the activations. + outputs = tf.nn.batch_normalization( + inputs, mean, variance, beta, gamma, epsilon) + + outputs.set_shape(inputs.get_shape()) + if activation: + outputs = activation(outputs) + return outputs + + +def _two_element_tuple(int_or_tuple): + """Converts `int_or_tuple` to height, width. + + Several of the functions that follow accept arguments as either + a tuple of 2 integers or a single integer. A single integer + indicates that the 2 values of the tuple are the same. + + This functions normalizes the input value by always returning a tuple. + + Args: + int_or_tuple: A list of 2 ints, a single int or a tf.TensorShape. + + Returns: + A tuple with 2 values. + + Raises: + ValueError: If `int_or_tuple` it not well formed. 
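+
+  Example (illustrative):
+    _two_element_tuple(3)       # -> (3, 3)
+    _two_element_tuple([2, 3])  # -> (2, 3)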
+ """ + if isinstance(int_or_tuple, (list, tuple)): + if len(int_or_tuple) != 2: + raise ValueError('Must be a list with 2 elements: %s' % int_or_tuple) + return int(int_or_tuple[0]), int(int_or_tuple[1]) + if isinstance(int_or_tuple, int): + return int(int_or_tuple), int(int_or_tuple) + if isinstance(int_or_tuple, tf.TensorShape): + if len(int_or_tuple) == 2: + return int_or_tuple[0], int_or_tuple[1] + raise ValueError('Must be an int, a list with 2 elements or a TensorShape of ' + 'length 2') + + +@scopes.add_arg_scope +def conv2d(inputs, + num_filters_out, + kernel_size, + stride=1, + padding='SAME', + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + batch_norm_params=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a 2D convolution followed by an optional batch_norm layer. + + conv2d creates a variable called 'weights', representing the convolutional + kernel, that is convolved with the input. If `batch_norm_params` is None, a + second variable called 'biases' is added to the result of the convolution + operation. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_filters_out: the number of output filters. + kernel_size: a list of length 2: [kernel_height, kernel_width] of + of the filters. Can be an int if both values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: one of 'VALID' or 'SAME'. + activation: activation function. + stddev: standard deviation of the truncated guassian weight distribution. + bias: the initial value of the biases. + weight_decay: the weight decay. + batch_norm_params: parameters for the batch_norm. If is None don't use it. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + Returns: + a tensor representing the output of the operation. 
+ + """ + with tf.variable_scope(scope, 'Conv', [inputs], reuse=reuse): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + num_filters_in = inputs.get_shape()[-1] + weights_shape = [kernel_h, kernel_w, + num_filters_in, num_filters_out] + weights_initializer = tf.truncated_normal_initializer(stddev=stddev) + l2_regularizer = None + if weight_decay and weight_decay > 0: + l2_regularizer = losses.l2_regularizer(weight_decay) + weights = variables.variable('weights', + shape=weights_shape, + initializer=weights_initializer, + regularizer=l2_regularizer, + trainable=trainable, + restore=restore) + conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], + padding=padding) + if batch_norm_params is not None: + with scopes.arg_scope([batch_norm], is_training=is_training, + trainable=trainable, restore=restore): + outputs = batch_norm(conv, **batch_norm_params) + else: + bias_shape = [num_filters_out,] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.bias_add(conv, biases) + if activation: + outputs = activation(outputs) + return outputs + +@scopes.add_arg_scope +def depthwise_conv2d(inputs, + num_filters_out, + kernel_size, + stride=1, + padding='VALID', + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + is_norm=False, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a 2D depth wise convolution followed by an optional batch_norm layer. + this applies channels differnt filters to each channel independently + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_filters_out: the number of output filters. + kernel_size: a list of length 2: [kernel_height, kernel_width] of + of the filters. Can be an int if both values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: one of 'VALID' or 'SAME'. + activation: activation function. + stddev: standard deviation of the truncated guassian weight distribution. + bias: the initial value of the biases. + weight_decay: the weight decay. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + Returns: + a tensor representing the output of the operation. 
+ + """ + with tf.variable_scope(scope, 'ConvDepthWise', [inputs], reuse=reuse): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + num_filters_in = inputs.get_shape()[-1].value + weights_shape = [kernel_h, kernel_w, + num_filters_in, num_filters_out] + weights_initializer = tf.truncated_normal_initializer(stddev=stddev) + l2_regularizer = None + if weight_decay and weight_decay > 0: + l2_regularizer = losses.l2_regularizer(weight_decay) + weights = variables.variable('weights', + shape=weights_shape, + initializer=weights_initializer, + regularizer=l2_regularizer, + trainable=trainable, + restore=restore) + + batch_size = inputs.get_shape()[0].value + num_pt = inputs.get_shape()[1].value + + conv = tf.nn.depthwise_conv2d(inputs, weights, [1, stride_h, stride_w, 1], + padding=padding) + + if is_norm: + outputs = tf.reshape(conv, (batch_size*num_pt, num_filters_out, num_filters_in)) + outputs = batch_norm(outputs, decay=0.999) + outputs = tf.reshape(conv, (batch_size, num_pt, num_filters_out, num_filters_in)) + else: + bias_shape = [conv.get_shape()[-1],] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.bias_add(conv, biases) + outputs = tf.reshape(outputs, (batch_size,num_pt,num_filters_out,num_filters_in)) + + if activation: + outputs = activation(outputs) + return outputs + +@scopes.add_arg_scope +def depthwise_conv2d_v1(inputs, + num_filters_out, + kernel_size, + stride=1, + padding='VALID', + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + batch_norm_params=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a 2D depth wise convolution followed by an optional batch_norm layer. + this applies channels differnt filters to each channel independently + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_filters_out: the number of output filters. + kernel_size: a list of length 2: [kernel_height, kernel_width] of + of the filters. Can be an int if both values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: one of 'VALID' or 'SAME'. + activation: activation function. + stddev: standard deviation of the truncated guassian weight distribution. + bias: the initial value of the biases. + weight_decay: the weight decay. + batch_norm_params: parameters for the batch_norm. If is None don't use it. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + Returns: + a tensor representing the output of the operation. 
+
+  """
+  with tf.variable_scope(scope, 'ConvDepthWise', [inputs], reuse=reuse):
+    kernel_h, kernel_w = _two_element_tuple(kernel_size)
+    stride_h, stride_w = _two_element_tuple(stride)
+    num_filters_in = inputs.get_shape()[-1]
+    weights_shape = [kernel_h, kernel_w,
+                     num_filters_in, num_filters_out]
+    weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
+    l2_regularizer = None
+    if weight_decay and weight_decay > 0:
+      l2_regularizer = losses.l2_regularizer(weight_decay)
+    weights = variables.variable('weights',
+                                 shape=weights_shape,
+                                 initializer=weights_initializer,
+                                 regularizer=l2_regularizer,
+                                 trainable=trainable,
+                                 restore=restore)
+    conv = tf.nn.depthwise_conv2d(inputs, weights, [1, stride_h, stride_w, 1],
+                                  padding=padding)
+    if batch_norm_params is not None:
+      with scopes.arg_scope([batch_norm], is_training=is_training,
+                            trainable=trainable, restore=restore):
+        outputs = batch_norm(conv, **batch_norm_params)
+    else:
+      bias_shape = [conv.get_shape()[-1],]
+      bias_initializer = tf.constant_initializer(bias)
+      biases = variables.variable('biases',
+                                  shape=bias_shape,
+                                  initializer=bias_initializer,
+                                  trainable=trainable,
+                                  restore=restore)
+      outputs = tf.nn.bias_add(conv, biases)
+    if activation:
+      outputs = activation(outputs)
+    return outputs
+
+def _deconv_output_length(input_length, filter_size, padding, stride):
+  """Determines the output length of a transposed convolution given the
+  input length, kernel size and stride.
+  Args:
+    padding: 'SAME', 'VALID' or 'FULL'
+  Returns:
+    output length
+  """
+  # normalize the padding string so the comparisons below match
+  padding = padding.upper()
+  if input_length is None:
+    return None
+  input_length *= stride
+  if padding == 'VALID':
+    input_length += max(filter_size-stride, 0)
+  elif padding == 'FULL':
+    input_length -= (stride + filter_size - 2)
+  return input_length
+
+@scopes.add_arg_scope
+def deconv(inputs,
+           num_filters_out,
+           kernel_size,
+           stride,
+           padding='SAME',
+           activation=tf.nn.relu,
+           stddev=0.01,
+           bias=0.0,
+           weight_decay=0,
+           batch_norm_params=None,
+           is_training=True,
+           trainable=True,
+           restore=True,
+           scope=None,
+           reuse=None):
+  """Adds a 2D deconvolution operator followed by an optional batch_norm layer.
+  Args:
+    inputs: a tensor of size [batch_size, height, width, channels]
+    num_filters_out: number of output feature channels
+    padding: one of 'VALID', 'SAME' or 'FULL'
+  Returns:
+    a tensor representing the output of the operation
+  """
+  with tf.variable_scope(scope, 'Deconv', [inputs], reuse=reuse):
+    batch_size = inputs.get_shape()[0]
+    height, width = inputs.get_shape()[1], inputs.get_shape()[2]
+    num_filters_in = inputs.get_shape()[-1]
+
+    kernel_h, kernel_w = _two_element_tuple(kernel_size)
+    stride_h, stride_w = _two_element_tuple(stride)
+
+    weights_shape = [kernel_h, kernel_w,
+                     num_filters_out, num_filters_in]
+    weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
+    l2_regularizer = None
+    if weight_decay and weight_decay > 0:
+      l2_regularizer = losses.l2_regularizer(weight_decay)
+    weights = variables.variable('weights',
+                                 shape=weights_shape,
+                                 initializer=weights_initializer,
+                                 regularizer=l2_regularizer,
+                                 trainable=trainable,
+                                 restore=restore)
+
+    out_height = _deconv_output_length(height, kernel_h, padding, stride_h)
+    out_width = _deconv_output_length(width, kernel_w, padding, stride_w)
+    output_shape = tf.stack([batch_size, out_height, out_width, num_filters_out])
+    deconv = tf.nn.conv2d_transpose(inputs, weights, output_shape,
+                                    [1, stride_h, stride_w, 1], padding=padding)
+
+    if batch_norm_params is not None:
+      with scopes.arg_scope([batch_norm],
is_training=is_training, + trainable=trainable, restore=restore): + outputs = batch_norm(deconv, **batch_norm_params) + else: + bias_shape = [num_filters_out,] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.bias_add(deconv, biases) + if activation: + outputs = activation(outputs) + return outputs + + +@scopes.add_arg_scope +def fc(inputs, + num_units_out, + activation=tf.nn.relu, + stddev=0.01, + bias=0.0, + weight_decay=0, + batch_norm_params=None, + is_training=True, + trainable=True, + restore=True, + scope=None, + reuse=None): + """Adds a fully connected layer followed by an optional batch_norm layer. + + FC creates a variable called 'weights', representing the fully connected + weight matrix, that is multiplied by the input. If `batch_norm` is None, a + second variable called 'biases' is added to the result of the initial + vector-matrix multiplication. + + Args: + inputs: a [B x N] tensor where B is the batch size and N is the number of + input units in the layer. + num_units_out: the number of output units in the layer. + activation: activation function. + stddev: the standard deviation for the weights. + bias: the initial value of the biases. + weight_decay: the weight decay. + batch_norm_params: parameters for the batch_norm. If is None don't use it. + is_training: whether or not the model is in training mode. + trainable: whether or not the variables should be trainable or not. + restore: whether or not the variables should be marked for restore. + scope: Optional scope for variable_scope. + reuse: whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + + Returns: + the tensor variable representing the result of the series of operations. + """ + with tf.variable_scope(scope, 'FC', [inputs], reuse=reuse): + num_units_in = inputs.get_shape()[1] + weights_shape = [num_units_in, num_units_out] + weights_initializer = tf.truncated_normal_initializer(stddev=stddev) + l2_regularizer = None + if weight_decay and weight_decay > 0: + l2_regularizer = losses.l2_regularizer(weight_decay) + weights = variables.variable('weights', + shape=weights_shape, + initializer=weights_initializer, + regularizer=l2_regularizer, + trainable=trainable, + restore=restore) + if batch_norm_params is not None: + outputs = tf.matmul(inputs, weights) + with scopes.arg_scope([batch_norm], is_training=is_training, + trainable=trainable, restore=restore): + outputs = batch_norm(outputs, **batch_norm_params) + else: + bias_shape = [num_units_out,] + bias_initializer = tf.constant_initializer(bias) + biases = variables.variable('biases', + shape=bias_shape, + initializer=bias_initializer, + trainable=trainable, + restore=restore) + outputs = tf.nn.xw_plus_b(inputs, weights, biases) + if activation: + outputs = activation(outputs) + return outputs + + +def one_hot_encoding(labels, num_classes, scope=None): + """Transform numeric labels into onehot_labels. + + Args: + labels: [batch_size] target labels. + num_classes: total number of classes. + scope: Optional scope for name_scope. + Returns: + one hot encoding of the labels. 
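+
+  Example (illustrative):
+    labels [1, 0] with num_classes=3 map to [[0., 1., 0.], [1., 0., 0.]].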
+ """ + with tf.name_scope(scope, 'OneHotEncoding', [labels]): + batch_size = labels.get_shape()[0] + indices = tf.expand_dims(tf.range(0, batch_size), 1) + labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype) + concated = tf.concat(axis=1, values=[indices, labels]) + onehot_labels = tf.sparse_to_dense( + concated, tf.stack([batch_size, num_classes]), 1.0, 0.0) + onehot_labels.set_shape([batch_size, num_classes]) + return onehot_labels + + +@scopes.add_arg_scope +def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): + """Adds a Max Pooling layer. + + It is assumed by the wrapper that the pooling is only done per image and not + in depth or batch. + + Args: + inputs: a tensor of size [batch_size, height, width, depth]. + kernel_size: a list of length 2: [kernel_height, kernel_width] of the + pooling kernel over which the op is computed. Can be an int if both + values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: the padding method, either 'VALID' or 'SAME'. + scope: Optional scope for name_scope. + + Returns: + a tensor representing the results of the pooling operation. + Raises: + ValueError: if 'kernel_size' is not a 2-D list + """ + with tf.name_scope(scope, 'MaxPool', [inputs]): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + return tf.nn.max_pool(inputs, + ksize=[1, kernel_h, kernel_w, 1], + strides=[1, stride_h, stride_w, 1], + padding=padding) + +@scopes.add_arg_scope +def upsampling_nearest(inputs, scale): + assert scale>1, 'scale of upsampling should be larger then 1' + new_h = int(inputs.shape[1]*scale) + new_w = int(inputs.shape[2]*scale) + return tf.image.resize_images(inputs, [new_h, new_w], + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + + +@scopes.add_arg_scope +def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): + """Adds a Avg Pooling layer. + + It is assumed by the wrapper that the pooling is only done per image and not + in depth or batch. + + Args: + inputs: a tensor of size [batch_size, height, width, depth]. + kernel_size: a list of length 2: [kernel_height, kernel_width] of the + pooling kernel over which the op is computed. Can be an int if both + values are the same. + stride: a list of length 2: [stride_height, stride_width]. + Can be an int if both strides are the same. Note that presently + both strides must have the same value. + padding: the padding method, either 'VALID' or 'SAME'. + scope: Optional scope for name_scope. + + Returns: + a tensor representing the results of the pooling operation. + """ + with tf.name_scope(scope, 'AvgPool', [inputs]): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + return tf.nn.avg_pool(inputs, + ksize=[1, kernel_h, kernel_w, 1], + strides=[1, stride_h, stride_w, 1], + padding=padding) + + +@scopes.add_arg_scope +def dropout(inputs, keep_prob=0.5, is_training=True, scope=None): + """Returns a dropout layer applied to the input. + + Args: + inputs: the tensor to pass to the Dropout layer. + keep_prob: the probability of keeping each input unit. + is_training: whether or not the model is in training mode. If so, dropout is + applied and values scaled. Otherwise, inputs is returned. + scope: Optional scope for name_scope. + + Returns: + a tensor representing the output of the operation. 
+ """ + if is_training and keep_prob > 0: + with tf.name_scope(scope, 'Dropout', [inputs]): + return tf.nn.dropout(inputs, keep_prob) + else: + return inputs + + +def flatten(inputs, scope=None): + """Flattens the input while maintaining the batch_size. + + Assumes that the first dimension represents the batch. + + Args: + inputs: a tensor of size [batch_size, ...]. + scope: Optional scope for name_scope. + + Returns: + a flattened tensor with shape [batch_size, k]. + Raises: + ValueError: if inputs.shape is wrong. + """ + if len(inputs.get_shape()) < 2: + raise ValueError('Inputs must be have a least 2 dimensions') + dims = inputs.get_shape()[1:] + k = dims.num_elements() + with tf.name_scope(scope, 'Flatten', [inputs]): + return tf.reshape(inputs, [-1, k]) + + +def repeat_op(repetitions, inputs, op, *args, **kwargs): + """Build a sequential Tower starting from inputs by using an op repeatedly. + + It creates new scopes for each operation by increasing the counter. + Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1') + it will repeat the given op under the following variable_scopes: + conv1/Conv + conv1/Conv_1 + conv1/Conv_2 + + Args: + repetitions: number or repetitions. + inputs: a tensor of size [batch_size, height, width, channels]. + op: an operation. + *args: args for the op. + **kwargs: kwargs for the op. + + Returns: + a tensor result of applying the operation op, num times. + Raises: + ValueError: if the op is unknown or wrong. + """ + scope = kwargs.pop('scope', None) + with tf.variable_scope(scope, 'RepeatOp', [inputs]): + tower = inputs + for _ in range(repetitions): + tower = op(tower, *args, **kwargs) + return tower + diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/scopes.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/scopes.py new file mode 100644 index 0000000000000000000000000000000000000000..2c2fb0a2efa7d30eaddb36fc30265f30cbaeb9ef --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/scopes.py @@ -0,0 +1,170 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the new arg_scope used for TF-Slim ops. + + Allows one to define models much more compactly by eliminating boilerplate + code. This is accomplished through the use of argument scoping (arg_scope). 
+
+  Example of how to use scopes.arg_scope:
+
+  with scopes.arg_scope([ops.conv2d], padding='SAME',
+                        stddev=0.01, weight_decay=0.0005):
+    net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
+    net = ops.conv2d(net, 256, [5, 5], scope='conv2')
+
+  The first call to conv2d will overwrite padding:
+    ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
+               stddev=0.01, weight_decay=0.0005, scope='conv1')
+
+  The second call to Conv will use predefined args:
+    ops.conv2d(inputs, 256, [5, 5], padding='SAME',
+               stddev=0.01, weight_decay=0.0005, scope='conv2')
+
+  Example of how to reuse an arg_scope:
+  with scopes.arg_scope([ops.conv2d], padding='SAME',
+                        stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope:
+    net = ops.conv2d(net, 256, [5, 5], scope='conv1')
+    ....
+
+  with scopes.arg_scope(conv2d_arg_scope):
+    net = ops.conv2d(net, 256, [5, 5], scope='conv2')
+
+  Example of how to use scopes.add_arg_scope:
+
+  @scopes.add_arg_scope
+  def conv2d(*args, **kwargs)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import functools
+
+from tensorflow.python.framework import ops
+
+_ARGSTACK_KEY = ("__arg_stack",)
+
+_DECORATED_OPS = set()
+
+
+def _get_arg_stack():
+  stack = ops.get_collection(_ARGSTACK_KEY)
+  if stack:
+    return stack[0]
+  else:
+    stack = [{}]
+    ops.add_to_collection(_ARGSTACK_KEY, stack)
+    return stack
+
+
+def _current_arg_scope():
+  stack = _get_arg_stack()
+  return stack[-1]
+
+
+def _add_op(op):
+  key_op = (op.__module__, op.__name__)
+  if key_op not in _DECORATED_OPS:
+    _DECORATED_OPS.add(key_op)
+
+
+@contextlib.contextmanager
+def arg_scope(list_ops_or_scope, **kwargs):
+  """Stores the default arguments for the given set of list_ops.
+
+  For usage, please see examples at top of the file.
+
+  Args:
+    list_ops_or_scope: List or tuple of operations to set argument scope for or
+      a dictionary containing the current scope. When list_ops_or_scope is a dict,
+      kwargs must be empty. When list_ops_or_scope is a list or tuple, then
+      every op in it needs to be decorated with @add_arg_scope to work.
+    **kwargs: keyword=value that will define the defaults for each op in
+      list_ops. All the ops need to accept the given set of arguments.
+
+  Yields:
+    the current_scope, which is a dictionary of {op: {arg: value}}
+  Raises:
+    TypeError: if list_ops is not a list or a tuple.
+    ValueError: if any op in list_ops has not been decorated with @add_arg_scope.
+  """
+  if isinstance(list_ops_or_scope, dict):
+    # Assumes that list_ops_or_scope is a scope that is being reused.
+    if kwargs:
+      raise ValueError("When attempting to re-use a scope by supplying a "
+                       "dictionary, kwargs must be empty.")
+    current_scope = list_ops_or_scope.copy()
+    try:
+      _get_arg_stack().append(current_scope)
+      yield current_scope
+    finally:
+      _get_arg_stack().pop()
+  else:
+    # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs.
+    if not isinstance(list_ops_or_scope, (list, tuple)):
+      raise TypeError("list_ops_or_scope must either be a list/tuple or reused "
+                      "scope (i.e. 
dict)") + try: + current_scope = _current_arg_scope().copy() + for op in list_ops_or_scope: + key_op = (op.__module__, op.__name__) + if not has_arg_scope(op): + raise ValueError("%s is not decorated with @add_arg_scope", key_op) + if key_op in current_scope: + current_kwargs = current_scope[key_op].copy() + current_kwargs.update(kwargs) + current_scope[key_op] = current_kwargs + else: + current_scope[key_op] = kwargs.copy() + _get_arg_stack().append(current_scope) + yield current_scope + finally: + _get_arg_stack().pop() + + +def add_arg_scope(func): + """Decorates a function with args so it can be used within an arg_scope. + + Args: + func: function to decorate. + + Returns: + A tuple with the decorated function func_with_args(). + """ + @functools.wraps(func) + def func_with_args(*args, **kwargs): + current_scope = _current_arg_scope() + current_args = kwargs + key_func = (func.__module__, func.__name__) + if key_func in current_scope: + current_args = current_scope[key_func].copy() + current_args.update(kwargs) + return func(*args, **current_args) + _add_op(func) + return func_with_args + + +def has_arg_scope(func): + """Checks whether a func has been decorated with @add_arg_scope or not. + + Args: + func: function to check. + + Returns: + a boolean. + """ + key_op = (func.__module__, func.__name__) + return key_op in _DECORATED_OPS diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/variables.py b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/variables.py new file mode 100644 index 0000000000000000000000000000000000000000..fcd6e55d1bb8df016b1dd0591a650b0c93d3a530 --- /dev/null +++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/network/variables.py @@ -0,0 +1,289 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains convenience wrappers for creating variables in TF-Slim. + +The variables module is typically used for defining model variables from the +ops routines (see slim.ops). Such variables are used for training, evaluation +and inference of models. + +All the variables created through this module would be added to the +MODEL_VARIABLES collection, if you create a model variable outside slim, it can +be added with slim.variables.add_variable(external_variable, reuse). + +Usage: + weights_initializer = tf.truncated_normal_initializer(stddev=0.01) + l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005) + weights = variables.variable('weights', + shape=[100, 100], + initializer=weights_initializer, + regularizer=l2_regularizer, + device='/cpu:0') + + biases = variables.variable('biases', + shape=[100], + initializer=tf.zeros_initializer(), + device='/cpu:0') + + # More complex example. 
+ + net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1') + net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2') + with slim.arg_scope([variables.variable], restore=False): + net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3') + + # Get all model variables from all the layers. + model_variables = slim.variables.get_variables() + + # Get all model variables from a specific the layer, i.e 'conv1'. + conv1_variables = slim.variables.get_variables('conv1') + + # Get all weights from all the layers. + weights = slim.variables.get_variables_by_name('weights') + + # Get all bias from all the layers. + biases = slim.variables.get_variables_by_name('biases') + + # Get all variables to restore. + # (i.e. only those created by 'conv1' and 'conv2') + variables_to_restore = slim.variables.get_variables_to_restore() + +************************************************ +* Initializing model variables from a checkpoint +************************************************ + +# Create some variables. +v1 = slim.variables.variable(name="v1", ..., restore=False) +v2 = slim.variables.variable(name="v2", ...) # By default restore=True +... +# The list of variables to restore should only contain 'v2'. +variables_to_restore = slim.variables.get_variables_to_restore() +restorer = tf.train.Saver(variables_to_restore) +with tf.Session() as sess: + # Restore variables from disk. + restorer.restore(sess, "/tmp/model.ckpt") + print("Model restored.") + # Do some work with the model + ... + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from network.slim import scopes + +# Collection containing all the variables created using slim.variables +MODEL_VARIABLES = '_model_variables_' + +# Collection containing the slim.variables that are created with restore=True. +VARIABLES_TO_RESTORE = '_variables_to_restore_' + + +def add_variable(var, restore=True): + """Adds a variable to the MODEL_VARIABLES collection. + + Optionally it will add the variable to the VARIABLES_TO_RESTORE collection. + Args: + var: a variable. + restore: whether the variable should be added to the + VARIABLES_TO_RESTORE collection. + + """ + collections = [MODEL_VARIABLES] + if restore: + collections.append(VARIABLES_TO_RESTORE) + for collection in collections: + if var not in tf.get_collection(collection): + tf.add_to_collection(collection, var) + + +def get_variables(scope=None, suffix=None): + """Gets the list of variables, filtered by scope and/or suffix. + + Args: + scope: an optional scope for filtering the variables to return. + suffix: an optional suffix for filtering the variables to return. + + Returns: + a copied list of variables with scope and suffix. + """ + candidates = tf.get_collection(MODEL_VARIABLES, scope)[:] + if suffix is not None: + candidates = [var for var in candidates if var.op.name.endswith(suffix)] + return candidates + + +def get_variables_to_restore(): + """Gets the list of variables to restore. + + Returns: + a copied list of variables. + """ + return tf.get_collection(VARIABLES_TO_RESTORE)[:] + + +def get_variables_by_name(given_name, scope=None): + """Gets the list of variables that were given that name. + + Args: + given_name: name given to the variable without scope. + scope: an optional scope for filtering the variables to return. + + Returns: + a copied list of variables with the given name and prefix. 
+ """ + return get_variables(scope=scope, suffix=given_name) + + +def get_unique_variable(name): + """Gets the variable uniquely identified by that name. + + Args: + name: a name that uniquely identifies the variable. + + Returns: + a tensorflow variable. + + Raises: + ValueError: if no variable uniquely identified by the name exists. + """ + candidates = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name) + if not candidates: + raise ValueError('Couldnt find variable %s' % name) + + for candidate in candidates: + if candidate.op.name == name: + return candidate + raise ValueError('Variable %s does not uniquely identify a variable', name) + + +class VariableDeviceChooser(object): + """Slim device chooser for variables. + + When using a parameter server it will assign them in a round-robin fashion. + When not using a parameter server it allows GPU:0 placement otherwise CPU:0. + """ + + def __init__(self, + num_parameter_servers=0, + ps_device='/job:ps', + placement='CPU:0'): + """Initialize VariableDeviceChooser. + + Args: + num_parameter_servers: number of parameter servers. + ps_device: string representing the parameter server device. + placement: string representing the placement of the variable either CPU:0 + or GPU:0. When using parameter servers forced to CPU:0. + """ + self._num_ps = num_parameter_servers + self._ps_device = ps_device + self._placement = placement if num_parameter_servers == 0 else 'CPU:0' + self._next_task_id = 0 + + def __call__(self, op): + device_string = '' + if self._num_ps > 0: + task_id = self._next_task_id + self._next_task_id = (self._next_task_id + 1) % self._num_ps + device_string = '%s/task:%d' % (self._ps_device, task_id) + device_string += '/%s' % self._placement + return device_string + + +# TODO(sguada) Remove once get_variable is able to colocate op.devices. +def variable_device(device, name): + """Fix the variable device to colocate its ops.""" + if callable(device): + var_name = tf.get_variable_scope().name + '/' + name + var_def = tf.NodeDef(name=var_name, op='Variable') + device = device(var_def) + if device is None: + device = '' + return device + + +@scopes.add_arg_scope +def global_step(device=''): + """Returns the global step variable. + + Args: + device: Optional device to place the variable. It can be an string or a + function that is called to get the device for the variable. + + Returns: + the tensor representing the global step variable. + """ + global_step_ref = tf.get_collection(tf.GraphKeys.GLOBAL_STEP) + if global_step_ref: + return global_step_ref[0] + else: + collections = [ + VARIABLES_TO_RESTORE, + tf.GraphKeys.GLOBAL_VARIABLES, + tf.GraphKeys.GLOBAL_STEP, + ] + # Get the device for the variable. + with tf.device(variable_device(device, 'global_step')): + return tf.get_variable('global_step', shape=[], dtype=tf.int64, + initializer=tf.zeros_initializer(), + trainable=False, collections=collections) + + +@scopes.add_arg_scope +def variable(name, shape=None, dtype=tf.float32, initializer=None, + regularizer=None, trainable=True, collections=None, device='', + restore=True): + """Gets an existing variable with these parameters or creates a new one. + + It also add itself to a group with its name. + + Args: + name: the name of the new or existing variable. + shape: shape of the new or existing variable. + dtype: type of the new or existing variable (defaults to `DT_FLOAT`). + initializer: initializer for the variable if one is created. 
+
+    regularizer: a (Tensor -> Tensor or None) function; the result of
+      applying it on a newly created variable will be added to the collection
+      GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
+    trainable: If `True` also add the variable to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
+    collections: A list of collection names to which the Variable will be added.
+      Note that the variable is always also added to the tf.GraphKeys.GLOBAL_VARIABLES
+      and MODEL_VARIABLES collections.
+    device: Optional device to place the variable. It can be a string or a
+      function that is called to get the device for the variable.
+    restore: whether the variable should be added to the
+      VARIABLES_TO_RESTORE collection.
+
+  Returns:
+    The created or existing variable.
+  """
+  collections = list(collections or [])
+
+  # Make sure variables are added to tf.GraphKeys.GLOBAL_VARIABLES and MODEL_VARIABLES
+  collections += [tf.GraphKeys.GLOBAL_VARIABLES, MODEL_VARIABLES]
+  # Add to VARIABLES_TO_RESTORE if necessary
+  if restore:
+    collections.append(VARIABLES_TO_RESTORE)
+  # Remove duplicates
+  collections = set(collections)
+  # Get the device for the variable.
+  with tf.device(variable_device(device, name)):
+    return tf.get_variable(name, shape=shape, dtype=dtype,
+                           initializer=initializer, regularizer=regularizer,
+                           trainable=trainable, collections=collections)
diff --git a/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/readme.md b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..7cf15b11cfdbc91262fb733922ba320d5257f774
--- /dev/null
+++ b/TensorFlow/contrib/graph/Dense3DRegression_ID1066_for_TensorFlow/readme.md
@@ -0,0 +1,40 @@
+# Dense 3D Regression for Hand Pose Estimation
+
+This repository contains a TensorFlow implementation of the [paper](https://arxiv.org/abs/1711.08996). It was developed and tested on Debian GNU/Linux 8 64-bit.
+
+## Requirements:
+- python 2.7
+- tensorflow == 1.3
+- [tfplot](https://github.com/wookayin/tensorflow-plot) (for visualization on tf summary files)
+- matplotlib >= 2.0.2
+- numpy
+- opencv >= 2.4 (optional, for cpu visualization)
+
+## Data Preparations:
+Download the datasets, create soft links for them to [exp/data](./exp/data) and run `python data/${dataset}.py` to create the TFRecord files. Details are [here](./exp/data).
+
+## Usage:
+Both training and testing functions are provided by `model/hourglass_um_crop_tiny.py`. Here is an example:
+```bash
+python model/hourglass_um_crop_tiny.py --dataset 'icvl' --batch_size 40 --num_stack 2 --fea_num 128 --debug_level 2 --is_train True
+```
+where the hyperparameter configuration is explained in the source Python files.
+
+## Results:
+We provide the estimation results of the proposed method for [ICVL](./exp/result/icvl.txt), [NYU](./exp/result/nyu.txt) and [MSRA15](./exp/result/msra.txt). They are xyz coordinates in mm; the 2D projection method is the function _xyz2uvd_ from [here](data/util.py#L23). Check [here](https://github.com/xinghaochen/awesome-hand-pose-estimation/tree/master/evaluation) for a comparison to other methods. Thanks @xinghaochen for providing the comparison.
+
+## Pretrained Models:
+Run the scripts below to download and install the trained model for the corresponding dataset; $ROOT denotes the root path of this project.
+```bash
+cd $ROOT
+./exp/scripts/fetch_icvl_models.sh
+./exp/scripts/fetch_msra_models.sh
+./exp/scripts/fetch_nyu_models.sh
+```
+To perform testing, simply run
+```bash
+python model/hourglass_um_crop_tiny.py --dataset 'icvl' --batch_size 3 --num_stack 2 --num_fea 128 --debug_level 2 --is_train False
+python model/hourglass_um_crop_tiny.py --dataset 'nyu' --batch_size 3 --num_stack 2 --num_fea 128 --debug_level 2 --is_train False
+python model/hourglass_um_crop_tiny.py --dataset 'msra' --pid 0 --batch_size 3 --num_stack 2 --num_fea 128 --debug_level 2 --is_train False
+```
+where the msra dataset additionally requires `--pid` to indicate which person to test on. In the [testing function](data/hourglass_um_crop_tiny.py#L23), the third argument indicates the training step of the model checkpoint to restore; a step of -1 restores our pre-trained model.
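
For reference, the slim-style layer wrappers defined in this change (`deconv`, `fc`, `max_pool`, `dropout`, `flatten`, together with `scopes.arg_scope`) are intended to be composed roughly as in the minimal sketch below. The sketch is illustrative only and is not part of the repository; the import path `network.ops` and the helper name `tiny_head` are assumptions made for this example.

```python
# Minimal illustrative sketch; the `network.ops` import path and `tiny_head`
# are assumptions for this example, not code from the repository.
import tensorflow as tf

from network import ops, scopes


def tiny_head(inputs, num_joints=14, is_training=True):
  """Example head on a feature map `inputs` of shape [batch, h, w, c].

  Assumes a statically known batch size, since deconv builds its
  output_shape from inputs.get_shape()[0].
  """
  # arg_scope pushes shared defaults onto every decorated op in the list,
  # so weight_decay/stddev need not be repeated at each call site.
  with scopes.arg_scope([ops.deconv, ops.fc], weight_decay=0.0005, stddev=0.01):
    net = ops.deconv(inputs, 64, kernel_size=4, stride=2, scope='up1')  # 2x upsample
    net = ops.max_pool(net, kernel_size=2, stride=2, scope='pool1')
    net = ops.flatten(net)
    net = ops.fc(net, 256, scope='fc1')
    net = ops.dropout(net, keep_prob=0.5, is_training=is_training)
    # Linear regression output: no activation on the last layer.
    return ops.fc(net, num_joints * 3, activation=None, scope='fc_out')
```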