diff --git a/cv/semantic_segmentation/vnet/tensorflow/.gitignore b/cv/semantic_segmentation/vnet/tensorflow/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..4baf45d22c25b7828488f806a7bae6f19ff14a1a --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/.gitignore @@ -0,0 +1,4 @@ +**/__pycache__/ +data +logs +model_train diff --git a/cv/semantic_segmentation/vnet/tensorflow/Dockerfile b/cv/semantic_segmentation/vnet/tensorflow/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a707f0618af56b56c7300d35f1441cbcb7510e44 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/Dockerfile @@ -0,0 +1,12 @@ +ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf1-py3 +FROM ${FROM_IMAGE_NAME} + +ADD . /workspace/vnet +WORKDIR /workspace/vnet + +RUN pip install --upgrade pip +RUN pip install --disable-pip-version-check -r requirements.txt + +ENV TF_XLA_FLAGS="--tf_xla_enable_lazy_compilation=false" + + diff --git a/cv/semantic_segmentation/vnet/tensorflow/LICENSE b/cv/semantic_segmentation/vnet/tensorflow/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..7a4a3ea2424c09fbe48d455aed1eaa94d9124835 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/cv/semantic_segmentation/vnet/tensorflow/README.md b/cv/semantic_segmentation/vnet/tensorflow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..505d1a3441f9617dd7924daf27ccae36869757c8 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/README.md @@ -0,0 +1,23 @@ + +## Prepare +``` +pip3 install -r requirements.txt +``` + +## Download dataset +``` +python3 download_dataset.py --data_dir ./data +``` + + +## Run training +### single card +``` +python3 examples/vnet_train_and_evaluate.py --gpus 1 --batch_size 8 --base_lr 0.0001 --data_dir ./data/Task04_Hippocampus/ --model_dir ./model_train/ +``` + +### 8 cards +``` +python3 examples/vnet_train_and_evaluate.py --gpus 8 --batch_size 8 --base_lr 0.0001 --data_dir ./data/Task04_Hippocampus/ --model_dir ./model_train/ + +``` \ No newline at end of file diff --git a/cv/semantic_segmentation/vnet/tensorflow/data.tgz b/cv/semantic_segmentation/vnet/tensorflow/data.tgz new file mode 100644 index 0000000000000000000000000000000000000000..8919414d8dc000bcff35ab475299bb2e841ccc34 Binary files /dev/null and b/cv/semantic_segmentation/vnet/tensorflow/data.tgz differ diff --git a/cv/semantic_segmentation/vnet/tensorflow/download_dataset.py b/cv/semantic_segmentation/vnet/tensorflow/download_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..83639ff64d5502fda62778803af86711eb457b6e --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/download_dataset.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
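+
+# Example usage (flags and defaults are defined below; the Google Drive file id
+# for the Hippocampus task is hard-coded in main()):
+#   python3 download_dataset.py --data_dir ./data --dataset hippocampus
+# The Task04_Hippocampus.tar archive is downloaded, unpacked into --data_dir,
+# and the tarball is removed afterwards.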
+ +import argparse +import os +import tarfile + +from google_drive_downloader import GoogleDriveDownloader as gdd + +PARSER = argparse.ArgumentParser(description="V-Net medical") + +PARSER.add_argument('--data_dir', + type=str, + default='./data', + help="""Directory where to download the dataset""") + +PARSER.add_argument('--dataset', + type=str, + default='hippocampus', + help="""Dataset to download""") + + +def main(): + FLAGS = PARSER.parse_args() + + if not os.path.exists(FLAGS.data_dir): + os.makedirs(FLAGS.data_dir) + + filename = '' + + if FLAGS.dataset == 'hippocampus': + filename = 'Task04_Hippocampus.tar' + gdd.download_file_from_google_drive(file_id='1RzPB1_bqzQhlWvU-YGvZzhx2omcDh38C', + dest_path=os.path.join(FLAGS.data_dir, filename), + unzip=False) + + print('Unpacking...') + + tf = tarfile.open(os.path.join(FLAGS.data_dir, filename)) + tf.extractall(path=FLAGS.data_dir) + + print('Cleaning up...') + + os.remove(os.path.join(FLAGS.data_dir, filename)) + + print("Finished downloading files for V-Net medical to {}".format(FLAGS.data_dir)) + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/download_script.sh b/cv/semantic_segmentation/vnet/tensorflow/download_script.sh new file mode 100644 index 0000000000000000000000000000000000000000..0d874aaeb9d98c450e8f12d85c1bbe7c9e18c036 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/download_script.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +exit 0 diff --git a/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_benchmark.py b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..8781fb50d9768c201594d19f89a1574a990c39b7 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_benchmark.py @@ -0,0 +1,80 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
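+
+# Example invocation (paths follow the top-level README; flags are defined below):
+#   python examples/vnet_benchmark.py --data_dir ./data/Task04_Hippocampus \
+#       --model_dir ./model_train --mode train --gpus 1 --batch_size 8
+# With --gpus 8 the command is wrapped in the Horovod/mpirun prefix built by
+# build_horovod_prefix(); prediction can only be benchmarked on a single GPU.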
+ +import argparse +import os +import subprocess +from os.path import dirname + +PARSER = argparse.ArgumentParser(description="vnet_benchmark") + +PARSER.add_argument('--data_dir', + required=True, + type=str) + +PARSER.add_argument('--model_dir', + required=True, + type=str) + +PARSER.add_argument('--mode', + choices=['train', 'predict'], + required=True, + type=str) + +PARSER.add_argument('--gpus', + choices=[1, 8], + required=True, + type=int) + +PARSER.add_argument('--batch_size', + required=True, + type=int) + +PARSER.add_argument('--amp', dest='use_amp', action='store_true', default=False) + + +def build_horovod_prefix(gpus): + return 'mpirun -np {} -H localhost:{} -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x PATH -mca ' \ + 'pml ob1 -mca btl ^openib --allow-run-as-root '.format(gpus, gpus) + + +def build_command(FLAGS, path_to_main, use_amp): + return 'python {} --data_dir {} --model_dir {} --exec_mode {} --batch_size {} {} --augment --benchmark'.format( + path_to_main, + FLAGS.data_dir, + FLAGS.model_dir, + FLAGS.mode, + FLAGS.batch_size, + use_amp) + + +def main(): + FLAGS = PARSER.parse_args() + + use_amp = '--amp' if FLAGS.use_amp else '' + path_to_main = os.path.join(dirname(dirname(os.path.realpath(__file__))), 'main.py') + + cmd = build_command(FLAGS, path_to_main, use_amp) + + if FLAGS.gpus > 1: + assert FLAGS.mode != 'predict', 'Prediction can only be benchmarked on 1 GPU' + cmd = build_horovod_prefix(FLAGS.gpus) + cmd + + print('Command to be executed:') + print(cmd) + subprocess.call(cmd, shell=True) + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_eval.py b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..0686d4027af1c0d166087d3bd9de7752545b897a --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_eval.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
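+
+# Example invocation (paths follow the top-level README; --train_split 0.9 is
+# fixed inside build_command()):
+#   python examples/vnet_eval.py --data_dir ./data/Task04_Hippocampus \
+#       --model_dir ./model_train --batch_size 8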
+ + +import argparse +import os +import subprocess +from os.path import dirname + +PARSER = argparse.ArgumentParser(description="vnet_evaluate") + +PARSER.add_argument('--data_dir', + required=True, + type=str) + +PARSER.add_argument('--model_dir', + required=True, + type=str) + +PARSER.add_argument('--batch_size', + required=True, + type=int) + +PARSER.add_argument('--amp', dest='use_amp', action='store_true', default=False) + + +def build_command(FLAGS, path_to_main, use_amp): + return 'python {} --data_dir {} --model_dir {} --train_split 0.9 --exec_mode evaluate --batch_size {} {}'.format( + path_to_main, + FLAGS.data_dir, + FLAGS.model_dir, + FLAGS.batch_size, + use_amp) + + +def main(): + FLAGS = PARSER.parse_args() + + use_amp = '--amp' if FLAGS.use_amp else '' + path_to_main = os.path.join(dirname(dirname(os.path.realpath(__file__))), 'main.py') + + cmd = build_command(FLAGS, path_to_main, use_amp) + + print('Command to be executed:') + print(cmd) + subprocess.call(cmd, shell=True) + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_predict.py b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_predict.py new file mode 100644 index 0000000000000000000000000000000000000000..d83cb2bfb64f92a939cfdd57e786b209bb753029 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_predict.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import os +import subprocess +from os.path import dirname + +PARSER = argparse.ArgumentParser(description="vnet_predict") + +PARSER.add_argument('--data_dir', + required=True, + type=str) + +PARSER.add_argument('--model_dir', + required=True, + type=str) + +PARSER.add_argument('--batch_size', + required=True, + type=int) + +PARSER.add_argument('--amp', dest='use_amp', action='store_true', default=False) + + +def build_command(FLAGS, path_to_main, use_amp): + return 'python {} --data_dir {} --model_dir {} --exec_mode predict --batch_size {} {}'.format( + path_to_main, + FLAGS.data_dir, + FLAGS.model_dir, + FLAGS.batch_size, + use_amp) + + +def main(): + FLAGS = PARSER.parse_args() + + use_amp = '--amp' if FLAGS.use_amp else '' + path_to_main = os.path.join(dirname(dirname(os.path.realpath(__file__))), 'main.py') + + cmd = build_command(FLAGS, path_to_main, use_amp) + + print('Command to be executed:') + print(cmd) + subprocess.call(cmd, shell=True) + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_train.py b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_train.py new file mode 100644 index 0000000000000000000000000000000000000000..a640f17d36023b41faadc9bb0cf08e6ba0ded3da --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_train.py @@ -0,0 +1,91 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess + +from os.path import dirname + +PARSER = argparse.ArgumentParser(description="vnet_train") + +PARSER.add_argument('--data_dir', + required=True, + type=str, + help='Directory where the dataset is stored') + +PARSER.add_argument('--model_dir', + required=True, + type=str, + help='Directory where model information (including checkpoints) is stored') + +PARSER.add_argument('--gpus', + choices=[1, 8], + required=True, + type=int, + help='Number of GPUs') + +PARSER.add_argument('--batch_size', + default=1, + type=int, + help='Batch size for training') + +PARSER.add_argument('--epochs', + default=40, + type=int, + help='Number of epochs for training') + + +PARSER.add_argument('--amp', dest='use_amp', action='store_true', default=False) + +PARSER.add_argument('--base_lr', + default=0.0001, + type=float, + help='Initial learning rate for RMSProp') + + +def build_horovod_prefix(gpus): + return 'mpirun -np {} -H localhost:{} -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x PATH -mca ' \ + 'pml ob1 -mca btl ^openib --allow-run-as-root '.format(gpus, gpus) + + +def build_command(FLAGS, path_to_main, use_amp): + return 'python {} --data_dir {} --model_dir {} --exec_mode train --batch_size {} {} --augment --train_epochs {} --base_lr {}'.format( + path_to_main, + FLAGS.data_dir, + FLAGS.model_dir, + FLAGS.batch_size, + use_amp, + FLAGS.epochs, + FLAGS.base_lr) + + +def main(): + FLAGS = PARSER.parse_args() + + use_amp = '--amp' if FLAGS.use_amp else '' + path_to_main = os.path.join(dirname(dirname(os.path.realpath(__file__))), 'main.py') + + cmd = build_command(FLAGS, path_to_main, use_amp) + + if FLAGS.gpus > 1: + cmd = build_horovod_prefix(FLAGS.gpus) + cmd + + print('Command to be executed:') + print(cmd) + subprocess.call(cmd, shell=True) + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_train_and_evaluate.py b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_train_and_evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..50f60cb5fc88f2893495d3c947015a97426bfaa0 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/examples/vnet_train_and_evaluate.py @@ -0,0 +1,89 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
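+
+# Example invocation, as given in the top-level README (flags are defined below):
+#   python3 examples/vnet_train_and_evaluate.py --gpus 1 --batch_size 8 \
+#       --base_lr 0.0001 --data_dir ./data/Task04_Hippocampus/ --model_dir ./model_train/
+# With --gpus 8 the generated main.py command is prefixed with mpirun (Horovod).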
+ +import argparse +import os +import subprocess +from os.path import dirname + +PARSER = argparse.ArgumentParser(description="vnet_train_and_evaluate") + +PARSER.add_argument('--data_dir', + required=True, + type=str, + help='Directory where the dataset is stored') + +PARSER.add_argument('--model_dir', + required=True, + type=str, + help='Directory where model information (including checkpoints) is stored') + +PARSER.add_argument('--gpus', + choices=[1, 8], + required=True, + type=int, + help='Number of GPUs') + +PARSER.add_argument('--batch_size', + default=1, + type=int, + help='Batch size for training') + +PARSER.add_argument('--epochs', + default=40, + type=int, + help='Number of epochs for training') + +PARSER.add_argument('--amp', dest='use_amp', action='store_true', default=False) + +PARSER.add_argument('--base_lr', + default=0.0001, + type=float, + help='Initial learning rate for RMSProp') + + +def build_horovod_prefix(gpus): + return 'mpirun -np {} -H localhost:{} -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x PATH -mca ' \ + 'pml ob1 -mca btl ^openib --allow-run-as-root '.format(gpus, gpus) + + +def build_command(FLAGS, path_to_main, use_amp): + return 'python3 {} --data_dir {} --model_dir {} --exec_mode train_and_evaluate --batch_size {} {} --augment --train_epochs {} --train_split 0.9 --split_seed 42 --base_lr {}'.format( + path_to_main, + FLAGS.data_dir, + FLAGS.model_dir, + FLAGS.batch_size, + use_amp, + FLAGS.epochs, + FLAGS.base_lr) + + +def main(): + FLAGS = PARSER.parse_args() + + use_amp = '--amp' if FLAGS.use_amp else '' + path_to_main = os.path.join(dirname(dirname(os.path.realpath(__file__))), 'main.py') + + cmd = build_command(FLAGS, path_to_main, use_amp) + + if FLAGS.gpus > 1: + cmd = build_horovod_prefix(FLAGS.gpus) + cmd + + print('Command to be executed:') + print(cmd) + subprocess.call(cmd, shell=True) + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/export.py b/cv/semantic_segmentation/vnet/tensorflow/export.py new file mode 100644 index 0000000000000000000000000000000000000000..ab6adb6b70319cbd1b42db32e05912824cfcb51f --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/export.py @@ -0,0 +1,106 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
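+
+# Illustrative export to a SavedModel (the --input_shape value below is a
+# placeholder; use the shape the checkpoint was trained with):
+#   python export.py --to savedmodel --checkpoint_dir ./model_train \
+#       --input_shape 32 32 32
+# Exporting with --to onnx currently raises NotImplementedError for 3D models.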
+ +import argparse + +import tensorflow as tf + +from utils.data_loader import MSDDataset +from utils.model_fn import vnet_v2 +from utils.tf_export import to_savedmodel, to_tf_trt, to_onnx + +PARSER = argparse.ArgumentParser(description="V-Net") + +PARSER.add_argument('--to', dest='to', choices=['savedmodel', 'tftrt', 'onnx'], required=True) + +PARSER.add_argument('--use_amp', dest='use_amp', action='store_true', default=False) +PARSER.add_argument('--use_xla', dest='use_xla', action='store_true', default=False) +PARSER.add_argument('--compress', dest='compress', action='store_true', default=False) + +PARSER.add_argument('--input_shape', + nargs='+', + type=int, + help="""Model's input shape""") + +PARSER.add_argument('--data_dir', + type=str, + help="""Directory where the dataset is located""") + +PARSER.add_argument('--checkpoint_dir', + type=str, + help="""Directory where the checkpoint is located""") + +PARSER.add_argument('--savedmodel_dir', + type=str, + help="""Directory where the savedModel is located""") + +PARSER.add_argument('--precision', + type=str, + choices=['FP32', 'FP16', 'INT8'], + help="""Precision for the model""") + + +def main(): + """ + Starting point of the application + """ + flags = PARSER.parse_args() + + if flags.to == 'savedmodel': + params = { + 'labels': ['0', '1', '2'], + 'batch_size': 1, + 'input_shape': flags.input_shape, + 'convolution_size': 3, + 'downscale_blocks': [3, 3, 3], + 'upscale_blocks': [3, 3], + 'upsampling': 'transposed_conv', + 'pooling': 'conv_pool', + 'normalization_layer': 'batchnorm', + 'activation': 'relu' + } + to_savedmodel(input_shape=flags.input_shape, + model_fn=vnet_v2, + checkpoint_dir=flags.checkpoint_dir, + output_dir='./saved_model', + input_names=['IteratorGetNext'], + output_names=['vnet/loss/total_loss_ref'], + use_amp=flags.use_amp, + use_xla=flags.use_xla, + compress=flags.compress, + params=argparse.Namespace(**params)) + if flags.to == 'tftrt': + ds = MSDDataset(json_path=flags.data_dir + "/dataset.json", + interpolator='linear') + iterator = tf.compat.v1.data.make_one_shot_iterator(ds.test_fn(count=1)) + features = iterator.get_next() + + sess = tf.compat.v1.Session() + + def input_data(): + return {'input_tensor:0': sess.run(features)} + + to_tf_trt(savedmodel_dir=flags.savedmodel_dir, + output_dir='./tf_trt_model', + precision=flags.precision, + feed_dict_fn=input_data, + num_runs=1, + output_tensor_names=['vnet/Softmax:0'], + compress=flags.compress) + if flags.to == 'onnx': + raise NotImplementedError('Currently ONNX not supported for 3D models') + + +if __name__ == '__main__': + main() diff --git a/cv/semantic_segmentation/vnet/tensorflow/hooks/profiling_hook.py b/cv/semantic_segmentation/vnet/tensorflow/hooks/profiling_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..ed38a1a34af4bdea933d7371450d9880c129dffe --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/hooks/profiling_hook.py @@ -0,0 +1,58 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import time + +import numpy as np +import tensorflow as tf + +import dllogger as DLLogger + + +class ProfilingHook(tf.estimator.SessionRunHook): + def __init__(self, warmup_steps, global_batch_size, logger, training=True): + self._warmup_steps = warmup_steps + self._global_batch_size = global_batch_size + self._step = 0 + self._timestamps = [] + self._logger = logger + self._training = training + + def before_run(self, run_context): + self._step += 1 + if self._step >= self._warmup_steps: + self._timestamps.append(time.time()) + + def end(self, session): + deltas = np.array([self._timestamps[i + 1] - self._timestamps[i] for i in range(len(self._timestamps) - 1)]) + stats = process_performance_stats(np.array(deltas), + self._global_batch_size) + + self._logger.log(step=(), data={metric: value for (metric, value) in stats}) + self._logger.flush() + + +def process_performance_stats(timestamps, batch_size): + timestamps_ms = 1000 * timestamps + latency_ms = timestamps_ms.mean() + std = timestamps_ms.std() + n = np.sqrt(len(timestamps_ms)) + throughput_imgps = (1000.0 * batch_size / timestamps_ms).mean() + + stats = [("Throughput Avg", str(throughput_imgps)), + ('Latency Avg:', str(latency_ms))] + for ci, lvl in zip(["90%:", "95%:", "99%:"], + [1.645, 1.960, 2.576]): + stats.append(("Latency_"+ci, str(latency_ms + lvl * std / n))) + return stats \ No newline at end of file diff --git a/cv/semantic_segmentation/vnet/tensorflow/hooks/train_hook.py b/cv/semantic_segmentation/vnet/tensorflow/hooks/train_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..82fe3a36171001d2be72cfbfd8887be6a0bdb7eb --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/hooks/train_hook.py @@ -0,0 +1,43 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
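+
+# SessionRunHook that fetches the graph tensor 'vnet/loss/total_loss_ref:0' on
+# every step and logs it through dllogger once every `log_every` steps.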
+ +import tensorflow as tf + +import dllogger as DLLogger + + +class TrainHook(tf.estimator.SessionRunHook): + def __init__(self, log_every, logger): + self._log_every = log_every + self._step = 0 + self._logger = logger + + def before_run(self, run_context): + run_args = tf.estimator.SessionRunArgs( + fetches=[ + 'vnet/loss/total_loss_ref:0', + ] + ) + + return run_args + + def after_run(self, + run_context, + run_values): + if self._step % self._log_every == 0: + self._logger.log(step=(self._step,), data={'total_loss': str(run_values.results[0])}) + self._step += 1 + + def end(self, session): + self._logger.flush() diff --git a/cv/semantic_segmentation/vnet/tensorflow/images/1gpu_p_vs_v.png b/cv/semantic_segmentation/vnet/tensorflow/images/1gpu_p_vs_v.png new file mode 100644 index 0000000000000000000000000000000000000000..8307cbe195656b20e639c7a79d0253bf8644ff51 Binary files /dev/null and b/cv/semantic_segmentation/vnet/tensorflow/images/1gpu_p_vs_v.png differ diff --git a/cv/semantic_segmentation/vnet/tensorflow/images/8gpu_p_vs_v.png b/cv/semantic_segmentation/vnet/tensorflow/images/8gpu_p_vs_v.png new file mode 100644 index 0000000000000000000000000000000000000000..b656d7677c890aa6b74aa1d72fd93bbd0e19c51d Binary files /dev/null and b/cv/semantic_segmentation/vnet/tensorflow/images/8gpu_p_vs_v.png differ diff --git a/cv/semantic_segmentation/vnet/tensorflow/images/V-Net_TF1_conv.png b/cv/semantic_segmentation/vnet/tensorflow/images/V-Net_TF1_conv.png new file mode 100644 index 0000000000000000000000000000000000000000..d034275b8adcd5d796e163f056b2a6a490441050 Binary files /dev/null and b/cv/semantic_segmentation/vnet/tensorflow/images/V-Net_TF1_conv.png differ diff --git a/cv/semantic_segmentation/vnet/tensorflow/images/infer_p_vs_v.png b/cv/semantic_segmentation/vnet/tensorflow/images/infer_p_vs_v.png new file mode 100644 index 0000000000000000000000000000000000000000..926a6f417e1c12d100d7686a3d5369e4f28babfe Binary files /dev/null and b/cv/semantic_segmentation/vnet/tensorflow/images/infer_p_vs_v.png differ diff --git a/cv/semantic_segmentation/vnet/tensorflow/images/vnet.png b/cv/semantic_segmentation/vnet/tensorflow/images/vnet.png new file mode 100644 index 0000000000000000000000000000000000000000..3faca74a5ef5bf329f739f23e12cf0b1ae9b865f Binary files /dev/null and b/cv/semantic_segmentation/vnet/tensorflow/images/vnet.png differ diff --git a/cv/semantic_segmentation/vnet/tensorflow/main.py b/cv/semantic_segmentation/vnet/tensorflow/main.py new file mode 100644 index 0000000000000000000000000000000000000000..684f55cf9c7ab0009d1d3a8ed6c5d43bfff4b60a --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/main.py @@ -0,0 +1,174 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
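+
+# Entry point; the command-line flags come from the PARSER in utils/cmd_util.py.
+# A typical run, as generated by examples/vnet_train_and_evaluate.py:
+#   python main.py --data_dir ./data/Task04_Hippocampus --model_dir ./model_train \
+#       --exec_mode train_and_evaluate --batch_size 8 --augment --train_epochs 40 \
+#       --train_split 0.9 --split_seed 42 --base_lr 0.0001
+# Multi-GPU runs launch this script under mpirun/Horovod (see the examples/ scripts).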
+ +# pylint: enable=line-too-long +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import math +import os +import pickle +import shutil + +import horovod.tensorflow as hvd +import tensorflow as tf + +import dllogger as DLLogger +from dllogger import StdOutBackend, JSONStreamBackend, Verbosity +from hooks.profiling_hook import ProfilingHook +from hooks.train_hook import TrainHook +from utils.cmd_util import PARSER +from utils.data_loader import MSDDataset +from utils.model_fn import vnet_v2 + + +def main(_): + tf.get_logger().setLevel(logging.ERROR) + + hvd.init() + + FLAGS = PARSER.parse_args() + try: + from dltest import show_training_arguments + show_training_arguments(FLAGS) + except: + pass + + backends = [] + + if hvd.rank() == 0: + backends += [StdOutBackend(Verbosity.DEFAULT)] + + if FLAGS.log_dir: + backends += [JSONStreamBackend(Verbosity.DEFAULT, FLAGS.log_dir)] + + DLLogger.init(backends=backends) + + for key in vars(FLAGS): + DLLogger.log(step="PARAMETER", data={str(key): vars(FLAGS)[key]}) + + os.environ['CUDA_CACHE_DISABLE'] = '0' + + os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL' + + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + + os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private' + + os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1' + + os.environ['TF_ADJUST_HUE_FUSED'] = '1' + os.environ['TF_ADJUST_SATURATION_FUSED'] = '1' + os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' + + os.environ['TF_SYNC_ON_FINISH'] = '0' + os.environ['TF_AUTOTUNE_THRESHOLD'] = '2' + os.environ['TF_DISABLE_NVTX_RANGES'] = '1' + + dataset = MSDDataset(json_path=os.path.join(FLAGS.data_dir, 'dataset.json'), + dst_size=FLAGS.input_shape, + seed=FLAGS.seed, + interpolator=FLAGS.resize_interpolator, + data_normalization=FLAGS.data_normalization, + batch_size=FLAGS.batch_size, + train_split=FLAGS.train_split, + split_seed=FLAGS.split_seed) + + FLAGS.labels = dataset.labels + + gpu_options = tf.compat.v1.GPUOptions() + config = tf.compat.v1.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True) + + if FLAGS.use_xla: + config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1 + + config.gpu_options.allow_growth = True + config.gpu_options.visible_device_list = str(hvd.local_rank()) + + if FLAGS.use_amp: + config.graph_options.rewrite_options.auto_mixed_precision = 1 + + run_config = tf.estimator.RunConfig( + save_summary_steps=None, + save_checkpoints_steps=None if FLAGS.benchmark else dataset.train_steps * FLAGS.train_epochs, + save_checkpoints_secs=None, + tf_random_seed=None, + session_config=config, + keep_checkpoint_max=1) + + estimator = tf.estimator.Estimator( + model_fn=vnet_v2, + model_dir=FLAGS.model_dir if hvd.rank() == 0 else None, + config=run_config, + params=FLAGS) + + train_hooks = [hvd.BroadcastGlobalVariablesHook(0)] + + if 'train' in FLAGS.exec_mode: + steps = dataset.train_steps * FLAGS.train_epochs + + if FLAGS.benchmark: + steps = FLAGS.warmup_steps * 2 + if hvd.rank() == 0: + train_hooks += [ProfilingHook(FLAGS.warmup_steps, FLAGS.batch_size * hvd.size(), DLLogger)] + else: + if hvd.rank() == 0: + train_hooks += [TrainHook(FLAGS.log_every, DLLogger)] + + estimator.train( + input_fn=lambda: dataset.train_fn(FLAGS.augment), + steps=steps, + hooks=train_hooks) + + if 'evaluate' in FLAGS.exec_mode: + if hvd.rank() == 0: + if FLAGS.train_split >= 1.0: + raise ValueError("Missing argument: --train_split < 1.0") + + result = estimator.evaluate( + 
input_fn=dataset.eval_fn, + steps=dataset.eval_steps, + hooks=[]) + + DLLogger.log(step=tuple(), data={'background_dice': str(result['background dice']), + 'anterior_dice': str(result['Anterior dice']), + 'posterior_dice': str(result['Posterior dice'])}) + + if 'predict' in FLAGS.exec_mode: + count = 1 + hooks = [] + if hvd.rank() == 0: + if FLAGS.benchmark: + count = math.ceil((FLAGS.warmup_steps * 2) / dataset.test_steps) + hooks += [ProfilingHook(FLAGS.warmup_steps, FLAGS.batch_size * hvd.size(), DLLogger, training=False)] + + predictions = estimator.predict(input_fn=lambda: dataset.test_fn(count=count), + hooks=hooks) + + pred = [p['prediction'] for p in predictions] + + predict_path = os.path.join(FLAGS.model_dir, 'predictions') + if os.path.exists(predict_path): + shutil.rmtree(predict_path) + + os.makedirs(predict_path) + + pickle.dump(pred, open(os.path.join(predict_path, 'predictions.pkl'), 'wb')) + + +if __name__ == '__main__': + tf.compat.v1.app.run() + diff --git a/cv/semantic_segmentation/vnet/tensorflow/model/layers.py b/cv/semantic_segmentation/vnet/tensorflow/model/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..65bd94cb0d6e037310c51f80459643e95d41e43e --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/model/layers.py @@ -0,0 +1,176 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
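+
+# Building blocks for the V-Net graph: convolution, normalization and activation
+# helpers plus the residual, input, downsample, upsample and output blocks that
+# model/vnet.py assembles into the full network.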
+ +import tensorflow as tf + + +def normalization_layer(inputs, name, mode): + if name == 'batchnorm': + return tf.compat.v1.layers.batch_normalization(inputs=inputs, + axis=-1, + training=(mode == tf.estimator.ModeKeys.TRAIN), + trainable=True, + virtual_batch_size=None) + elif name == 'none': + return inputs + else: + raise ValueError('Invalid normalization layer') + + +def activation_layer(x, activation): + if activation == 'relu': + return tf.nn.relu(x) + elif activation == 'none': + return x + else: + raise ValueError("Unkown activation {}".format(activation)) + + +def convolution_layer(inputs, filters, kernel_size, stride, normalization, activation, mode): + x = tf.compat.v1.layers.conv3d(inputs=inputs, + filters=filters, + kernel_size=kernel_size, + strides=stride, + activation=None, + padding='same', + data_format='channels_last', + use_bias=True, + kernel_initializer=tf.compat.v1.glorot_uniform_initializer(), + bias_initializer=tf.compat.v1.zeros_initializer(), + bias_regularizer=None) + + x = normalization_layer(x, normalization, mode) + + return activation_layer(x, activation) + + +def downsample_layer(inputs, pooling, normalization, activation, mode): + if pooling == 'conv_pool': + return convolution_layer(inputs=inputs, + filters=inputs.get_shape()[-1] * 2, + kernel_size=2, + stride=2, + normalization=normalization, + activation=activation, + mode=mode) + else: + raise ValueError('Invalid downsampling method: {}'.format(pooling)) + + +def upsample_layer(inputs, filters, upsampling, normalization, activation, mode): + if upsampling == 'transposed_conv': + x = tf.compat.v1.layers.conv3d_transpose(inputs=inputs, + filters=filters, + kernel_size=2, + strides=2, + activation=None, + padding='same', + data_format='channels_last', + use_bias=True, + kernel_initializer=tf.compat.v1.glorot_uniform_initializer(), + bias_initializer=tf.compat.v1.zeros_initializer(), + bias_regularizer=None) + + x = normalization_layer(x, normalization, mode) + + return activation_layer(x, activation) + + else: + raise ValueError('Unsupported upsampling: {}'.format(upsampling)) + + +def residual_block(input_0, input_1, kernel_size, depth, normalization, activation, mode): + with tf.compat.v1.name_scope('residual_block'): + x = input_0 + if input_1 is not None: + x = tf.concat([input_0, input_1], axis=-1) + + inputs = x + n_input_channels = inputs.get_shape()[-1] + + for i in range(depth): + x = convolution_layer(inputs=x, + filters=n_input_channels, + kernel_size=kernel_size, + stride=1, + normalization=normalization, + activation=activation, + mode=mode) + + return x + inputs + + +def input_block(inputs, filters, kernel_size, normalization, activation, mode): + with tf.compat.v1.name_scope('conversion_block'): + x = inputs + return convolution_layer(inputs=inputs, + filters=filters, + kernel_size=kernel_size, + stride=1, + normalization=normalization, + activation=activation, + mode=mode) + x + + +def downsample_block(inputs, depth, kernel_size, pooling, normalization, activation, mode): + with tf.compat.v1.name_scope('downsample_block'): + x = downsample_layer(inputs, + pooling=pooling, + normalization=normalization, + activation=activation, + mode=mode) + + return residual_block(input_0=x, + input_1=None, + depth=depth, + kernel_size=kernel_size, + normalization=normalization, + activation=activation, + mode=mode) + + +def upsample_block(inputs, residual_inputs, depth, kernel_size, upsampling, normalization, activation, mode): + with tf.compat.v1.name_scope('upsample_block'): + x = 
upsample_layer(inputs, + filters=residual_inputs.get_shape()[-1], + upsampling=upsampling, + normalization=normalization, + activation=activation, + mode=mode) + + return residual_block(input_0=x, + input_1=residual_inputs, + depth=depth, + kernel_size=kernel_size, + normalization=normalization, + activation=activation, + mode=mode) + + +def output_block(inputs, residual_inputs, n_classes, kernel_size, upsampling, normalization, activation, mode): + with tf.compat.v1.name_scope('output_block'): + x = upsample_layer(inputs, + filters=residual_inputs.get_shape()[-1], + upsampling=upsampling, + normalization=normalization, + activation=activation, + mode=mode) + + return convolution_layer(inputs=x, + filters=n_classes, + kernel_size=kernel_size, + stride=1, + mode=mode, + activation='none', + normalization='none') diff --git a/cv/semantic_segmentation/vnet/tensorflow/model/vnet.py b/cv/semantic_segmentation/vnet/tensorflow/model/vnet.py new file mode 100644 index 0000000000000000000000000000000000000000..35259c144530427de3fe11e48b713e131f4fed40 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/model/vnet.py @@ -0,0 +1,72 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from model.layers import input_block, downsample_block, upsample_block, output_block + + +class Builder(): + def __init__(self, kernel_size, n_classes, upscale_blocks, downscale_blocks, upsampling, pooling, normalization, + activation, mode): + self._kernel_size = kernel_size + self._pooling = pooling + self._upsampling = upsampling + self._normalization = normalization + self._activation = activation + self._mode = mode + self._n_classes = n_classes + self._downscale_blocks = downscale_blocks + self._upscale_blocks = upscale_blocks + + def __call__(self, features): + + x = input_block(inputs=features, + filters=16, + kernel_size=self._kernel_size, + normalization=self._normalization, + activation=self._activation, + mode=self._mode) + + skip_connections = [x] + + for depth in self._downscale_blocks: + x = downsample_block(inputs=x, + depth=depth, + kernel_size=self._kernel_size, + pooling=self._pooling, + normalization=self._normalization, + activation=self._activation, + mode=self._mode) + + skip_connections.append(x) + + del skip_connections[-1] + + for depth in self._upscale_blocks: + x = upsample_block(inputs=x, + residual_inputs=skip_connections.pop(), + depth=depth, + upsampling=self._upsampling, + kernel_size=self._kernel_size, + normalization=self._normalization, + activation=self._activation, + mode=self._mode) + + return output_block(inputs=x, + residual_inputs=skip_connections.pop(), + kernel_size=self._kernel_size, + n_classes=self._n_classes, + upsampling=self._upsampling, + normalization=self._normalization, + activation=self._activation, + mode=self._mode) diff --git a/cv/semantic_segmentation/vnet/tensorflow/readme_origin.md b/cv/semantic_segmentation/vnet/tensorflow/readme_origin.md new file mode 100644 index 
0000000000000000000000000000000000000000..6f135bc7a9ce1a8fcba6f70a7501cec49172d3e0 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/readme_origin.md @@ -0,0 +1,540 @@ +# V-Net Medical For Tensorflow + +This repository provides a script and recipe to train the V-Net model to achieve state of the art accuracy, and is tested and maintained by NVIDIA. + +## Table of Contents + +- [Model overview](#model-overview) + * [Model architecture](#model-architecture) + * [Default configuration](#default-configuration) + * [Feature support matrix](#feature-support-matrix) + * [Features](#features) + * [Mixed precision training](#mixed-precision-training) + * [Enabling mixed precision](#enabling-mixed-precision) + * [Enabling TF32](#enabling-tf32) +- [Setup](#setup) + * [Requirements](#requirements) +- [Quick Start Guide](#quick-start-guide) +- [Advanced](#advanced) + * [Scripts and sample code](#scripts-and-sample-code) + * [Parameters](#parameters) + * [Command-line options](#command-line-options) + * [Getting the data](#getting-the-data) + * [Dataset guidelines](#dataset-guidelines) + * [Multi-dataset](#multi-dataset) + * [Training process](#training-process) + * [Inference process](#inference-process) +- [Performance](#performance) + * [Benchmarking](#benchmarking) + * [Training performance benchmark](#training-performance-benchmark) + * [Inference performance benchmark](#inference-performance-benchmark) + * [Results](#results) + * [Training accuracy results](#training-accuracy-results) + * [Training accuracy: NVIDIA DGX-1 (8x V100 16GB)](#training-accuracy-nvidia-dgx-1-8x-v100-16gb) + * [Training performance results](#training-performance-results) + * [Training performance: NVIDIA DGX-1 (8x V100 16GB)](#training-performance-nvidia-dgx-1-8x-v100-16gb) + * [Inference performance results](#inference-performance-results) + * [Inference performance: NVIDIA DGX-1 (1x V100 16GB)](#inference-performance-nvidia-dgx-1-1x-v100-16gb) +- [Release notes](#release-notes) + * [Changelog](#changelog) + * [Known issues](#known-issues) + + +## Model overview + +The V-Net model for Tensorflow, called V-Net_Medical_TF is a convolutional neural network for 3D image segmentation. This repository contains a V-Net implementation and is based on the paper [V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation](https://arxiv.org/pdf/1606.04797), with small alterations to support a new dataset for Hippocampus segmentation. + +This implementation differs from the original in the following ways: +* Convolution filters are 3x3x3 instead of 5x5x5 to increase performance without negatively affecting the accuracy +* The number of upsample/downsample levels is reduced to 3 to accommodate the different input size +* PReLU activation has been substituted by ReLU to increase performance without negatively affecting the accuracy + +This model is trained with mixed precision using Tensor Cores on Volta, Turing, and the NVIDIA Ampere GPU architectures. Therefore, researchers can get results 2.2x faster than training without Tensor Cores, while experiencing the benefits of mixed precision training. This model is tested against each NGC monthly container release to ensure consistent accuracy and performance over time. + +### Model architecture + +V-Net was first introduced by Fausto Milletari, Nassir Navab, Seyed-Ahmad Ahmadi in the paper: [V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation](https://arxiv.org/pdf/1606.04797). 
V-Net allows for seamless segmentation of 3D images, with high accuracy and performance, and can be adapted to solve many different segmentation problems. + +The following figure shows the construction of the standard V-Net model and its different components. V-Net is composed of a contractive and an expanding path, that aims at building a bottleneck in its centermost part through a combination of convolution and downsampling. After this bottleneck, the image is reconstructed through a combination of convolutions and upsampling. Skip connections are added with the goal of helping the backward flow of gradients in order to improve the training. + +![V-Net](images/vnet.png) + +Figure 1. VNet architecture + +### Default configuration + +V-Net consists of a contractive (left-side) and expanding (right-side) path. It repeatedly applies unpadded convolutions followed by max pooling for downsampling. Every step in the expanding path consists of an upsampling of the feature maps and a concatenation with the correspondingly cropped feature map from the contractive path. + +The following performance optimizations were implemented in this model: +* XLA support. +* Reduced size of convolutional filters to 3x3x3 +* ReLU activation used instead of PReLU +* Batchnorm used for training + +### Feature support matrix + +The following features are supported by this model. + +| **Feature** | **V-Net_Medical_TF** | +|:---:|:--------:| +| Horovod Multi-GPU (NCCL) | Yes | +| Automatic Mixed Precision (AMP) | Yes | + +The following features were implemented in this model: +* Data-parallel multi-GPU training with Horovod. +* Mixed precision support with TensorFlow Automatic Mixed Precision (TF-AMP), which enables mixed precision training without any changes to the code-base by performing automatic graph rewrites and loss scaling controlled by an environmental variable. +* Tensor Core operations to maximize throughput using NVIDIA Volta GPUs. + +#### Features + +* Multi-GPU training with Horovod + +Our model uses Horovod to implement efficient multi-GPU training with NCCL. For details, see example sources in this repository or see the [TensorFlow tutorial](https://github.com/horovod/horovod/#usage). + +* Automatic Mixed Precision (AMP) + +Enables mixed precision training without any changes to the code-base by performing automatic graph rewrites and loss scaling controlled by an environmental variable. + +### Mixed precision training + +Mixed precision is the combined use of different numerical precisions in a computational method. [Mixed precision](https://arxiv.org/abs/1710.03740) training offers significant computational speedup by performing operations in half-precision format while storing minimal information in single-precision to retain as much information as possible in critical parts of the network. Since the introduction of [Tensor Cores](https://developer.nvidia.com/tensor-cores) in Volta, and following with both the Turing and Ampere architectures, significant training speedups are experienced by switching to mixed precision -- up to 3x overall speedup on the most arithmetically intense model architectures. Using [mixed precision training](https://docs.nvidia.com/deeplearning/performance/mixed-precision-training/index.html) previously required two steps: +1. Porting the model to use the FP16 data type where appropriate. +2. Adding loss scaling to preserve small gradient values. 
+ +This can now be achieved using Automatic Mixed Precision (AMP) for TensorFlow to enable the full [mixed precision methodology](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html#tensorflow) in your existing TensorFlow model code. AMP enables mixed precision training on Volta and Turing GPUs automatically. The TensorFlow framework code makes all necessary model changes internally. + +In TF-AMP, the computational graph is optimized to use as few casts as necessary and maximize the use of FP16, and the loss scaling is automatically applied inside of supported optimizers. AMP can be configured to work with the existing tf.contrib loss scaling manager by disabling the AMP scaling with a single environment variable to perform only the automatic mixed-precision optimization. It accomplishes this by automatically rewriting all computation graphs with the necessary operations to enable mixed precision training and automatic loss scaling. + +- How to train using mixed precision, see the [Mixed Precision Training](https://arxiv.org/abs/1710.03740) paper and [Training With Mixed Precision](https://docs.nvidia.com/deeplearning/performance/mixed-precision-training/index.html) documentation. +- Techniques used for mixed precision training, see the [Mixed-Precision Training of Deep Neural Networks](https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/) blog. +- How to access and enable AMP for TensorFlow, see [Using TF-AMP](https://docs.nvidia.com/deeplearning/dgx/tensorflow-user-guide/index.html#tfamp) from the TensorFlow User Guide. + +### Enabling mixed precision + +In order to enable mixed precision training, the following environment variables must be defined with the correct value before the training starts: +``` +TF_ENABLE_AUTO_MIXED_PRECISION=1 +``` +Exporting these variables ensures that loss scaling is performed correctly and automatically. +By supplying the `--amp` flag to the `main.py` script while training in FP32, the following variables are set to their correct value for mixed precision training inside the `./utils/runner.py` script: +``` +if params['use_amp']: + LOGGER.log("TF AMP is activated - Experimental Feature") + os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1' +``` + +#### Enabling TF32 + +TensorFloat-32 (TF32) is the new math mode in [NVIDIA A100](https://www.nvidia.com/en-us/data-center/a100/) GPUs for handling the matrix math also called tensor operations. TF32 running on Tensor Cores in A100 GPUs can provide up to 10x speedups compared to single-precision floating-point math (FP32) on Volta GPUs. + +TF32 Tensor Cores can speed up networks using FP32, typically with no loss of accuracy. It is more robust than FP16 for models which require high dynamic range for weights or activations. + +For more information, refer to the [TensorFloat-32 in the A100 GPU Accelerates AI Training, HPC up to 20x](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) blog post. + +TF32 is supported in the NVIDIA Ampere GPU architecture and is enabled by default. + + +## Setup + +The following section lists the requirements in order to start training the V-Net Medical model. + +### Requirements + +This repository contains a `Dockerfile` which extends the TensorFlow NGC container and encapsulates some additional dependencies. 
Aside from these dependencies, ensure you have the following components: +* [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker) +- TensorFlow 20.06-tf1-py3 [NGC container](https://ngc.nvidia.com/registry/nvidia-tensorflow) +- GPU-based architecture: + - [NVIDIA Volta](https://www.nvidia.com/en-us/data-center/volta-gpu-architecture/) + - [NVIDIA Turing](https://www.nvidia.com/en-us/geforce/turing/) + - [NVIDIA Ampere architecture](https://www.nvidia.com/en-us/data-center/nvidia-ampere-gpu-architecture/) + + +For more information about how to get started with NGC containers, see the following sections from the NVIDIA GPU Cloud Documentation and the Deep Learning DGX Documentation: + +* [Getting Started Using NVIDIA GPU Cloud](https://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html) +* [Accessing And Pulling From The NGC container registry](https://docs.nvidia.com/deeplearning/dgx/user-guide/index.html#accessing_registry) +* [Running Tensorflow](https://docs.nvidia.com/deeplearning/dgx/tensorflow-release-notes/running.html#running) + +## Quick Start Guide + +To train your model using mixed precision with Tensor Cores or using FP32, perform the following steps using the default parameters of the V-Net model on the Hippocampus head and body dataset present on the [medical segmentation decathlon website](http://medicaldecathlon.com/). + +1. Clone the repository +``` +git clone https://github.com/NVIDIA/DeepLearningExamples +cd DeepLearningExamples/TensorFlow/Segmentation/V-Net_tf +``` + +2. Download and preprocess the dataset + +The V-Net script `main.py` operates on Hippocampus head and body data from the [medical segmentation decathlon](http://medicaldecathlon.com/). Upon registration, the challenge's data is made available through the following link: + +* [Medical segmentation decathlon Google Drive](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2) + +The script `download_dataset.py` is provided for data download. It is possible to select the destination folder when downloading the files by using the `--data_dir` flag. For example: +``` +python download_dataset.py --data_dir ./data +``` + +Once downloaded the data using the `download_dataset.py` script, it can be used to run the training and benchmark scripts described below, by pointing `main.py` to its location using the `--data_dir` flag. + +**Note:** Masks are only provided for training data. + +3. Build the V-Net TensorFlow container + +After Docker is correctly set up, the V-Net TensorFlow container can be built with: +``` +docker build -t vnet_tf . +``` + +4. Start an interactive session in the NGC container to run training/inference. + +Run the previously built Docker container: +``` +$ docker run --runtime=nvidia --rm -it --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v /path/to/dataset:/data vnet_tf:latest bash +``` +**Note:** Ensure to mount your dataset using the `-v` flag to make it available for training inside the NVIDIA Docker container. Data can be downloaded as well from inside the container. + +5. 
Start training + +To run training on all training data for a default configuration (for example 1/4/8 GPUs FP32/TF-AMP), run the `vnet_train.py` script in the `./examples` directory: +``` +usage: vnet_train.py [-h] + --data_dir DATA_DIR + --model_dir MODEL_DIR + --gpus {1, 8} + --batch_size BATCH_SIZE + --epochs EPOCHS + OPTIONAL [--amp] +``` +For example: +``` +python examples/vnet_train.py --data_dir ./data/Task04_Hippocampus --model_dir ./tmp --gpus 8 --batch_size 260 --epochs 50 --amp +``` + +To run training on 9/10 of the training data and perform evaluation on the remaining 1/10, run the `vnet_train_and_evaluate.py` script in the `./examples` directory: +``` +usage: vnet_train_and_evaluate.py [-h] + --data_dir DATA_DIR + --model_dir MODEL_DIR + --gpus {1, 8} + --batch_size BATCH_SIZE + --epochs EPOCHS + OPTIONAL [--amp] +``` +This is useful to estimate the convergence point of the training. For example: +``` +python examples/vnet_train_and_evaluate.py --data_dir ./data/Task04_Hippocampus --model_dir ./tmp --gpus 1 --batch_size 8 --epochs 260 --amp +``` + +6. Start inference/predictions +To run inference on a checkpointed model, run the `vnet_predict.py` script in the `./examples` directory: +``` +usage: vnet_predict.py [-h] + --data_dir DATA_DIR + --model_dir MODEL_DIR + --batch_size BATCH_SIZE + OPTIONAL [--amp] +``` +For example: +``` +python examples/vnet_predict.py --data_dir ./data/Task04_Hippocampus --model_dir ./tmp --batch_size 4 --amp +``` + +## Advanced + +The following sections provide greater details of the dataset, running training and inference, and the training results. + +### Scripts and sample code + +In the root directory, the most important files are: +* `main.py`: Serves as the entry point to the application. +* `Dockerfile`: Container with the basic set of dependencies to run V-Net +* `requirements.txt`: Set of extra requirements for running V-Net +* `download_data.py`: Automatically downloads the dataset for training + +The `utils/` folder encapsulates the necessary tools to train and perform inference using V-Net. Its main components are: +* `runner.py`: Implements the logic for training and inference +* `data_loader.py`: Implements the data loading and augmentation +* `hooks/profiling_hook.py`: Collects different metrics to be used for benchmarking +* `hooks/training_hook.py`: Collects different metrics to be used for training +* `hooks/evaluation_hook.py`: Collects different metrics to be used for testing +* `var_storage.py`: Helper functions for TF-AMP + +The model/ folder contains information about the building blocks of V-Net and the way they are assembled. 
Its contents are: +* `layers.py`: Defines the different blocks that are used to assemble V-Net +* `vnet.py`: Defines the model architecture using the blocks from the `layers.py` script + +Other folders included in the root directory are: +* `dllogger/`: Contains the utilities for logging +* `examples/`: Provides examples for training and benchmarking V-Net +* `images/`: Contains a model diagram + +### Parameters +The complete list of the available parameters for the main.py script contains: +* `--exec_mode`: Select the execution mode to run the model (default: `train_and_predict`) +* `--data_normalization`: Select the type of data normalization (default: `zscore`) +* `--activation`: Select the activation to be used in the network (default: `prelu`) +* `--resize_interpolator`: Select the interpolator for image resizing (default: `nearest_neighbor`) +* `--loss`: Loss function to be utilized for training (default: `dice`) +* `--normalization_layer`: Type of normalization layer to be used in the model (default: `batchnorm`) +* `--pooling`: Type of pooling layer to be used in the model (default: `conv_pool`) +* `--upsampling`: Type of upsampling layer to be used in the model (default: `transposed_conv`) +* `--seed`: Random seed value (default: `0`) +* `--input_shape`: Target resize dimension for input samples (default: `[32 32 32]`) +* `--upscale_blocks`: Number of upscale blocks with the depth of their residual component (default: `[3 3 3]`) +* `--downscale_blocks`: Number of downscale blocks with the depth of their residual component (default: `[3 3]`) +* `--model_dir`: Set the output directory for information related to the model (default: `result/`) +* `--convolution_size`: Size of the convolutional kernel filters (default: `3`) +* `--batch_size`: Number of samples processed per execution step +* `--log_every`: Log every this number of epochs (default: `100`) +* `--warmup_steps`: Initial number of steps that will not be benchmarked as the model starts running (default: `200`) +* `--train_epochs`: Number of times that training will go through the entire dataset +* `--optimizer`: Optimizer to be used during training (default: `rmsprop`) +* `--base_lr`: Model’s learning rate (default: `0.01`) +* `--momentum`: Momentum coefficient for model’s optimizer (default: `0.99`) +* `--train_split`: Proportion of the dataset that will become the training set (default: `0.9`) +* `--split_seed`: Random seed for the splitting of the dataset between training and validation +* `--model_dir`: Path where checkpoints and information related to the model will be stored +* `--data_dir`: Path to the dataset +* `--augment`: Enable data augmentation (default: `False`) +* `--benchmark`: Enable performance benchmarking (default: `False`) +* `--amp`: Enable automatic mixed precision (default: `False`) +* `--xla`: Enable xla (default: `False`) + +### Command-line options + +To see the full list of available options and their descriptions, use the `-h` or `--help` command line option, for example: +``` +python main.py +usage: main.py [-h] + --exec_mode {train,predict,train_and_predict,train_and_evaluate} + [--data_normalization {zscore}] + [--activation {relu}] + [--resize_interpolator {linear}] + [--loss {dice}] + [--normalization_layer {batchnorm}] + [--pooling {conv_pool}] + [--upsampling {transposed_conv}] + [--seed SEED] + [--input_shape INPUT_SHAPE [INPUT_SHAPE ...]] + [--upscale_blocks UPSCALE_BLOCKS [UPSCALE_BLOCKS ...]] + [--downscale_blocks DOWNSCALE_BLOCKS [DOWNSCALE_BLOCKS ...]] + [--convolution_size {3}] + 
--batch_size BATCH_SIZE + [--log_every LOG_EVERY] + [--warmup_steps WARMUP_STEPS] + [--train_epochs TRAIN_EPOCHS] + [--optimizer {rmsprop}] + [--base_lr BASE_LR] + [--momentum MOMENTUM] + [--train_split TRAIN_SPLIT] + [--split_seed SPLIT_SEED] + --model_dir MODEL_DIR + --data_dir DATA_DIR + [--benchmark] + [--amp] + [--xla] + [--augment] + +``` + +### Getting the data + +The V-Net model was trained on the Hippocampus dataset from the [medical segmentation decathlon](http://medicaldecathlon.com/). Test images provided by the organization were used to produce the resulting masks for submission. + +The objective is to produce a set of masks that segment the data as accurately as possible. + +Medical segmentation decathlon (MSD) datasets consist of the following elements: +* `dataset.json` contains a high-level description of the contents of the dataset +* `ImagesTr` contains the training images as Nifti files +* `LabelsTr` contains the training labels as Nifti files +* `ImagesTs` contains the test images as Nifti files + +#### Dataset guidelines + +The process of loading, normalizing and augmenting the data contained in the dataset can be found in the `data_loader.py` script. + +Initially, data is loaded from a `Nifti` file and converted to NumPy arrays with the use of SimpleITK, with target dimensions specified through `--input_shape`. These NumPy arrays are fed to the model through `tf.data.Dataset.from_tensor_slices()`, in order to achieve high performance. + +Intensities on the volumes are then normalized using the method specified in `--data_normalization`, whereas labels are one-hot encoded for their later use. + +If augmentation is enabled, the following augmentation techniques are applied: +* Random horizontal flipping +* Random vertical flipping + +### Training process + +#### Optimizer + +The model trains for 80 epochs with the following hyperparameters: + +* RMSProp optimizer with momentum = 0.0 +* Base learning rate = 0.0001 + +### Inference process + +To run inference on a checkpointed model, run the script below; it requires a pre-trained model checkpoint and the downloaded dataset. +``` +python examples/vnet_predict.py --data_dir ./data/Task04_Hippocampus --model_dir ./tmp --batch_size {N} [--amp] +``` +This script should produce the prediction results as a set of masks, which will be located in `./tmp/eval`. + +## Performance + +### Benchmarking + +Starting from cuDNN 7.6.2, enhanced support for 3D convolutions in mixed precision has been introduced to our containers. This enhanced support further accelerates both training and inference, while maintaining the reduced memory footprint characteristic of mixed precision training. + +![Pascal_vs_Volta](images/1gpu_p_vs_v.png) +![Pascal_vs_Volta](images/8gpu_p_vs_v.png) + +The two figures above display the difference in throughput between P100 and V100 GPUs when training V-Net in single- and multi-GPU setups, for different batch sizes. +For both single- and multi-GPU setups, training V-Net with mixed precision on Volta GPUs (V100) is at least 2x faster than on Pascal (P100). + +![Pascal_vs_Volta](images/infer_p_vs_v.png) + +The figure above displays the difference in throughput between P100 and V100 GPUs when performing inference with V-Net, again for different batch sizes. +In general, V-Net inference with mixed precision on Volta GPUs (V100) is at least 2x faster than on Pascal (P100).
+ +The following section shows how to run benchmarks measuring the model performance in training and inference modes. + +#### Training performance benchmark + +To benchmark training, run the script `vnet_benchmark.py` in the `./examples` directory. +``` +usage: vnet_benchmark.py [-h] + --data_dir DATA_DIR + --model_dir MODEL_DIR + --mode train + --gpus {1, 8} + --batch_size BATCH_SIZE + OPTIONAL [--amp] +``` + +By default, this script runs 200 warm-up iterations and benchmarks the training performance over the next 200 iterations. + +#### Inference performance benchmark + +To benchmark inference, run the script `vnet_benchmark.py` in the `./examples` directory. +``` +usage: vnet_benchmark.py [-h] + --data_dir DATA_DIR + --model_dir MODEL_DIR + --mode predict + --gpus {1, 8} + --batch_size BATCH_SIZE + OPTIONAL [--amp] +``` + +By default, this script runs 200 warm-up iterations and benchmarks the inference performance over the next 200 iterations. + +### Results + +The following sections provide details on how we achieved our performance and accuracy in training and inference. + +#### Training accuracy results + +The dataset is divided into training data (samples with ground truth) and test data (unlabelled). In order to obtain instant feedback on the quality of the model, the test data is put aside, and training and evaluation are performed on the original training set. + +For training, 90% of the training data is used, while the remaining 10% is held out as validation data. This validation data remains unseen during training and is used exclusively to calculate the final accuracy of the model. + +##### Training accuracy: NVIDIA DGX-1 (8x V100 16GB) + +Our results were obtained by running the `./examples/vnet_train_and_evaluate.py` script in the `nvcr.io/nvidia/tensorflow:20.06-tf1-py3` NGC container on NVIDIA DGX-1 with 8x V100 16GB GPUs. + +To train until convergence in FP32 using 1 GPU, run: +``` +python examples/vnet_train_and_evaluate.py --gpus 1 --batch_size 2 --base_lr 0.0001 --epochs 80 --data_dir ./data/Task04_Hippocampus/ --model_dir /tmp +``` +To train until convergence in FP32 using 8 GPUs, run: +``` +python examples/vnet_train_and_evaluate.py --gpus 8 --batch_size 2 --base_lr 0.0001 --epochs 320 --data_dir ./data/Task04_Hippocampus/ --model_dir /tmp +``` +To train until convergence in FP16 using 1 GPU, run: +``` +python examples/vnet_train_and_evaluate.py --gpus 1 --batch_size 2 --base_lr 0.0001 --epochs 80 --data_dir ./data/Task04_Hippocampus/ --model_dir /tmp --amp +``` +To train until convergence in FP16 using 8 GPUs, run: +``` +python examples/vnet_train_and_evaluate.py --gpus 8 --batch_size 2 --base_lr 0.0001 --epochs 320 --data_dir ./data/Task04_Hippocampus/ --model_dir /tmp --amp +``` + +| GPUs | Batch size / GPU | Anterior dice - FP32 | Anterior dice - mixed precision | Time to train - FP32 | Time to train - mixed precision | Time to train speedup (FP32 to mixed precision)| +|---------|-------|----------|----------|----------|---------|---------| +| 1 | 2 | 0.8537 | 0.8533 | 11 min | 11 min | 1.0 | +| 8 | 2 | 0.8409 | 0.8398 | 2 min | 2 min | 1.0 | + +To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide). + + +#### Training performance results + +##### Training performance: NVIDIA DGX-1 (8x V100 16GB) + +Our results were obtained by running the `./examples/vnet_benchmark.py` script in the `nvcr.io/nvidia/tensorflow:20.06-tf1-py3` NGC container on NVIDIA DGX-1 with 8x V100 16GB GPUs.
Performance numbers (in images per second) were averaged over 200 iterations. + +For example: +``` +python examples/vnet_benchmark.py --data_dir ./data/Task04_Hippocampus --model_dir /tmp --mode train --gpus {1,8} --batch_size {8,16,32} [--amp] +``` + +| GPUs | Batch size / GPU | Throughput - FP32 | Throughput - mixed precision | Throughput speedup (FP32 - mixed precision) | Weak scaling - FP32 | Weak scaling - mixed precision | +|---|----|---------|---------|------|------|------| +| 1 | 2 | 117.82 | 114.11 | 0.97 | N/A | N/A | +| 1 | 8 | 277.46 | 368.93 | 1.33 | N/A | N/A | +| 1 | 16 | 339.56 | 427.02 | 1.26 | N/A | N/A | +| 1 | 32 | 444.98 | 639.03 | 1.44 | N/A | N/A | +| 8 | 2 | 584.23 | 497.05 | 0.85 | 4.96 | 4.36 | +| 8 | 8 | 1783.44 | 1851.75 | 1.04 | 6.43 | 5.02 | +| 8 | 16 | 2342.51 | 2821.20 | 1.20 | 6.90 | 6.61 | +| 8 | 32 | 3189.86 | 4282.41 | 1.34 | 7.17 | 6.70 | + + +To achieve these same results, follow the [Quick start guide](#quick-start-guide) outlined above. + +#### Inference performance results + +##### Inference performance: NVIDIA DGX-1 (1x V100 16GB) + +Our results were obtained by running the `./examples/vnet_benchmark.py` scripts in the `nvcr.io/nvidia/tensorflow:20.06-tf1-py3` NGC container on NVIDIA DGX-1 with 1x V100 16GB GPUs. + +For example: +``` +python examples/vnet_benchmark.py --data_dir ./data/Task04_Hippocampus --model_dir /tmp --mode predict --gpus 1 --batch_size {8, 16, 32} [--amp] +``` + +FP16 + +| Batch size | Sequence length | Throughput Avg | Latency Avg | Latency 90% |Latency 95% |Latency 99% | +|----|------------|---------|-------|-------|-------|-------| +| 8 | 32x32x32x1 | 1428.89 | 6.59 | 8.25 | 8.57 | 9.19 | +| 16 | 32x32x32x1 | 2010.71 | 10.23 | 14.04 | 14.77 | 16.20 | +| 32 | 32x32x32x1 | 3053.85 | 16.36 | 26.08 | 27.94 | 31.58 | + +FP32 + +| Batch size | Sequence length | Throughput Avg | Latency Avg | Latency 90% | Latency 95% | Latency 99% | +|----|------------|---------|-------|-------|-------|-------| +| 8 | 32x32x32x1 | 1009.75 | 8.89 | 10.53 | 10.84 | 11.45 | +| 16 | 32x32x32x1 | 1262.54 | 14.92 | 18.71 | 19.43 | 20.85 | +| 32 | 32x32x32x1 | 1496.08 | 27.32 | 37.27 | 39.17 | 42.90 | + +To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide). + +## Release notes + +### Changelog + +June 2020 + +* Updated training and inference accuracy +* Updated training and inference performance + +November 2019 +* Initial release + +### Known issues + + +There are no known issues in this release. diff --git a/cv/semantic_segmentation/vnet/tensorflow/requirements.txt b/cv/semantic_segmentation/vnet/tensorflow/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..203a0837218d3240762bc374df22f1fedfbbed9d --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/requirements.txt @@ -0,0 +1,4 @@ +cmake +SimpleITK +requests +googledrivedownloader diff --git a/cv/semantic_segmentation/vnet/tensorflow/run_get_hippocampus_data.sh b/cv/semantic_segmentation/vnet/tensorflow/run_get_hippocampus_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..fb1ef7f54bb849be283b56171aacd43295948d37 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/run_get_hippocampus_data.sh @@ -0,0 +1,5 @@ +# Download Hippocampus dataset +DATA_DIR='data' +if [ ! 
-d ${DATA_DIR} ]; then + tar -xf data.tgz +fi diff --git a/cv/semantic_segmentation/vnet/tensorflow/run_training.sh b/cv/semantic_segmentation/vnet/tensorflow/run_training.sh new file mode 100644 index 0000000000000000000000000000000000000000..d45deaec5087ca854484dc59d85b9c21c296fb87 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/run_training.sh @@ -0,0 +1,38 @@ +# !/bin/bash +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +# if [[ ${ID} == "ubuntu" ]]; then +# echo ${ID} +# #sudo apt-get install libprotobuf-dev protobuf-compiler +# elif [[ ${ID} == "centos" ]]; then +# echo ${ID} +# #yum install libprotobuf-dev protobuf-compiler +# else +# echo "Unable to determine OS..." +# fi +# pip install scikit-build +# pip install -r requirements.txt +# HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 pip install horovod[tensorflow] + +: ${IX_NUM_CUDA_VISIBLE_DEVICES:=1} +: ${HOROVOD_LAUNCH_CMD:="horovodrun -np ${IX_NUM_CUDA_VISIBLE_DEVICES} --gloo"} +: ${BATCH_SIZE:=8} + +bash ./run_get_hippocampus_data.sh +if [[ $? != 0 ]]; then + echo "ERROR: get hippocampus data failed" + exit 1 +fi + +LOG_DIR="logs" +if [ ! -d "$LOG_DIR" ]; then + mkdir -p ${LOG_DIR} +fi +DATE=`date +%Y%m%d%H%M%S` + +${HOROVOD_LAUNCH_CMD} python3 examples/vnet_train_and_evaluate.py --gpus 1 --batch_size ${BATCH_SIZE} --base_lr 0.0001 --data_dir ./data/Task04_Hippocampus/ --model_dir ./model_train/ "$@" +if [[ $? != 0 ]]; then + echo "ERROR: run vnet train and evaluate failed" + exit 1 +fi + +exit 0 diff --git a/cv/semantic_segmentation/vnet/tensorflow/utils/cmd_util.py b/cv/semantic_segmentation/vnet/tensorflow/utils/cmd_util.py new file mode 100644 index 0000000000000000000000000000000000000000..5b35e95f235ab6fc041bb547a3df24bbb58bbfec --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/utils/cmd_util.py @@ -0,0 +1,129 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
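+# Command-line interface for V-Net: defines the argparse PARSER with the execution mode,
+# data/model paths, network building-block choices, training hyperparameters, and the
+# AMP/XLA/augmentation toggles.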
+ +import argparse + +PARSER = argparse.ArgumentParser(description="VNet") + +PARSER.add_argument('--exec_mode', + choices=['train', 'predict', 'train_and_predict', 'train_and_evaluate', 'evaluate'], + required=True, + type=str) + +PARSER.add_argument('--data_normalization', + choices=['zscore'], + default='zscore', + type=str) + +PARSER.add_argument('--activation', + choices=['relu'], + default='relu', + type=str) + +PARSER.add_argument('--resize_interpolator', + choices=['linear'], + default='linear', + type=str) + +PARSER.add_argument('--loss', + choices=['dice'], + default='dice', + type=str) + +PARSER.add_argument('--normalization_layer', + choices=['batchnorm'], + default='batchnorm', + type=str) + +PARSER.add_argument('--pooling', + choices=['conv_pool'], + default='conv_pool', + type=str) + +PARSER.add_argument('--upsampling', + choices=['transposed_conv'], + default='transposed_conv', + type=str) + +PARSER.add_argument('--seed', + default=0, + type=int) + +PARSER.add_argument('--input_shape', nargs='+', type=int, default=[32, 32, 32]) +PARSER.add_argument('--upscale_blocks', nargs='+', type=int, default=[3, 3]) +PARSER.add_argument('--downscale_blocks', nargs='+', type=int, default=[3, 3, 3]) + +PARSER.add_argument('--convolution_size', + choices=[3, 5], + default=3, + type=int) + +PARSER.add_argument('--batch_size', + required=True, + type=int) + +PARSER.add_argument('--log_every', + default=10, + type=int) + +PARSER.add_argument('--warmup_steps', + default=200, + type=int) + +PARSER.add_argument('--train_epochs', + default=1, + type=int) + +PARSER.add_argument('--optimizer', + choices=['rmsprop'], + default='rmsprop', + type=str) + +PARSER.add_argument('--gradient_clipping', + choices=['global_norm'], + default='global_norm', + type=str) + +PARSER.add_argument('--base_lr', + default=0.0001, + type=float) + +PARSER.add_argument('--momentum', + default=0.0, + type=float) + +PARSER.add_argument('--train_split', + default=1.0, + type=float) + +PARSER.add_argument('--split_seed', + default=0, + type=int) + +PARSER.add_argument('--model_dir', + required=True, + type=str) + +PARSER.add_argument('--log_dir', + default=None, + type=str) + +PARSER.add_argument('--data_dir', + required=True, + type=str) + +PARSER.add_argument('--benchmark', dest='benchmark', action='store_true', default=False) +PARSER.add_argument('--use_amp', '--amp', dest='use_amp', action='store_true', default=False) +PARSER.add_argument('--use_xla', '--xla', dest='use_xla', action='store_true', default=False) +PARSER.add_argument('--augment', dest='augment', action='store_true', default=False) diff --git a/cv/semantic_segmentation/vnet/tensorflow/utils/data_loader.py b/cv/semantic_segmentation/vnet/tensorflow/utils/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..82d636dcdd12c1aa28d6f2cbb1283175eeb8b06a --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/utils/data_loader.py @@ -0,0 +1,275 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import math +import multiprocessing +import os + +import SimpleITK as sitk +import horovod.tensorflow as hvd +import numpy as np +import tensorflow as tf +from scipy import stats + + +def parse_nifti(path, dtype, dst_size, interpolator, normalization=None, modality=None): + sitk_image = load_image(path) + sitk_image = resize_image(sitk_image, + dst_size=dst_size, + interpolator=interpolator) + + image = sitk_to_np(sitk_image) + + if modality and 'CT' not in modality: + if normalization: + image = stats.zscore(image, axis=None) + elif modality: + raise NotImplementedError + + return image + + +def make_ref_image(img_path, dst_size, interpolator): + ref_image = load_image(img_path) + + ref_image = resize_image(ref_image, dst_size=dst_size, + interpolator=interpolator) + return sitk_to_np(ref_image) / np.max(ref_image) * 255 + + +def make_interpolator(interpolator): + if interpolator == 'linear': + return sitk.sitkLinear + else: + raise ValueError("Unknown interpolator type") + + +def load_image(img_path): + image = sitk.ReadImage(img_path) + + if image.GetDimension() == 4: + image = sitk.GetImageFromArray(sitk.GetArrayFromImage(image)[-1, :, :, :]) + + if image.GetPixelID() != sitk.sitkFloat32: + return sitk.Cast(image, sitk.sitkFloat32) + + return image + + +def sitk_to_np(sitk_img): + return np.transpose(sitk.GetArrayFromImage(sitk_img), [2, 1, 0]) + + +def resize_image(sitk_img, + dst_size=(128, 128, 64), + interpolator=sitk.sitkNearestNeighbor): + reference_image = sitk.Image(dst_size, sitk_img.GetPixelIDValue()) + reference_image.SetOrigin(sitk_img.GetOrigin()) + reference_image.SetDirection(sitk_img.GetDirection()) + reference_image.SetSpacing( + [sz * spc / nsz for nsz, sz, spc in zip(dst_size, sitk_img.GetSize(), sitk_img.GetSpacing())]) + + return sitk.Resample(sitk_img, reference_image, sitk.Transform(3, sitk.sitkIdentity), interpolator) + + +class MSDJsonParser: + def __init__(self, json_path): + with open(json_path) as f: + data = json.load(f) + + self._labels = data.get('labels') + self._x_train = [os.path.join(os.path.dirname(json_path), p['image']) for p in data.get('training')] + self._y_train = [os.path.join(os.path.dirname(json_path), p['label']) for p in data.get('training')] + self._x_test = [os.path.join(os.path.dirname(json_path), p) for p in data.get('test')] + self._modality = [data.get('modality')[k] for k in data.get('modality').keys()] + + @property + def labels(self): + return self._labels + + @property + def x_train(self): + return self._x_train + + @property + def y_train(self): + return self._y_train + + @property + def x_test(self): + return self._x_test + + @property + def modality(self): + return self._modality + + +def make_split(json_parser, train_split, split_seed=0): + np.random.seed(split_seed) + + train_size = int(len(json_parser.x_train) * train_split) + + return np.array(json_parser.x_train)[:train_size], np.array(json_parser.y_train)[:train_size], \ + np.array(json_parser.x_train)[train_size:], np.array(json_parser.y_train)[train_size:] + + +class MSDDataset(object): + def __init__(self, json_path, + dst_size=[128, 128, 64], + seed=None, + interpolator=None, + data_normalization=None, + batch_size=1, + train_split=1.0, + split_seed=0): + self._json_parser = MSDJsonParser(json_path) + self._interpolator = make_interpolator(interpolator) + + self._ref_image = make_ref_image(img_path=self._json_parser.x_test[0], + dst_size=dst_size, 
+ interpolator=self._interpolator) + + np.random.seed(split_seed) + + self._train_img, self._train_label, \ + self._eval_img, self._eval_label = make_split(self._json_parser, train_split) + self._test_img = np.array(self._json_parser.x_test) + + self._dst_size = dst_size + + self._seed = seed + self._batch_size = batch_size + self._train_split = train_split + self._data_normalization = data_normalization + + np.random.seed(self._seed) + + @property + def labels(self): + return self._json_parser.labels + + @property + def train_steps(self): + global_batch_size = hvd.size() * self._batch_size + + return math.ceil( + len(self._train_img) / global_batch_size) + + @property + def eval_steps(self): + return math.ceil(len(self._eval_img) / self._batch_size) + + @property + def test_steps(self): + return math.ceil(len(self._test_img) / self._batch_size) + + def _parse_image(self, img): + return parse_nifti(path=img, + dst_size=self._dst_size, + dtype=tf.float32, + interpolator=self._interpolator, + normalization=self._data_normalization, + modality=self._json_parser.modality) + + def _parse_label(self, label): + return parse_nifti(path=label, + dst_size=self._dst_size, + dtype=tf.int32, + interpolator=sitk.sitkNearestNeighbor) + + def _augment(self, x, y): + # Horizontal flip + h_flip = tf.random.uniform([]) > 0.5 + x = tf.cond(pred=h_flip, true_fn=lambda: tf.image.flip_left_right(x), false_fn=lambda: x) + y = tf.cond(pred=h_flip, true_fn=lambda: tf.image.flip_left_right(y), false_fn=lambda: y) + + # Vertical flip + v_flip = tf.random.uniform([]) > 0.5 + x = tf.cond(pred=v_flip, true_fn=lambda: tf.image.flip_up_down(x), false_fn=lambda: x) + y = tf.cond(pred=v_flip, true_fn=lambda: tf.image.flip_up_down(y), false_fn=lambda: y) + + return x, y + + def _img_generator(self, collection): + for element in collection: + yield self._parse_image(element) + + def _label_generator(self, collection): + for element in collection: + yield self._parse_label(element) + + def train_fn(self, augment): + images = tf.data.Dataset.from_generator(generator=lambda: self._img_generator(self._train_img), + output_types=tf.float32, + output_shapes=(32, 32, 32)) + labels = tf.data.Dataset.from_generator(generator=lambda: self._label_generator(self._train_label), + output_types=tf.int32, + output_shapes=(32, 32, 32)) + + dataset = tf.data.Dataset.zip((images, labels)) + + dataset = dataset.cache() + + dataset = dataset.repeat() + + dataset = dataset.shuffle(buffer_size=self._batch_size * 2, + reshuffle_each_iteration=True, + seed=self._seed) + dataset = dataset.shard(hvd.size(), hvd.rank()) + + if augment: + dataset = dataset.apply( + tf.data.experimental.map_and_batch(map_func=self._augment, + batch_size=self._batch_size, + drop_remainder=True, + num_parallel_calls=multiprocessing.cpu_count())) + else: + dataset = dataset.batch(self._batch_size) + + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + + return dataset + + def eval_fn(self): + images = tf.data.Dataset.from_generator(generator=lambda: self._img_generator(self._eval_img), + output_types=tf.float32, + output_shapes=(32, 32, 32)) + labels = tf.data.Dataset.from_generator(generator=lambda: self._label_generator(self._eval_label), + output_types=tf.int32, + output_shapes=(32, 32, 32)) + dataset = tf.data.Dataset.zip((images, labels)) + + dataset = dataset.cache() + + dataset = dataset.batch(self._batch_size, drop_remainder=True) + + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + + return dataset + + def test_fn(self, 
count=1): + dataset = tf.data.Dataset.from_generator(generator=lambda: self._img_generator(self._test_img), + output_types=tf.float32, + output_shapes=(32, 32, 32)) + + dataset = dataset.cache() + + dataset = dataset.repeat(count=count) + + dataset = dataset.batch(self._batch_size, drop_remainder=True) + + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + + return dataset diff --git a/cv/semantic_segmentation/vnet/tensorflow/utils/model_fn.py b/cv/semantic_segmentation/vnet/tensorflow/utils/model_fn.py new file mode 100644 index 0000000000000000000000000000000000000000..45ec20163a6ee6a6130dd5373e363dca4858aa53 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/utils/model_fn.py @@ -0,0 +1,140 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import horovod.tensorflow as hvd +import tensorflow as tf + +from model.vnet import Builder +from utils.var_storage import model_variable_scope + + +def dice_coef(predict, target, dice_type, axis=1, eps=1e-6): + intersection = tf.reduce_sum(input_tensor=predict * target, axis=axis) + + if dice_type == 'sorensen': + union = tf.reduce_sum(input_tensor=predict + target, axis=axis) + else: + raise ValueError("dice_type must be either sorensen") + + dice = (2 * intersection + eps) / (union + eps) + return tf.reduce_mean(input_tensor=dice, axis=0) # average over batch + + +def vnet_v2(features, labels, mode, params): + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + is_eval = (mode == tf.estimator.ModeKeys.EVAL) + is_predict = (mode == tf.estimator.ModeKeys.PREDICT) + num_classes = len(params.labels) + channel_axis = -1 + + with model_variable_scope( + 'vnet', + reuse=tf.compat.v1.AUTO_REUSE, + dtype=tf.float16, + debug_mode=False + ): + features = tf.reshape(features, + [params.batch_size] + params.input_shape + [1]) + if labels is not None: + labels = tf.reshape(labels, + [params.batch_size] + params.input_shape + [1]) + + logits = Builder(kernel_size=params.convolution_size, + n_classes=num_classes, + downscale_blocks=params.downscale_blocks, + upscale_blocks=params.upscale_blocks, + upsampling=params.upsampling, + pooling=params.pooling, + normalization=params.normalization_layer, + activation=params.activation, + mode=mode)(features) + + softmax = tf.nn.softmax(logits=logits, axis=channel_axis) + + if is_predict: + prediction = tf.argmax(input=softmax, axis=channel_axis) + predictions = {'prediction': prediction} + return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) + + # Flattened logits and softmax - in FP32 + flattened_softmax = tf.reshape(softmax, [tf.shape(input=logits)[0], -1, num_classes]) + flattened_softmax = tf.cast(flattened_softmax, tf.float32) + + # One hot encoding + flattened_labels = tf.compat.v1.layers.flatten(labels) + one_hot_labels = tf.one_hot(indices=flattened_labels, + depth=num_classes, + dtype=tf.float32) + + with tf.compat.v1.name_scope("loss"): + if params.loss == 'dice': + loss = 
dice_coef(predict=tf.cast(flattened_softmax, tf.float32), + target=one_hot_labels, + dice_type='sorensen') + total_loss = tf.identity(tf.reduce_sum(input_tensor=1. - loss), + name='total_loss_ref') + else: + raise NotImplementedError + + train_op = None + if is_training: + global_step = tf.compat.v1.train.get_or_create_global_step() + + with tf.compat.v1.name_scope("optimizer"): + if params.optimizer == 'rmsprop': + optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=params.base_lr, + momentum=params.momentum, + centered=True) + else: + raise NotImplementedError + + update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(update_ops): + gradients, variables = zip(*optimizer.compute_gradients(total_loss)) + if params.gradient_clipping == 'global_norm': + gradients, _ = tf.clip_by_global_norm(gradients, 1.0) + tf.compat.v1.logging.info('clipping: global_norm') + else: + return NotImplementedError + + optimizer = hvd.DistributedOptimizer(optimizer) + + try: + amp_envar_enabled = (int(os.environ['TF_ENABLE_AUTO_MIXED_PRECISION']) == 1) + except KeyError: + amp_envar_enabled = False + + if params.use_amp and not amp_envar_enabled: + optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite( + optimizer, + loss_scale='dynamic' + ) + + + train_op = optimizer.minimize(total_loss, global_step=global_step) + + eval_metric_ops = None + if is_eval: + dice_loss = dice_coef(predict=tf.cast(flattened_softmax, tf.float32), + target=one_hot_labels, + dice_type='sorensen') + eval_loss = tf.identity(dice_loss, name='eval_loss_ref') + eval_metric_ops = {} + for i in range(num_classes): + eval_metric_ops['%s dice' % params.labels[str(i)]] = tf.compat.v1.metrics.mean(eval_loss[i]) + + return tf.estimator.EstimatorSpec( + mode=mode, loss=total_loss, train_op=train_op, + eval_metric_ops=eval_metric_ops) diff --git a/cv/semantic_segmentation/vnet/tensorflow/utils/tf_export.py b/cv/semantic_segmentation/vnet/tensorflow/utils/tf_export.py new file mode 100644 index 0000000000000000000000000000000000000000..9e38d0d468b5a8e7c4c87f0005e108a1cc7de457 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/utils/tf_export.py @@ -0,0 +1,286 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
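+# Model-export utilities: optimize a trained V-Net checkpoint graph for inference and
+# convert it to a TensorFlow SavedModel (to_savedmodel), a TF-TRT engine (to_tf_trt),
+# or ONNX via the external tf2onnx tool (to_onnx).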
+ +import glob +import inspect +import os +import shutil +import subprocess +from argparse import Namespace +from typing import List, Callable + +import tensorflow as tf +from google.protobuf import text_format +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.compiler.tensorrt import trt_convert as trt +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import graph_io +from tensorflow.python.platform import gfile +from tensorflow.python.tools import optimize_for_inference_lib + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + + +def _compress(src_path: str, dst_path: str): + """ + Compress source path into destination path + + :param src_path: (str) Source path + :param dst_path: (str) Destination path + """ + print('[*] Compressing...') + shutil.make_archive(dst_path, 'zip', src_path) + print('[*] Compressed the contents in: {}.zip'.format(dst_path)) + + +def _print_input(func: Callable): + """ + Decorator printing function name and args + :param func: (Callable) Decorated function + :return: Wrapped call + """ + + def wrapper(*args, **kwargs): + """ + Print the name and arguments of a function + + :param args: Named arguments + :param kwargs: Keyword arguments + :return: Original function call + """ + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + func_args = inspect.signature(func).bind(*args, **kwargs).arguments + func_args_str = ''.join('\t{} = {!r}\n'.format(*item) for item in func_args.items()) + + print('[*] Running \'{}\' with arguments:'.format(func.__qualname__)) + print(func_args_str[:-1]) + + return func(*args, **kwargs) + + return wrapper + + +def _parse_placeholder_types(values: str): + """ + Extracts placeholder types from a comma separate list. + + :param values: (str) Placeholder types + :return: (List) Placeholder types + """ + values = [int(value) for value in values.split(",")] + return values if len(values) > 1 else values[0] + + +def _optimize_checkpoint_for_inference(graph_path: str, + input_names: List[str], + output_names: List[str]): + """ + Removes Horovod and training related information from the graph + + :param graph_path: (str) Path to the graph.pbtxt file + :param input_names: (str) Input node names + :param output_names: (str) Output node names + """ + + print('[*] Optimizing graph for inference ...') + + input_graph_def = graph_pb2.GraphDef() + with gfile.Open(graph_path, "rb") as f: + data = f.read() + text_format.Merge(data.decode("utf-8"), input_graph_def) + + output_graph_def = optimize_for_inference_lib.optimize_for_inference( + input_graph_def, + input_names, + output_names, + _parse_placeholder_types(str(dtypes.float32.as_datatype_enum)), + False) + + print('[*] Saving original graph in: {}'.format(graph_path + '.old')) + shutil.move(graph_path, graph_path + '.old') + + print('[*] Writing down optimized graph ...') + graph_io.write_graph(output_graph_def, + os.path.dirname(graph_path), + os.path.basename(graph_path)) + + +@_print_input +def to_savedmodel(input_shape: str, + model_fn: Callable, + checkpoint_dir: str, + output_dir: str, + input_names: List[str], + output_names: List[str], + use_amp: bool, + use_xla: bool, + compress: bool, + params: Namespace): + """ + Export checkpoint to Tensorflow savedModel + + :param input_shape: (str) Input shape to the model in format [batch, height, width, channels] + :param model_fn: (Callable) Estimator's model_fn + :param checkpoint_dir: (str) Directory where checkpoints are stored + :param output_dir: (str) Output directory for storage of 
the generated savedModel + :param input_names: (List[str]) Input node names + :param output_names: (List[str]) Output node names + :param use_amp: (bool )Enable TF-AMP + :param use_xla: (bool) Enable XLA + :param compress: (bool) Compress output + :param params: (Namespace) Namespace to be passed to model_fn + """ + assert os.path.exists(checkpoint_dir), 'Path not found: {}'.format(checkpoint_dir) + assert input_shape is not None, 'Input shape must be provided' + + _optimize_checkpoint_for_inference(os.path.join(checkpoint_dir, 'graph.pbtxt'), input_names, output_names) + + try: + ckpt_path = os.path.splitext([p for p in glob.iglob(os.path.join(checkpoint_dir, '*.index'))][0])[0] + except IndexError: + raise ValueError('Could not find checkpoint in directory: {}'.format(checkpoint_dir)) + + config_proto = tf.compat.v1.ConfigProto() + + config_proto.allow_soft_placement = True + config_proto.log_device_placement = False + config_proto.gpu_options.allow_growth = True + config_proto.gpu_options.force_gpu_compatible = True + + if use_amp: + os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1" + if use_xla: + config_proto.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1 + + run_config = tf.estimator.RunConfig( + model_dir=None, + tf_random_seed=None, + save_summary_steps=1e9, # disabled + save_checkpoints_steps=None, + save_checkpoints_secs=None, + session_config=config_proto, + keep_checkpoint_max=None, + keep_checkpoint_every_n_hours=1e9, # disabled + log_step_count_steps=1e9, + train_distribute=None, + device_fn=None, + protocol=None, + eval_distribute=None, + experimental_distribute=None + ) + + estimator = tf.estimator.Estimator( + model_fn=model_fn, + model_dir=ckpt_path, + config=run_config, + params=params + ) + + print('[*] Exporting the model ...') + + input_type = tf.float16 if use_amp else tf.float32 + + def get_serving_input_receiver_fn(): + + def serving_input_receiver_fn(): + features = tf.compat.v1.placeholder(dtype=input_type, shape=input_shape, name='input_tensor') + + return tf.estimator.export.TensorServingInputReceiver(features=features, receiver_tensors=features) + + return serving_input_receiver_fn + + export_path = estimator.export_saved_model( + export_dir_base=output_dir, + serving_input_receiver_fn=get_serving_input_receiver_fn(), + checkpoint_path=ckpt_path + ) + + print('[*] Done! path: `%s`' % export_path.decode()) + + if compress: + _compress(export_path.decode(), os.path.join(output_dir, 'saved_model')) + + +@_print_input +def to_tf_trt(savedmodel_dir: str, + output_dir: str, + precision: str, + feed_dict_fn: Callable, + num_runs: int, + output_tensor_names: List[str], + compress: bool): + """ + Export Tensorflow savedModel to TF-TRT + + :param savedmodel_dir: (str) Input directory containing a Tensorflow savedModel + :param output_dir: (str) Output directory for storage of the generated TF-TRT exported model + :param precision: (str) Desired precision of the network (FP32, FP16 or INT8) + :param feed_dict_fn: (Callable) Input tensors for INT8 calibration. Model specific. + :param num_runs: (int) Number of calibration runs. + :param output_tensor_names: (List) Name of the output tensor for graph conversion. Model specific. + :param compress: (bool) Compress output + """ + if savedmodel_dir is None or not os.path.exists(savedmodel_dir): + raise FileNotFoundError('savedmodel_dir not found: {}'.format(savedmodel_dir)) + + if os.path.exists(output_dir): + print('[*] Output dir \'{}\' is not empty. 
Cleaning up ...'.format(output_dir)) + shutil.rmtree(output_dir) + + print('[*] Converting model...') + + converter = trt.TrtGraphConverter(input_saved_model_dir=savedmodel_dir, + precision_mode=precision) + converter.convert() + + if precision == 'INT8': + print('[*] Running INT8 calibration ...') + + converter.calibrate(fetch_names=output_tensor_names, num_runs=num_runs, feed_dict_fn=feed_dict_fn) + + converter.save(output_dir) + + print('[*] Done! TF-TRT saved_model stored in: `%s`' % output_dir) + + if compress: + _compress('tftrt_saved_model', output_dir) + + +@_print_input +def to_onnx(input_dir: str, output_dir: str, compress: bool): + """ + Convert Tensorflow savedModel to ONNX with tf2onnx + + :param input_dir: (str) Input directory with a Tensorflow savedModel + :param output_dir: (str) Output directory where to store the ONNX version of the model + :param compress: (bool) Compress output + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + file_name = os.path.join(output_dir, 'model.onnx') + print('[*] Converting model...') + + ret = subprocess.call(['python', '-m', 'tf2onnx.convert', + '--saved-model', input_dir, + '--output', file_name], + stdout=open(os.devnull, 'w'), + stderr=subprocess.STDOUT) + if ret > 0: + raise RuntimeError('tf2onnx.convert has failed with error: {}'.format(ret)) + + print('[*] Done! ONNX file stored in: %s' % file_name) + + if compress: + _compress(output_dir, 'onnx_model') diff --git a/cv/semantic_segmentation/vnet/tensorflow/utils/var_storage.py b/cv/semantic_segmentation/vnet/tensorflow/utils/var_storage.py new file mode 100644 index 0000000000000000000000000000000000000000..4f1560cd091d10c0151944ac5c1e51fd9f356771 --- /dev/null +++ b/cv/semantic_segmentation/vnet/tensorflow/utils/var_storage.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# ============================================================================== +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import tensorflow as tf + +__all__ = ['model_variable_scope'] + + +def model_variable_scope(name, reuse=False, dtype=tf.float32, debug_mode=False, *args, **kwargs): + """Returns a variable scope that the model should be created under. + If self.dtype is a castable type, model variable will be created in fp32 + then cast to self.dtype before being used. + Returns: + A variable scope for the model. + """ + + def _custom_dtype_getter(getter, name, shape=None, dtype=None, trainable=True, regularizer=None, *args, **kwargs): + """Creates variables in fp32, then casts to fp16 if necessary. + This function is a custom getter. A custom getter is a function with the + same signature as tf.get_variable, except it has an additional getter + parameter. Custom getters can be passed as the `custom_getter` parameter of + tf.variable_scope. 
Then, tf.get_variable will call the custom getter, + instead of directly getting a variable itself. This can be used to change + the types of variables that are retrieved with tf.get_variable. + The `getter` parameter is the underlying variable getter, that would have + been called if no custom getter was used. Custom getters typically get a + variable with `getter`, then modify it in some way. + This custom getter will create an fp32 variable. If a low precision + (e.g. float16) variable was requested it will then cast the variable to the + requested dtype. The reason we do not directly create variables in low + precision dtypes is that applying small gradients to such variables may + cause the variable not to change. + Args: + getter: The underlying variable getter, that has the same signature as + tf.get_variable and returns a variable. + name: The name of the variable to get. + shape: The shape of the variable to get. + *args: Additional arguments to pass unmodified to getter. + **kwargs: Additional keyword arguments to pass unmodified to getter. + Returns: + A variable which is cast to fp16 if necessary. + """ + + storage_dtype = tf.float32 if dtype in [tf.float32, tf.float16] else dtype + + variable = getter( + name, + shape, + dtype=storage_dtype, + trainable=trainable, + regularizer=( + regularizer if + (trainable and not any(l_name.lower() in name.lower() + for l_name in ['batchnorm', 'batch_norm'])) else None + ), + *args, + **kwargs + ) + + if dtype != tf.float32: + cast_name = name + '/fp16_cast' + + try: + cast_variable = tf.compat.v1.get_default_graph().get_tensor_by_name(cast_name + ':0') + + except KeyError: + cast_variable = tf.cast(variable, dtype, name=cast_name) + + cast_variable._ref = variable._ref + variable = cast_variable + + return variable + + return tf.compat.v1.variable_scope(name, reuse=reuse, dtype=dtype, custom_getter=_custom_dtype_getter, *args, **kwargs)