diff --git a/TensorFlow/built-in/cv/detection/SSD-VGG_ID1619_for_TensorFlow/train.py b/TensorFlow/built-in/cv/detection/SSD-VGG_ID1619_for_TensorFlow/train.py index 10fd58af0692b3eba416b77cedb4942a597cf492..5b13e6ffb96a5ea088ee637913d715f4e692baec 100644 --- a/TensorFlow/built-in/cv/detection/SSD-VGG_ID1619_for_TensorFlow/train.py +++ b/TensorFlow/built-in/cv/detection/SSD-VGG_ID1619_for_TensorFlow/train.py @@ -208,6 +208,7 @@ def main(): custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") custom_op.parameter_map["use_off_line"].b = True custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode) + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") if args.over_dump: custom_op.parameter_map["enable_dump_debug"].b = True custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all") diff --git a/TensorFlow/built-in/cv/image_classification/AlexNet_ID0259_for_TensorFlow/train.py b/TensorFlow/built-in/cv/image_classification/AlexNet_ID0259_for_TensorFlow/train.py index 0c5296de433f3985f6b1d8b9227bdcee857617f8..ed0db8696f3fd14c0e960cdf3754bc807d4c71af 100644 --- a/TensorFlow/built-in/cv/image_classification/AlexNet_ID0259_for_TensorFlow/train.py +++ b/TensorFlow/built-in/cv/image_classification/AlexNet_ID0259_for_TensorFlow/train.py @@ -135,6 +135,7 @@ class AlexNet: custom_op.parameter_map["use_off_line"].b = True # ������ʾ�������ڕN��AI������ִ��ѵ�� custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") custom_op.parameter_map["hcom_parallel"].b = True + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # ������ʾ�ر�remap bcast_op = broadcast_global_variables(0,1) sess = tf.Session(config=config) diff --git a/TensorFlow/built-in/cv/image_classification/EfficientNet-B4_ID0162_for_TensorFlow/main_npu.py 
b/TensorFlow/built-in/cv/image_classification/EfficientNet-B4_ID0162_for_TensorFlow/main_npu.py index df5bad95c11dd24fc24649d86dd680cc9f6ced4c..3f97d3f13c07f33ef1ff8811e48e0d7dbc344b85 100644 --- a/TensorFlow/built-in/cv/image_classification/EfficientNet-B4_ID0162_for_TensorFlow/main_npu.py +++ b/TensorFlow/built-in/cv/image_classification/EfficientNet-B4_ID0162_for_TensorFlow/main_npu.py @@ -714,7 +714,7 @@ def main(unused_argv): os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' config = NPURunConfig( - #precision_mode="allow_fp32_to_fp16", + precision_mode="allow_mix_precision", #enable_data_pre_proc=True, save_checkpoints_steps=FLAGS.num_train_images // (FLAGS.train_batch_size * int(os.getenv('RANK_SIZE'))), session_config=estimator_config, diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/Incetpion_V3.py b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/Incetpion_V3.py index beda646d44858322b35eef7d9c9820b6c607eb4b..fc478bd1a162e30cae2a9cb15e3310e38a96ff8f 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/Incetpion_V3.py +++ b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/Incetpion_V3.py @@ -107,7 +107,7 @@ def run(): custom_op.name = "NpuOptimizer" custom_op.parameter_map["use_off_line"].b = True custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") - + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") config.graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.OFF config.graph_options.rewrite_options.remapping = RewriterConfig.OFF if FLAGS.precision_mode == "allow_mix_precision": diff --git a/TensorFlow/built-in/cv/image_classification/MobileNetV1_ID0093_for_TensorFlow/train.py b/TensorFlow/built-in/cv/image_classification/MobileNetV1_ID0093_for_TensorFlow/train.py index 
acaf58d2821de5f26c7cefe00401d022355993a4..3f5394adbffc187b6d8a9053ca9031e57f579e70 100644 --- a/TensorFlow/built-in/cv/image_classification/MobileNetV1_ID0093_for_TensorFlow/train.py +++ b/TensorFlow/built-in/cv/image_classification/MobileNetV1_ID0093_for_TensorFlow/train.py @@ -127,6 +127,7 @@ def main(args): custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") custom_op.parameter_map["use_off_line"].b = True custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode) + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") config_proto.graph_options.rewrite_options.remapping = RewriterConfig.OFF sess = tf.Session(config=npu_config_proto(config_proto=config_proto)) sess.run(tf.global_variables_initializer()) diff --git a/TensorFlow/built-in/cv/image_classification/MobileNetV3_ID0256_for_TensorFlow/train_cls-npu.py b/TensorFlow/built-in/cv/image_classification/MobileNetV3_ID0256_for_TensorFlow/train_cls-npu.py index dbb7fec0e01015b58f9f9142ceeb3d4c7df5e55f..e051a8f8cb6d2701d11bc5599a17949a8ec0b556 100644 --- a/TensorFlow/built-in/cv/image_classification/MobileNetV3_ID0256_for_TensorFlow/train_cls-npu.py +++ b/TensorFlow/built-in/cv/image_classification/MobileNetV3_ID0256_for_TensorFlow/train_cls-npu.py @@ -153,6 +153,7 @@ if __name__ == '__main__': custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF sess = tf.Session(config=sess_config) K.set_session(sess) diff --git a/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/vgg16/create_session.py b/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/vgg16/create_session.py index 
3057d192b523b1dbd6e4df15f5d1b1eba372c4c0..e30135456a8eb201a9baa9b6ec09db8002f2d634 100644 --- a/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/vgg16/create_session.py +++ b/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/vgg16/create_session.py @@ -37,6 +37,7 @@ class CreateSession(): custom_op.name = "NpuOptimizer" custom_op.parameter_map["use_off_line"].b = True custom_op.parameter_map["hcom_parallel"].b = True + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") self.estimator_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF self.estimator_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF custom_op.parameter_map["graph_run_mode"].i = 0 diff --git a/TensorFlow/built-in/cv/image_classification/VGG19_for_TensorFlow/infer_from_pb.py b/TensorFlow/built-in/cv/image_classification/VGG19_for_TensorFlow/infer_from_pb.py index ff04e440b7af827cee821175f27f9c6f8027c11d..c1b2c82d269bcad6fe710b7afc1327fdbb93a565 100644 --- a/TensorFlow/built-in/cv/image_classification/VGG19_for_TensorFlow/infer_from_pb.py +++ b/TensorFlow/built-in/cv/image_classification/VGG19_for_TensorFlow/infer_from_pb.py @@ -79,6 +79,7 @@ class Classifier(object): # 4)set graph_run_mode=0,obtain better performance custom_op.parameter_map["graph_run_mode"].i = 0 + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") # -------------------------------------------------------------------------------- # load model, set graph input nodes and output nodes diff --git a/TensorFlow/built-in/recommendation/MMoE_Transformer_ID0390_for_TensorFlow/train/main.py b/TensorFlow/built-in/recommendation/MMoE_Transformer_ID0390_for_TensorFlow/train/main.py index ae177ccdfefe9a401771f41960ee669fbaa4faad..c15c1fe2b394d19f3f19a7a18b6af2084cbf5b39 100644 --- a/TensorFlow/built-in/recommendation/MMoE_Transformer_ID0390_for_TensorFlow/train/main.py +++ 
b/TensorFlow/built-in/recommendation/MMoE_Transformer_ID0390_for_TensorFlow/train/main.py @@ -255,6 +255,7 @@ if __name__ == '__main__': custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes('allow_mix_precision') custom_op.parameter_map["dynamic_input"].b = True custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") if iterations_per_loop_mode: custom_op.parameter_map["iterations_per_loop"].i = 10 sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF diff --git a/TensorFlow/contrib/cv/MobileFaceNet_Tensorflow/mobilefacenet-V2_ID0929_for_TensorFlow/train_nets.py b/TensorFlow/contrib/cv/MobileFaceNet_Tensorflow/mobilefacenet-V2_ID0929_for_TensorFlow/train_nets.py index 87c79246fc51ad94ab779a39dea1cca686729501..c82e1cea06398411601dc00e657acaa6a914a880 100644 --- a/TensorFlow/contrib/cv/MobileFaceNet_Tensorflow/mobilefacenet-V2_ID0929_for_TensorFlow/train_nets.py +++ b/TensorFlow/contrib/cv/MobileFaceNet_Tensorflow/mobilefacenet-V2_ID0929_for_TensorFlow/train_nets.py @@ -210,6 +210,7 @@ if __name__ == '__main__': custom_op.parameter_map["fusion_switch_file"].s = \ tf.compat.as_bytes("./fusion_switch.cfg") custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") config.graph_options.rewrite_options.remapping = RewriterConfig.OFF sess = tf.Session(config=config) sess.run(init) diff --git a/TensorFlow/contrib/cv/NOISE2NOISE_ID0800_for_TensorFlow/dnnlib/tflib/tfutil.py b/TensorFlow/contrib/cv/NOISE2NOISE_ID0800_for_TensorFlow/dnnlib/tflib/tfutil.py index 9f0c8cf74a20dcba4470e5231c81beae8018def2..63d2fef8a7faba34d6398b42d15c69f0b7d602da 100644 --- a/TensorFlow/contrib/cv/NOISE2NOISE_ID0800_for_TensorFlow/dnnlib/tflib/tfutil.py +++ b/TensorFlow/contrib/cv/NOISE2NOISE_ID0800_for_TensorFlow/dnnlib/tflib/tfutil.py @@ -1,4 +1,4 
@@ -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # # This work is licensed under the Creative Commons Attribution-NonCommercial # 4.0 International License. To view a copy of this license, visit @@ -96,6 +96,7 @@ def create_session(config_dict: dict = None, force_as_default: bool = False, gra custom_op.parameter_map["mix_compile_mode"].b = config_dict["mix_compile_mode"] custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(config_dict["precision_mode"]) custom_op.parameter_map["op_select_implmode"].s = tf.compat.as_bytes(config_dict["op_select_implmode"]) + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") # 开启Allreduce和前后向并行执行 custom_op.parameter_map["hcom_parallel"].b = config_dict["hcom_parallel"] diff --git a/TensorFlow/contrib/cv/VDSR_ID2114_for_TensorFlow/VDSR.py b/TensorFlow/contrib/cv/VDSR_ID2114_for_TensorFlow/VDSR.py index 5c44dd5d05892122d2531e634f5d18ae384a7cd0..0e32f291cdc7f2a1ac49a1b0b6678e4a435a3054 100644 --- a/TensorFlow/contrib/cv/VDSR_ID2114_for_TensorFlow/VDSR.py +++ b/TensorFlow/contrib/cv/VDSR_ID2114_for_TensorFlow/VDSR.py @@ -185,6 +185,7 @@ if __name__ == '__main__': custom_op = config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA") config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # 必须显式关闭remap # custom_op.parameter_map["use_off_line"].b = True # config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF diff --git a/TensorFlow/contrib/cv/VGG19_ID0374_for_TensorFlow/vgg19/trainer.py b/TensorFlow/contrib/cv/VGG19_ID0374_for_TensorFlow/vgg19/trainer.py index 79895712d6b9751d870396cfd209d7e5d5f90c16..e7f57434165954d4cc2908176b633b56cfa7b4cd 100644 --- 
a/TensorFlow/contrib/cv/VGG19_ID0374_for_TensorFlow/vgg19/trainer.py +++ b/TensorFlow/contrib/cv/VGG19_ID0374_for_TensorFlow/vgg19/trainer.py @@ -41,6 +41,6 @@ class Trainer(object): run_config = NPURunConfig( - auto_tune_mode=os.getenv("FLAG_AUTOTUNE"), + auto_tune_mode=os.getenv("FLAG_AUTOTUNE", "RL,GA"), hcom_parallel=True, precision_mode="allow_mix_precision", enable_data_pre_proc=True,