diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
index c32e2e67e4d99879ed5ec3c8aa33de04a75beeb2..74a83e90983d6d0286d716dbcf54e41dd42a5da1 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
@@ -31,7 +31,7 @@ from gpu_environment import get_custom_getter
 from npu_bridge.estimator.npu_unary_ops import npu_unary_ops
 from npu_bridge.estimator import npu_ops
-from npu_bridge.estimator.npu_aicore_ops import npu_aicore_ops
+#from npu_bridge.estimator.npu_aicore_ops import npu_aicore_ops
 
 
 class BertConfig(object):
   """Configuration for `BertModel`."""
@@ -289,6 +289,7 @@ def gelu(x):
 
   if tf.flags.FLAGS.npu_bert_fused_gelu:
     if tf.flags.FLAGS.use_fast_gelu:
+      from npu_bridge.estimator.npu_aicore_ops import npu_aicore_ops
       return npu_aicore_ops.fast_gelu(x)
     else:
       return npu_unary_ops.gelu(x)
@@ -388,6 +389,7 @@ def dropout(input_tensor, dropout_prob):
   if tf.flags.FLAGS.npu_bert_npu_dropout:
     output = npu_ops.dropout(input_tensor, 1.0 - dropout_prob)
   elif tf.flags.FLAGS.npu_bert_npu_dropout_v3:
+    from npu_bridge.estimator import npu_aicore_ops
     output = npu_aicore_ops.dropout_v3(input_tensor, 1.0 - dropout_prob)
   else:
     output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_classifier.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_classifier.py
index 113cecefc0ae67a15334c9f83938f3bc7625387f..8076c5db88c43a7e9bdf50da2e216efb71007757 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_classifier.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_classifier.py
@@ -95,9 +95,9 @@ flags.DEFINE_integer("npu_bert_loss_scale", 0,
 flags.DEFINE_bool("npu_bert_clip_by_global_norm", False,
                   "Use clip_by_global_norm if True, or use clip_by_norm for each gradient")
 
-flags.DEFINE_bool('npu_bert_npu_dropout', True, 'Whether to use npu defined dropout op')
+flags.DEFINE_bool('npu_bert_npu_dropout', False, 'Whether to use npu defined dropout op')
 
-flags.DEFINE_bool('npu_bert_npu_dropout_v3', False, 'Whether to use npu defined dropout_v3 op')
+flags.DEFINE_bool('npu_bert_npu_dropout_v3', True, 'Whether to use npu defined dropout_v3 op')
 
 flags.DEFINE_bool('npu_bert_tail_optimize', False, 'Whether to use npu allreduce tail optimization')
 
@@ -249,7 +249,7 @@ def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
       input_mask=input_mask,
       token_type_ids=segment_ids,
       use_one_hot_embeddings=use_one_hot_embeddings,
-      compute_type=tf.float32)
+      compute_type=tf.float16 if FLAGS.precision_mode == "allow_mix_precision" else tf.float32)
 
   # In the demo, we are doing a simple classification task on the entire
   # segment.
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_squad.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_squad.py
index 56815803e955ee20039632a1b328797c951b1890..0d21f03d2242e77081ff04244ff7352504b9ca0e 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_squad.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_squad.py
@@ -194,9 +194,9 @@ def extract_run_squad_flags():
   flags.DEFINE_bool("npu_bert_clip_by_global_norm", False,
                     "Use clip_by_global_norm if True, or use clip_by_norm for each gradient")
 
-  flags.DEFINE_bool('npu_bert_npu_dropout', True, 'Whether to use npu defined dropout op')
+  flags.DEFINE_bool('npu_bert_npu_dropout', False, 'Whether to use npu defined dropout op')
 
-  flags.DEFINE_bool('npu_bert_npu_dropout_v3', False, 'Whether to use npu defined dropout_v3 op')
+  flags.DEFINE_bool('npu_bert_npu_dropout_v3', True, 'Whether to use npu defined dropout_v3 op')
 
   flags.DEFINE_bool('npu_bert_tail_optimize', False, 'Whether to use npu allreduce tail optimization')
 
@@ -241,7 +241,7 @@ def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
       input_mask=input_mask,
       token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
-      compute_type=tf.float32)
+      compute_type=tf.float16 if FLAGS.precision_mode == "allow_mix_precision" else tf.float32)
 
   final_hidden = model.get_sequence_output()
 
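
Note on the modeling.py hunks: the module-level npu_aicore_ops import is commented out and re-imported inside the branches that actually use it, so the file still imports on npu_bridge builds that do not ship the AICore ops. Below is a minimal, self-contained sketch of that deferred-import pattern, mirroring the dropout() change; it is illustrative only, not the repository's exact code, and assumes the npu_bert_npu_dropout_v3 flag is defined as in run_classifier.py / run_squad.py.

    import tensorflow as tf

    def dropout(input_tensor, dropout_prob):
      """Sketch of the flag-gated, deferred-import dropout path (TF1 API)."""
      if dropout_prob is None or dropout_prob == 0.0:
        return input_tensor
      if tf.flags.FLAGS.npu_bert_npu_dropout_v3:
        # Deferred import: only evaluated when the NPU dropout_v3 kernel is
        # requested, so a missing AICore ops module does not break import time.
        from npu_bridge.estimator import npu_aicore_ops
        return npu_aicore_ops.dropout_v3(input_tensor, 1.0 - dropout_prob)
      # Fallback: stock TensorFlow dropout (keep_prob-style TF1 signature).
      return tf.nn.dropout(input_tensor, 1.0 - dropout_prob)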