diff --git a/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py b/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py
index 91c20a0114b5f34f25a7029c567604909ab3c890..faa24672b7baf99a7bbc0a961c3220cfe2932ef7 100644
--- a/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py
+++ b/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py
@@ -89,7 +89,7 @@ class ErnieTextFieldReader():
         padded_ids, input_mask = pad_batch_data(src_ids,
                                                 pad_idx=self.field_config.padding_id,
                                                 return_input_mask=True,
-                                                return_seq_lens=True)
+                                                return_seq_lens=False)
         sent_ids_batch = pad_batch_data(sentence_ids, pad_idx=self.field_config.padding_id)
         pos_ids_batch = pad_batch_data(position_ids, pad_idx=self.field_config.padding_id)
diff --git a/research/nlp/senta/src/data/tokenizer/tokenization_utils.py b/research/nlp/senta/src/data/tokenizer/tokenization_utils.py
index 3c4ca7a859a878bfbe1d15f6f49636f684030c95..e07c91610a9d54aefa63ec1579b4ae70d6b84dac 100644
--- a/research/nlp/senta/src/data/tokenizer/tokenization_utils.py
+++ b/research/nlp/senta/src/data/tokenizer/tokenization_utils.py
@@ -136,7 +136,7 @@ class BpeEncoder():
                     j = word.index(first, i)
                     new_word.extend(word[i:j])
                     i = j
-                except IOError:
+                except ValueError:
                     new_word.extend(word[i:])
                     break
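
Two notes on why these hunks matter. In `ernie_text_field_reader.py`, the call site unpacks exactly two values (`padded_ids, input_mask`), so asking `pad_batch_data` for sequence lengths as well presumably yields an extra return value that the two-name unpack cannot absorb; `return_seq_lens=False` keeps the outputs and the unpack in agreement. In `tokenization_utils.py`, `list.index` raises `ValueError` (not `IOError`) when the element is not found, so the old handler never fired and the not-found case crashed the BPE merge loop. The sketch below is a minimal, hypothetical `merge_bigram` helper (not the repo's actual `BpeEncoder.bpe`) that reproduces the same `try`/`except` pattern and shows why `ValueError` is the exception to catch:

```python
# Hypothetical sketch of one BPE merge step; `word`, `first`, `second` mirror
# the names visible in the diff context, the helper itself is illustrative.
def merge_bigram(word, first, second):
    """Merge every adjacent (first, second) pair in `word` into one symbol."""
    new_word = []
    i = 0
    while i < len(word):
        try:
            j = word.index(first, i)   # raises ValueError once `first` no longer appears
            new_word.extend(word[i:j])
            i = j
        except ValueError:             # with `except IOError:` this branch is unreachable
            new_word.extend(word[i:])  # copy the tail and finish the merge pass
            break
        if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
            new_word.append(first + second)
            i += 2
        else:
            new_word.append(word[i])
            i += 1
    return new_word


if __name__ == "__main__":
    # ("l", "o") merges once; the trailing symbols survive only because the
    # not-found case is caught as ValueError and copied over via word[i:].
    print(merge_bigram(["l", "o", "w", "e", "r"], "l", "o"))  # ['lo', 'w', 'e', 'r']
```

Catching the broad-but-wrong `IOError` would let the `ValueError` propagate out of the tokenizer on the very first token whose symbols cannot be merged further, which is why the one-word exception change is the substantive fix here.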