From c3abfb5954cbef12e526eb3ec042cd932418bf7d Mon Sep 17 00:00:00 2001 From: wdxwj Date: Fri, 5 Nov 2021 21:57:12 +0800 Subject: [PATCH] Fix return_seq_lens flag in ErnieTextFieldReader and catch ValueError in BpeEncoder --- .../nlp/senta/src/data/field_reader/ernie_text_field_reader.py | 2 +- research/nlp/senta/src/data/tokenizer/tokenization_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py b/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py index 91c20a011..faa24672b 100644 --- a/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py +++ b/research/nlp/senta/src/data/field_reader/ernie_text_field_reader.py @@ -89,7 +89,7 @@ class ErnieTextFieldReader(): padded_ids, input_mask = pad_batch_data(src_ids, pad_idx=self.field_config.padding_id, return_input_mask=True, - return_seq_lens=True) + return_seq_lens=False) sent_ids_batch = pad_batch_data(sentence_ids, pad_idx=self.field_config.padding_id) pos_ids_batch = pad_batch_data(position_ids, pad_idx=self.field_config.padding_id) diff --git a/research/nlp/senta/src/data/tokenizer/tokenization_utils.py b/research/nlp/senta/src/data/tokenizer/tokenization_utils.py index 3c4ca7a85..e07c91610 100644 --- a/research/nlp/senta/src/data/tokenizer/tokenization_utils.py +++ b/research/nlp/senta/src/data/tokenizer/tokenization_utils.py @@ -136,7 +136,7 @@ class BpeEncoder(): j = word.index(first, i) new_word.extend(word[i:j]) i = j - except IOError: + except ValueError: new_word.extend(word[i:]) break -- Gitee