diff --git a/ais_bench/benchmark/utils/tokenizer.py b/ais_bench/benchmark/utils/tokenizer.py
index e9cb76d65b2a21640eb20ba66c691568110d4b27..f039f5d8832a292217ec1394dccc23b65d1c2fe0 100644
--- a/ais_bench/benchmark/utils/tokenizer.py
+++ b/ais_bench/benchmark/utils/tokenizer.py
@@ -65,6 +65,9 @@ class MindformersTokenizer(Tokenizer):
     def decode(self, token_ids: list, skip_special_tokens=False) -> str:
         return self.tokenizer_model.decode(token_ids, skip_special_tokens=skip_special_tokens)
 
+    def batch_encode_plus(self, batch_text_or_text_pairs, *args, **kwargs):
+        return self.tokenizer_model.batch_encode_plus(batch_text_or_text_pairs, *args, **kwargs)
+
 
 class BenchmarkTokenizer:
     def __init__(self, model_path: str, tokenizer_type: str = None, trust_remote_code: bool = False, **kwargs):