From 9c4f802b6b58f464270570cd630de56ef79fa906 Mon Sep 17 00:00:00 2001 From: Yige Xu Date: Fri, 6 Nov 2020 20:53:10 +0800 Subject: [PATCH 1/4] optimize BertEmbedding and RoBERTaEmbedding will early exit if layer != -1 --- fastNLP/embeddings/bert_embedding.py | 24 ++++++++++++++++++------ fastNLP/embeddings/roberta_embedding.py | 24 +++++++++++++++++------- fastNLP/modules/encoder/bert.py | 18 +++++++++++++++--- fastNLP/modules/encoder/roberta.py | 4 ++-- 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/fastNLP/embeddings/bert_embedding.py b/fastNLP/embeddings/bert_embedding.py index ec2ba26b..c57d2bef 100644 --- a/fastNLP/embeddings/bert_embedding.py +++ b/fastNLP/embeddings/bert_embedding.py @@ -93,7 +93,7 @@ class BertEmbedding(ContextualEmbedding): """ super(BertEmbedding, self).__init__(vocab, word_dropout=word_dropout, dropout=dropout) - if word_dropout>0: + if word_dropout > 0: assert vocab.unknown != None, "When word_drop>0, Vocabulary must contain the unknown token." if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR: @@ -370,17 +370,29 @@ class _BertWordModel(nn.Module): include_cls_sep: bool = False, pooled_cls: bool = False, auto_truncate: bool = False, min_freq=2): super().__init__() - self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name) - self.encoder = BertModel.from_pretrained(model_dir_or_name) - self._max_position_embeddings = self.encoder.config.max_position_embeddings - # 检查encoder_layer_number是否合理 - encoder_layer_number = len(self.encoder.encoder.layer) if isinstance(layers, list): self.layers = [int(l) for l in layers] elif isinstance(layers, str): self.layers = list(map(int, layers.split(','))) else: raise TypeError("`layers` only supports str or list[int]") + assert len(self.layers) > 0, "There is no layer selected!" + + neg_num_output_layer = -16384 + pos_num_output_layer = 0 + for layer in self.layers: + if layer < 0: + neg_num_output_layer = max(layer, neg_num_output_layer) + else: + pos_num_output_layer = max(layer, pos_num_output_layer) + + self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name) + self.encoder = BertModel.from_pretrained(model_dir_or_name, + neg_num_output_layer=neg_num_output_layer, + pos_num_output_layer=pos_num_output_layer) + self._max_position_embeddings = self.encoder.config.max_position_embeddings + # 检查encoder_layer_number是否合理 + encoder_layer_number = len(self.encoder.encoder.layer) for layer in self.layers: if layer < 0: assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ diff --git a/fastNLP/embeddings/roberta_embedding.py b/fastNLP/embeddings/roberta_embedding.py index 90ea1085..ec95abe2 100644 --- a/fastNLP/embeddings/roberta_embedding.py +++ b/fastNLP/embeddings/roberta_embedding.py @@ -196,20 +196,30 @@ class _RobertaWordModel(nn.Module): include_cls_sep: bool = False, pooled_cls: bool = False, auto_truncate: bool = False, min_freq=2): super().__init__() - self.tokenizer = RobertaTokenizer.from_pretrained(model_dir_or_name) - self.encoder = RobertaModel.from_pretrained(model_dir_or_name) - # 由于RobertaEmbedding中设置了padding_idx为1, 且使用了非常神奇的position计算方式,所以-2 - self._max_position_embeddings = self.encoder.config.max_position_embeddings - 2 - # 检查encoder_layer_number是否合理 - encoder_layer_number = len(self.encoder.encoder.layer) - if isinstance(layers, list): self.layers = [int(l) for l in layers] elif isinstance(layers, str): self.layers = list(map(int, layers.split(','))) else: raise TypeError("`layers` only supports str or list[int]") + assert len(self.layers) > 0, "There is no layer selected!" + + neg_num_output_layer = -16384 + pos_num_output_layer = 0 + for layer in self.layers: + if layer < 0: + neg_num_output_layer = max(layer, neg_num_output_layer) + else: + pos_num_output_layer = max(layer, pos_num_output_layer) + self.tokenizer = RobertaTokenizer.from_pretrained(model_dir_or_name) + self.encoder = RobertaModel.from_pretrained(model_dir_or_name, + neg_num_output_layer=neg_num_output_layer, + pos_num_output_layer=pos_num_output_layer) + # 由于RobertaEmbedding中设置了padding_idx为1, 且使用了非常神奇的position计算方式,所以-2 + self._max_position_embeddings = self.encoder.config.max_position_embeddings - 2 + # 检查encoder_layer_number是否合理 + encoder_layer_number = len(self.encoder.encoder.layer) for layer in self.layers: if layer < 0: assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ diff --git a/fastNLP/modules/encoder/bert.py b/fastNLP/modules/encoder/bert.py index 7a9ba57e..8d5d576e 100644 --- a/fastNLP/modules/encoder/bert.py +++ b/fastNLP/modules/encoder/bert.py @@ -366,19 +366,28 @@ class BertLayer(nn.Module): class BertEncoder(nn.Module): - def __init__(self, config): + def __init__(self, config, num_output_layer=-1): super(BertEncoder, self).__init__() layer = BertLayer(config) self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + num_output_layer = num_output_layer if num_output_layer >= 0 else (len(self.layer) + num_output_layer) + self.num_output_layer = max(min(num_output_layer, len(self.layer)), 0) + if self.num_output_layer + 1 < len(self.layer): + logger.info(f'The transformer encoder will early exit after layer-{self.num_output_layer} ' + f'(start from 0)!') def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): all_encoder_layers = [] - for layer_module in self.layer: + for idx, layer_module in enumerate(self.layer): + if idx > self.num_output_layer: + break hidden_states = layer_module(hidden_states, attention_mask) if output_all_encoded_layers: all_encoder_layers.append(hidden_states) if not output_all_encoded_layers: all_encoder_layers.append(hidden_states) + if len(all_encoder_layers) == 0: + all_encoder_layers.append(hidden_states) return all_encoder_layers @@ -435,6 +444,9 @@ class BertModel(nn.Module): self.config = config self.hidden_size = self.config.hidden_size self.model_type = 'bert' + neg_num_output_layer = kwargs.get('neg_num_output_layer', -1) + pos_num_output_layer = kwargs.get('pos_num_output_layer', self.config.num_hidden_layers - 1) + self.num_output_layer = max(neg_num_output_layer + self.config.num_hidden_layers, pos_num_output_layer) if hasattr(config, 'sinusoidal_pos_embds'): self.model_type = 'distilbert' elif 'model_type' in kwargs: @@ -445,7 +457,7 @@ class BertModel(nn.Module): else: self.embeddings = BertEmbeddings(config) - self.encoder = BertEncoder(config) + self.encoder = BertEncoder(config, num_output_layer=self.num_output_layer) if self.model_type != 'distilbert': self.pooler = BertPooler(config) else: diff --git a/fastNLP/modules/encoder/roberta.py b/fastNLP/modules/encoder/roberta.py index da0ab537..10bdb64b 100644 --- a/fastNLP/modules/encoder/roberta.py +++ b/fastNLP/modules/encoder/roberta.py @@ -64,8 +64,8 @@ class RobertaModel(BertModel): undocumented """ - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *inputs, **kwargs): + super().__init__(config, *inputs, **kwargs) self.embeddings = RobertaEmbeddings(config) self.apply(self.init_bert_weights) -- Gitee From 9b7562a13aa626ab4891126034bf78c326a1207a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=90=91=E9=98=B3?= <760605341@qq.com> Date: Wed, 18 Nov 2020 12:10:09 +0800 Subject: [PATCH 2/4] update fastNLP/modules/encoder/seq2seq_encoder.py. --- fastNLP/modules/encoder/seq2seq_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastNLP/modules/encoder/seq2seq_encoder.py b/fastNLP/modules/encoder/seq2seq_encoder.py index d280582a..5eae1e6d 100644 --- a/fastNLP/modules/encoder/seq2seq_encoder.py +++ b/fastNLP/modules/encoder/seq2seq_encoder.py @@ -132,7 +132,7 @@ class TransformerSeq2SeqEncoder(Seq2SeqEncoder): x = self.input_fc(x) x = F.dropout(x, p=self.dropout, training=self.training) - encoder_mask = seq_len_to_mask(seq_len) + encoder_mask = seq_len_to_mask(seq_len, max_len=max_src_len) encoder_mask = encoder_mask.to(device) for layer in self.layer_stacks: -- Gitee From fc8f8f69134bc8e52f4a4404e32903ccb55991ac Mon Sep 17 00:00:00 2001 From: willqvq Date: Mon, 23 Nov 2020 13:00:31 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E6=8A=8A=E6=B5=8B=E8=AF=95=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=A4=B9=20test=20=E6=94=B9=E5=90=8D=E4=B8=BA=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .Jenkinsfile | 2 +- .travis.yml | 2 +- MANIFEST.in | 2 +- {test => tests}/__init__.py | 0 {test => tests}/core/__init__.py | 0 {test => tests}/core/test_batch.py | 0 {test => tests}/core/test_callbacks.py | 0 {test => tests}/core/test_dataset.py | 0 {test => tests}/core/test_dist_trainer.py | 0 {test => tests}/core/test_field.py | 0 {test => tests}/core/test_instance.py | 0 {test => tests}/core/test_logger.py | 0 {test => tests}/core/test_loss.py | 0 {test => tests}/core/test_metrics.py | 0 {test => tests}/core/test_optimizer.py | 0 {test => tests}/core/test_predictor.py | 0 {test => tests}/core/test_sampler.py | 0 {test => tests}/core/test_tester.py | 0 {test => tests}/core/test_trainer.py | 0 {test => tests}/core/test_utils.py | 0 {test => tests}/core/test_vocabulary.py | 0 {test => tests}/data_for_tests/config | 0 .../data_for_tests/conll_2003_example.txt | 0 {test => tests}/data_for_tests/conll_example.txt | 0 {test => tests}/data_for_tests/cws_pku_utf_8 | 0 {test => tests}/data_for_tests/cws_test | 0 {test => tests}/data_for_tests/cws_train | 0 .../data_for_tests/embedding/small_bert/config.json | 0 .../embedding/small_bert/small_pytorch_model.bin | Bin .../data_for_tests/embedding/small_bert/vocab.txt | 0 .../data_for_tests/embedding/small_elmo/char.dic | 0 .../elmo_1x16_16_32cnn_1xhighway_options.json | 0 .../embedding/small_elmo/elmo_mini_for_testing.pkl | Bin .../data_for_tests/embedding/small_gpt2/config.json | 0 .../data_for_tests/embedding/small_gpt2/merges.txt | 0 .../embedding/small_gpt2/small_pytorch_model.bin | Bin .../data_for_tests/embedding/small_gpt2/vocab.json | 0 .../embedding/small_roberta/config.json | 0 .../embedding/small_roberta/merges.txt | 0 .../embedding/small_roberta/small_pytorch_model.bin | Bin .../embedding/small_roberta/vocab.json | 0 .../small_static_embedding/glove.6B.50d_test.txt | 0 .../small_static_embedding/word2vec_test.txt | 0 {test => tests}/data_for_tests/io/BQCorpus/dev.txt | 0 {test => tests}/data_for_tests/io/BQCorpus/test.txt | 0 .../data_for_tests/io/BQCorpus/train.txt | 0 .../data_for_tests/io/ChnSentiCorp/dev.txt | 0 .../data_for_tests/io/ChnSentiCorp/test.txt | 0 .../data_for_tests/io/ChnSentiCorp/train.txt | 0 {test => tests}/data_for_tests/io/LCQMC/dev.txt | 0 {test => tests}/data_for_tests/io/LCQMC/test.txt | 0 {test => tests}/data_for_tests/io/LCQMC/train.txt | 0 .../data_for_tests/io/MNLI/dev_matched.tsv | 0 .../data_for_tests/io/MNLI/dev_mismatched.tsv | 0 .../data_for_tests/io/MNLI/test_matched.tsv | 0 .../data_for_tests/io/MNLI/test_mismatched.tsv | 0 {test => tests}/data_for_tests/io/MNLI/train.tsv | 0 .../data_for_tests/io/MSRA_NER/dev.conll | 0 .../data_for_tests/io/MSRA_NER/test.conll | 0 .../data_for_tests/io/MSRA_NER/train.conll | 0 {test => tests}/data_for_tests/io/OntoNotes/dev.txt | 0 .../data_for_tests/io/OntoNotes/test.txt | 0 .../data_for_tests/io/OntoNotes/train.txt | 0 {test => tests}/data_for_tests/io/QNLI/dev.tsv | 0 {test => tests}/data_for_tests/io/QNLI/test.tsv | 0 {test => tests}/data_for_tests/io/QNLI/train.tsv | 0 {test => tests}/data_for_tests/io/Quora/dev.tsv | 0 {test => tests}/data_for_tests/io/Quora/test.tsv | 0 {test => tests}/data_for_tests/io/Quora/train.tsv | 0 {test => tests}/data_for_tests/io/RTE/dev.tsv | 0 {test => tests}/data_for_tests/io/RTE/test.tsv | 0 {test => tests}/data_for_tests/io/RTE/train.tsv | 0 .../data_for_tests/io/SNLI/snli_1.0_dev.jsonl | 0 .../data_for_tests/io/SNLI/snli_1.0_test.jsonl | 0 .../data_for_tests/io/SNLI/snli_1.0_train.jsonl | 0 {test => tests}/data_for_tests/io/SST-2/dev.tsv | 0 {test => tests}/data_for_tests/io/SST-2/test.tsv | 0 {test => tests}/data_for_tests/io/SST-2/train.tsv | 0 {test => tests}/data_for_tests/io/SST/dev.txt | 0 {test => tests}/data_for_tests/io/SST/test.txt | 0 {test => tests}/data_for_tests/io/SST/train.txt | 0 {test => tests}/data_for_tests/io/THUCNews/dev.txt | 0 {test => tests}/data_for_tests/io/THUCNews/test.txt | 0 .../data_for_tests/io/THUCNews/train.txt | 0 .../data_for_tests/io/WeiboSenti100k/dev.txt | 0 .../data_for_tests/io/WeiboSenti100k/test.txt | 0 .../data_for_tests/io/WeiboSenti100k/train.txt | 0 {test => tests}/data_for_tests/io/XNLI/dev.txt | 0 {test => tests}/data_for_tests/io/XNLI/test.txt | 0 {test => tests}/data_for_tests/io/XNLI/train.txt | 0 {test => tests}/data_for_tests/io/ag/test.csv | 0 {test => tests}/data_for_tests/io/ag/train.csv | 0 {test => tests}/data_for_tests/io/cmrc/dev.json | 0 {test => tests}/data_for_tests/io/cmrc/train.json | 0 .../data_for_tests/io/cnndm/dev.label.jsonl | 0 .../data_for_tests/io/cnndm/test.label.jsonl | 0 .../data_for_tests/io/cnndm/train.cnndm.jsonl | 0 {test => tests}/data_for_tests/io/cnndm/vocab | 0 {test => tests}/data_for_tests/io/conll2003/dev.txt | 0 .../data_for_tests/io/conll2003/test.txt | 0 .../data_for_tests/io/conll2003/train.txt | 0 .../io/coreference/coreference_dev.json | 0 .../io/coreference/coreference_test.json | 0 .../io/coreference/coreference_train.json | 0 {test => tests}/data_for_tests/io/cws_as/dev.txt | 0 {test => tests}/data_for_tests/io/cws_as/test.txt | 0 {test => tests}/data_for_tests/io/cws_as/train.txt | 0 {test => tests}/data_for_tests/io/cws_cityu/dev.txt | 0 .../data_for_tests/io/cws_cityu/test.txt | 0 .../data_for_tests/io/cws_cityu/train.txt | 0 {test => tests}/data_for_tests/io/cws_msra/dev.txt | 0 {test => tests}/data_for_tests/io/cws_msra/test.txt | 0 .../data_for_tests/io/cws_msra/train.txt | 0 {test => tests}/data_for_tests/io/cws_pku/dev.txt | 0 {test => tests}/data_for_tests/io/cws_pku/test.txt | 0 {test => tests}/data_for_tests/io/cws_pku/train.txt | 0 {test => tests}/data_for_tests/io/dbpedia/test.csv | 0 {test => tests}/data_for_tests/io/dbpedia/train.csv | 0 {test => tests}/data_for_tests/io/imdb/dev.txt | 0 {test => tests}/data_for_tests/io/imdb/test.txt | 0 {test => tests}/data_for_tests/io/imdb/train.txt | 0 .../data_for_tests/io/peopledaily/dev.txt | 0 .../data_for_tests/io/peopledaily/test.txt | 0 .../data_for_tests/io/peopledaily/train.txt | 0 .../data_for_tests/io/weibo_NER/dev.conll | 0 .../data_for_tests/io/weibo_NER/test.conll | 0 .../data_for_tests/io/weibo_NER/train.conll | 0 .../data_for_tests/io/yelp_review_full/dev.csv | 0 .../data_for_tests/io/yelp_review_full/test.csv | 0 .../data_for_tests/io/yelp_review_full/train.csv | 0 .../data_for_tests/io/yelp_review_polarity/dev.csv | 0 .../data_for_tests/io/yelp_review_polarity/test.csv | 0 .../io/yelp_review_polarity/train.csv | 0 .../data_for_tests/modules/decoder/crf.json | 0 {test => tests}/data_for_tests/people.txt | 0 {test => tests}/data_for_tests/people_daily_raw.txt | 0 {test => tests}/data_for_tests/sample_mnli.tsv | 0 {test => tests}/data_for_tests/sample_snli.jsonl | 0 {test => tests}/data_for_tests/text_classify.txt | 0 .../data_for_tests/tutorial_sample_dataset.csv | 0 {test => tests}/data_for_tests/zh_sample.conllx | 0 {test => tests}/embeddings/__init__.py | 0 {test => tests}/embeddings/test_bert_embedding.py | 0 {test => tests}/embeddings/test_char_embedding.py | 0 {test => tests}/embeddings/test_elmo_embedding.py | 0 {test => tests}/embeddings/test_gpt2_embedding.py | 0 .../embeddings/test_roberta_embedding.py | 0 {test => tests}/embeddings/test_stack_embeddings.py | 0 {test => tests}/embeddings/test_static_embedding.py | 0 .../embeddings/test_transformer_embedding.py | 0 {test => tests}/io/__init__.py | 0 .../io/loader/test_classification_loader.py | 0 {test => tests}/io/loader/test_conll_loader.py | 0 .../io/loader/test_coreference_loader.py | 0 {test => tests}/io/loader/test_cws_loader.py | 0 {test => tests}/io/loader/test_matching_loader.py | 0 {test => tests}/io/loader/test_qa_loader.py | 0 {test => tests}/io/pipe/test_classification.py | 0 {test => tests}/io/pipe/test_conll.py | 0 {test => tests}/io/pipe/test_coreference.py | 0 {test => tests}/io/pipe/test_cws.py | 0 {test => tests}/io/pipe/test_matching.py | 0 {test => tests}/io/pipe/test_qa.py | 0 {test => tests}/io/pipe/test_summary.py | 0 {test => tests}/io/test_embed_loader.py | 0 {test => tests}/io/test_model_io.py | 0 {test => tests}/models/__init__.py | 0 {test => tests}/models/model_runner.py | 0 {test => tests}/models/test_bert.py | 0 {test => tests}/models/test_biaffine_parser.py | 0 .../models/test_cnn_text_classification.py | 0 {test => tests}/models/test_seq2seq_generator.py | 0 {test => tests}/models/test_seq2seq_model.py | 0 {test => tests}/models/test_sequence_labeling.py | 0 {test => tests}/models/test_snli.py | 0 {test => tests}/models/test_star_trans.py | 0 {test => tests}/modules/__init__.py | 0 {test => tests}/modules/decoder/__init__.py | 0 {test => tests}/modules/decoder/test_CRF.py | 0 {test => tests}/modules/decoder/test_bert.py | 0 .../modules/decoder/test_seq2seq_decoder.py | 0 {test => tests}/modules/encoder/__init__.py | 0 {test => tests}/modules/encoder/test_pooling.py | 0 .../modules/encoder/test_seq2seq_encoder.py | 0 {test => tests}/modules/generator/__init__.py | 0 .../modules/generator/test_seq2seq_generator.py | 0 {test => tests}/modules/test_char_encoder.py | 0 {test => tests}/modules/test_other_modules.py | 0 {test => tests}/modules/test_utils.py | 0 {test => tests}/modules/test_variational_rnn.py | 0 .../modules/tokenizer/test_bert_tokenizer.py | 0 {test => tests}/test_tutorials.py | 0 192 files changed, 3 insertions(+), 3 deletions(-) rename {test => tests}/__init__.py (100%) rename {test => tests}/core/__init__.py (100%) rename {test => tests}/core/test_batch.py (100%) rename {test => tests}/core/test_callbacks.py (100%) rename {test => tests}/core/test_dataset.py (100%) rename {test => tests}/core/test_dist_trainer.py (100%) rename {test => tests}/core/test_field.py (100%) rename {test => tests}/core/test_instance.py (100%) rename {test => tests}/core/test_logger.py (100%) rename {test => tests}/core/test_loss.py (100%) rename {test => tests}/core/test_metrics.py (100%) rename {test => tests}/core/test_optimizer.py (100%) rename {test => tests}/core/test_predictor.py (100%) rename {test => tests}/core/test_sampler.py (100%) rename {test => tests}/core/test_tester.py (100%) rename {test => tests}/core/test_trainer.py (100%) rename {test => tests}/core/test_utils.py (100%) rename {test => tests}/core/test_vocabulary.py (100%) rename {test => tests}/data_for_tests/config (100%) rename {test => tests}/data_for_tests/conll_2003_example.txt (100%) rename {test => tests}/data_for_tests/conll_example.txt (100%) rename {test => tests}/data_for_tests/cws_pku_utf_8 (100%) rename {test => tests}/data_for_tests/cws_test (100%) rename {test => tests}/data_for_tests/cws_train (100%) rename {test => tests}/data_for_tests/embedding/small_bert/config.json (100%) rename {test => tests}/data_for_tests/embedding/small_bert/small_pytorch_model.bin (100%) rename {test => tests}/data_for_tests/embedding/small_bert/vocab.txt (100%) rename {test => tests}/data_for_tests/embedding/small_elmo/char.dic (100%) rename {test => tests}/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json (100%) rename {test => tests}/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl (100%) rename {test => tests}/data_for_tests/embedding/small_gpt2/config.json (100%) rename {test => tests}/data_for_tests/embedding/small_gpt2/merges.txt (100%) rename {test => tests}/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin (100%) rename {test => tests}/data_for_tests/embedding/small_gpt2/vocab.json (100%) rename {test => tests}/data_for_tests/embedding/small_roberta/config.json (100%) rename {test => tests}/data_for_tests/embedding/small_roberta/merges.txt (100%) rename {test => tests}/data_for_tests/embedding/small_roberta/small_pytorch_model.bin (100%) rename {test => tests}/data_for_tests/embedding/small_roberta/vocab.json (100%) rename {test => tests}/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt (100%) rename {test => tests}/data_for_tests/embedding/small_static_embedding/word2vec_test.txt (100%) rename {test => tests}/data_for_tests/io/BQCorpus/dev.txt (100%) rename {test => tests}/data_for_tests/io/BQCorpus/test.txt (100%) rename {test => tests}/data_for_tests/io/BQCorpus/train.txt (100%) rename {test => tests}/data_for_tests/io/ChnSentiCorp/dev.txt (100%) rename {test => tests}/data_for_tests/io/ChnSentiCorp/test.txt (100%) rename {test => tests}/data_for_tests/io/ChnSentiCorp/train.txt (100%) rename {test => tests}/data_for_tests/io/LCQMC/dev.txt (100%) rename {test => tests}/data_for_tests/io/LCQMC/test.txt (100%) rename {test => tests}/data_for_tests/io/LCQMC/train.txt (100%) rename {test => tests}/data_for_tests/io/MNLI/dev_matched.tsv (100%) rename {test => tests}/data_for_tests/io/MNLI/dev_mismatched.tsv (100%) rename {test => tests}/data_for_tests/io/MNLI/test_matched.tsv (100%) rename {test => tests}/data_for_tests/io/MNLI/test_mismatched.tsv (100%) rename {test => tests}/data_for_tests/io/MNLI/train.tsv (100%) rename {test => tests}/data_for_tests/io/MSRA_NER/dev.conll (100%) rename {test => tests}/data_for_tests/io/MSRA_NER/test.conll (100%) rename {test => tests}/data_for_tests/io/MSRA_NER/train.conll (100%) rename {test => tests}/data_for_tests/io/OntoNotes/dev.txt (100%) rename {test => tests}/data_for_tests/io/OntoNotes/test.txt (100%) rename {test => tests}/data_for_tests/io/OntoNotes/train.txt (100%) rename {test => tests}/data_for_tests/io/QNLI/dev.tsv (100%) rename {test => tests}/data_for_tests/io/QNLI/test.tsv (100%) rename {test => tests}/data_for_tests/io/QNLI/train.tsv (100%) rename {test => tests}/data_for_tests/io/Quora/dev.tsv (100%) rename {test => tests}/data_for_tests/io/Quora/test.tsv (100%) rename {test => tests}/data_for_tests/io/Quora/train.tsv (100%) rename {test => tests}/data_for_tests/io/RTE/dev.tsv (100%) rename {test => tests}/data_for_tests/io/RTE/test.tsv (100%) rename {test => tests}/data_for_tests/io/RTE/train.tsv (100%) rename {test => tests}/data_for_tests/io/SNLI/snli_1.0_dev.jsonl (100%) rename {test => tests}/data_for_tests/io/SNLI/snli_1.0_test.jsonl (100%) rename {test => tests}/data_for_tests/io/SNLI/snli_1.0_train.jsonl (100%) rename {test => tests}/data_for_tests/io/SST-2/dev.tsv (100%) rename {test => tests}/data_for_tests/io/SST-2/test.tsv (100%) rename {test => tests}/data_for_tests/io/SST-2/train.tsv (100%) rename {test => tests}/data_for_tests/io/SST/dev.txt (100%) rename {test => tests}/data_for_tests/io/SST/test.txt (100%) rename {test => tests}/data_for_tests/io/SST/train.txt (100%) rename {test => tests}/data_for_tests/io/THUCNews/dev.txt (100%) rename {test => tests}/data_for_tests/io/THUCNews/test.txt (100%) rename {test => tests}/data_for_tests/io/THUCNews/train.txt (100%) rename {test => tests}/data_for_tests/io/WeiboSenti100k/dev.txt (100%) rename {test => tests}/data_for_tests/io/WeiboSenti100k/test.txt (100%) rename {test => tests}/data_for_tests/io/WeiboSenti100k/train.txt (100%) rename {test => tests}/data_for_tests/io/XNLI/dev.txt (100%) rename {test => tests}/data_for_tests/io/XNLI/test.txt (100%) rename {test => tests}/data_for_tests/io/XNLI/train.txt (100%) rename {test => tests}/data_for_tests/io/ag/test.csv (100%) rename {test => tests}/data_for_tests/io/ag/train.csv (100%) rename {test => tests}/data_for_tests/io/cmrc/dev.json (100%) rename {test => tests}/data_for_tests/io/cmrc/train.json (100%) rename {test => tests}/data_for_tests/io/cnndm/dev.label.jsonl (100%) rename {test => tests}/data_for_tests/io/cnndm/test.label.jsonl (100%) rename {test => tests}/data_for_tests/io/cnndm/train.cnndm.jsonl (100%) rename {test => tests}/data_for_tests/io/cnndm/vocab (100%) rename {test => tests}/data_for_tests/io/conll2003/dev.txt (100%) rename {test => tests}/data_for_tests/io/conll2003/test.txt (100%) rename {test => tests}/data_for_tests/io/conll2003/train.txt (100%) rename {test => tests}/data_for_tests/io/coreference/coreference_dev.json (100%) rename {test => tests}/data_for_tests/io/coreference/coreference_test.json (100%) rename {test => tests}/data_for_tests/io/coreference/coreference_train.json (100%) rename {test => tests}/data_for_tests/io/cws_as/dev.txt (100%) rename {test => tests}/data_for_tests/io/cws_as/test.txt (100%) rename {test => tests}/data_for_tests/io/cws_as/train.txt (100%) rename {test => tests}/data_for_tests/io/cws_cityu/dev.txt (100%) rename {test => tests}/data_for_tests/io/cws_cityu/test.txt (100%) rename {test => tests}/data_for_tests/io/cws_cityu/train.txt (100%) rename {test => tests}/data_for_tests/io/cws_msra/dev.txt (100%) rename {test => tests}/data_for_tests/io/cws_msra/test.txt (100%) rename {test => tests}/data_for_tests/io/cws_msra/train.txt (100%) rename {test => tests}/data_for_tests/io/cws_pku/dev.txt (100%) rename {test => tests}/data_for_tests/io/cws_pku/test.txt (100%) rename {test => tests}/data_for_tests/io/cws_pku/train.txt (100%) rename {test => tests}/data_for_tests/io/dbpedia/test.csv (100%) rename {test => tests}/data_for_tests/io/dbpedia/train.csv (100%) rename {test => tests}/data_for_tests/io/imdb/dev.txt (100%) rename {test => tests}/data_for_tests/io/imdb/test.txt (100%) rename {test => tests}/data_for_tests/io/imdb/train.txt (100%) rename {test => tests}/data_for_tests/io/peopledaily/dev.txt (100%) rename {test => tests}/data_for_tests/io/peopledaily/test.txt (100%) rename {test => tests}/data_for_tests/io/peopledaily/train.txt (100%) rename {test => tests}/data_for_tests/io/weibo_NER/dev.conll (100%) rename {test => tests}/data_for_tests/io/weibo_NER/test.conll (100%) rename {test => tests}/data_for_tests/io/weibo_NER/train.conll (100%) rename {test => tests}/data_for_tests/io/yelp_review_full/dev.csv (100%) rename {test => tests}/data_for_tests/io/yelp_review_full/test.csv (100%) rename {test => tests}/data_for_tests/io/yelp_review_full/train.csv (100%) rename {test => tests}/data_for_tests/io/yelp_review_polarity/dev.csv (100%) rename {test => tests}/data_for_tests/io/yelp_review_polarity/test.csv (100%) rename {test => tests}/data_for_tests/io/yelp_review_polarity/train.csv (100%) rename {test => tests}/data_for_tests/modules/decoder/crf.json (100%) rename {test => tests}/data_for_tests/people.txt (100%) rename {test => tests}/data_for_tests/people_daily_raw.txt (100%) rename {test => tests}/data_for_tests/sample_mnli.tsv (100%) rename {test => tests}/data_for_tests/sample_snli.jsonl (100%) rename {test => tests}/data_for_tests/text_classify.txt (100%) rename {test => tests}/data_for_tests/tutorial_sample_dataset.csv (100%) rename {test => tests}/data_for_tests/zh_sample.conllx (100%) rename {test => tests}/embeddings/__init__.py (100%) rename {test => tests}/embeddings/test_bert_embedding.py (100%) rename {test => tests}/embeddings/test_char_embedding.py (100%) rename {test => tests}/embeddings/test_elmo_embedding.py (100%) rename {test => tests}/embeddings/test_gpt2_embedding.py (100%) rename {test => tests}/embeddings/test_roberta_embedding.py (100%) rename {test => tests}/embeddings/test_stack_embeddings.py (100%) rename {test => tests}/embeddings/test_static_embedding.py (100%) rename {test => tests}/embeddings/test_transformer_embedding.py (100%) rename {test => tests}/io/__init__.py (100%) rename {test => tests}/io/loader/test_classification_loader.py (100%) rename {test => tests}/io/loader/test_conll_loader.py (100%) rename {test => tests}/io/loader/test_coreference_loader.py (100%) rename {test => tests}/io/loader/test_cws_loader.py (100%) rename {test => tests}/io/loader/test_matching_loader.py (100%) rename {test => tests}/io/loader/test_qa_loader.py (100%) rename {test => tests}/io/pipe/test_classification.py (100%) rename {test => tests}/io/pipe/test_conll.py (100%) rename {test => tests}/io/pipe/test_coreference.py (100%) rename {test => tests}/io/pipe/test_cws.py (100%) rename {test => tests}/io/pipe/test_matching.py (100%) rename {test => tests}/io/pipe/test_qa.py (100%) rename {test => tests}/io/pipe/test_summary.py (100%) rename {test => tests}/io/test_embed_loader.py (100%) rename {test => tests}/io/test_model_io.py (100%) rename {test => tests}/models/__init__.py (100%) rename {test => tests}/models/model_runner.py (100%) rename {test => tests}/models/test_bert.py (100%) rename {test => tests}/models/test_biaffine_parser.py (100%) rename {test => tests}/models/test_cnn_text_classification.py (100%) rename {test => tests}/models/test_seq2seq_generator.py (100%) rename {test => tests}/models/test_seq2seq_model.py (100%) rename {test => tests}/models/test_sequence_labeling.py (100%) rename {test => tests}/models/test_snli.py (100%) rename {test => tests}/models/test_star_trans.py (100%) rename {test => tests}/modules/__init__.py (100%) rename {test => tests}/modules/decoder/__init__.py (100%) rename {test => tests}/modules/decoder/test_CRF.py (100%) rename {test => tests}/modules/decoder/test_bert.py (100%) rename {test => tests}/modules/decoder/test_seq2seq_decoder.py (100%) rename {test => tests}/modules/encoder/__init__.py (100%) rename {test => tests}/modules/encoder/test_pooling.py (100%) rename {test => tests}/modules/encoder/test_seq2seq_encoder.py (100%) rename {test => tests}/modules/generator/__init__.py (100%) rename {test => tests}/modules/generator/test_seq2seq_generator.py (100%) rename {test => tests}/modules/test_char_encoder.py (100%) rename {test => tests}/modules/test_other_modules.py (100%) rename {test => tests}/modules/test_utils.py (100%) rename {test => tests}/modules/test_variational_rnn.py (100%) rename {test => tests}/modules/tokenizer/test_bert_tokenizer.py (100%) rename {test => tests}/test_tutorials.py (100%) diff --git a/.Jenkinsfile b/.Jenkinsfile index 7c0a64fd..87c286ee 100644 --- a/.Jenkinsfile +++ b/.Jenkinsfile @@ -29,7 +29,7 @@ pipeline { steps { sh 'python -m spacy download en' sh 'pip install fitlog' - sh 'pytest ./test --html=test_results.html --self-contained-html' + sh 'pytest ./tests --html=test_results.html --self-contained-html' } } } diff --git a/.travis.yml b/.travis.yml index 85bac41e..9c1ff4d3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ install: # command to run tests script: - python -m spacy download en - - pytest --cov=fastNLP test/ + - pytest --cov=fastNLP tests/ after_success: - bash <(curl -s https://codecov.io/bash) diff --git a/MANIFEST.in b/MANIFEST.in index d893b45a..61279be1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ include requirements.txt include LICENSE include README.md -prune test/ +prune tests/ prune reproduction/ prune fastNLP/api prune fastNLP/automl \ No newline at end of file diff --git a/test/__init__.py b/tests/__init__.py similarity index 100% rename from test/__init__.py rename to tests/__init__.py diff --git a/test/core/__init__.py b/tests/core/__init__.py similarity index 100% rename from test/core/__init__.py rename to tests/core/__init__.py diff --git a/test/core/test_batch.py b/tests/core/test_batch.py similarity index 100% rename from test/core/test_batch.py rename to tests/core/test_batch.py diff --git a/test/core/test_callbacks.py b/tests/core/test_callbacks.py similarity index 100% rename from test/core/test_callbacks.py rename to tests/core/test_callbacks.py diff --git a/test/core/test_dataset.py b/tests/core/test_dataset.py similarity index 100% rename from test/core/test_dataset.py rename to tests/core/test_dataset.py diff --git a/test/core/test_dist_trainer.py b/tests/core/test_dist_trainer.py similarity index 100% rename from test/core/test_dist_trainer.py rename to tests/core/test_dist_trainer.py diff --git a/test/core/test_field.py b/tests/core/test_field.py similarity index 100% rename from test/core/test_field.py rename to tests/core/test_field.py diff --git a/test/core/test_instance.py b/tests/core/test_instance.py similarity index 100% rename from test/core/test_instance.py rename to tests/core/test_instance.py diff --git a/test/core/test_logger.py b/tests/core/test_logger.py similarity index 100% rename from test/core/test_logger.py rename to tests/core/test_logger.py diff --git a/test/core/test_loss.py b/tests/core/test_loss.py similarity index 100% rename from test/core/test_loss.py rename to tests/core/test_loss.py diff --git a/test/core/test_metrics.py b/tests/core/test_metrics.py similarity index 100% rename from test/core/test_metrics.py rename to tests/core/test_metrics.py diff --git a/test/core/test_optimizer.py b/tests/core/test_optimizer.py similarity index 100% rename from test/core/test_optimizer.py rename to tests/core/test_optimizer.py diff --git a/test/core/test_predictor.py b/tests/core/test_predictor.py similarity index 100% rename from test/core/test_predictor.py rename to tests/core/test_predictor.py diff --git a/test/core/test_sampler.py b/tests/core/test_sampler.py similarity index 100% rename from test/core/test_sampler.py rename to tests/core/test_sampler.py diff --git a/test/core/test_tester.py b/tests/core/test_tester.py similarity index 100% rename from test/core/test_tester.py rename to tests/core/test_tester.py diff --git a/test/core/test_trainer.py b/tests/core/test_trainer.py similarity index 100% rename from test/core/test_trainer.py rename to tests/core/test_trainer.py diff --git a/test/core/test_utils.py b/tests/core/test_utils.py similarity index 100% rename from test/core/test_utils.py rename to tests/core/test_utils.py diff --git a/test/core/test_vocabulary.py b/tests/core/test_vocabulary.py similarity index 100% rename from test/core/test_vocabulary.py rename to tests/core/test_vocabulary.py diff --git a/test/data_for_tests/config b/tests/data_for_tests/config similarity index 100% rename from test/data_for_tests/config rename to tests/data_for_tests/config diff --git a/test/data_for_tests/conll_2003_example.txt b/tests/data_for_tests/conll_2003_example.txt similarity index 100% rename from test/data_for_tests/conll_2003_example.txt rename to tests/data_for_tests/conll_2003_example.txt diff --git a/test/data_for_tests/conll_example.txt b/tests/data_for_tests/conll_example.txt similarity index 100% rename from test/data_for_tests/conll_example.txt rename to tests/data_for_tests/conll_example.txt diff --git a/test/data_for_tests/cws_pku_utf_8 b/tests/data_for_tests/cws_pku_utf_8 similarity index 100% rename from test/data_for_tests/cws_pku_utf_8 rename to tests/data_for_tests/cws_pku_utf_8 diff --git a/test/data_for_tests/cws_test b/tests/data_for_tests/cws_test similarity index 100% rename from test/data_for_tests/cws_test rename to tests/data_for_tests/cws_test diff --git a/test/data_for_tests/cws_train b/tests/data_for_tests/cws_train similarity index 100% rename from test/data_for_tests/cws_train rename to tests/data_for_tests/cws_train diff --git a/test/data_for_tests/embedding/small_bert/config.json b/tests/data_for_tests/embedding/small_bert/config.json similarity index 100% rename from test/data_for_tests/embedding/small_bert/config.json rename to tests/data_for_tests/embedding/small_bert/config.json diff --git a/test/data_for_tests/embedding/small_bert/small_pytorch_model.bin b/tests/data_for_tests/embedding/small_bert/small_pytorch_model.bin similarity index 100% rename from test/data_for_tests/embedding/small_bert/small_pytorch_model.bin rename to tests/data_for_tests/embedding/small_bert/small_pytorch_model.bin diff --git a/test/data_for_tests/embedding/small_bert/vocab.txt b/tests/data_for_tests/embedding/small_bert/vocab.txt similarity index 100% rename from test/data_for_tests/embedding/small_bert/vocab.txt rename to tests/data_for_tests/embedding/small_bert/vocab.txt diff --git a/test/data_for_tests/embedding/small_elmo/char.dic b/tests/data_for_tests/embedding/small_elmo/char.dic similarity index 100% rename from test/data_for_tests/embedding/small_elmo/char.dic rename to tests/data_for_tests/embedding/small_elmo/char.dic diff --git a/test/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json b/tests/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json similarity index 100% rename from test/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json rename to tests/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json diff --git a/test/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl b/tests/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl similarity index 100% rename from test/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl rename to tests/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl diff --git a/test/data_for_tests/embedding/small_gpt2/config.json b/tests/data_for_tests/embedding/small_gpt2/config.json similarity index 100% rename from test/data_for_tests/embedding/small_gpt2/config.json rename to tests/data_for_tests/embedding/small_gpt2/config.json diff --git a/test/data_for_tests/embedding/small_gpt2/merges.txt b/tests/data_for_tests/embedding/small_gpt2/merges.txt similarity index 100% rename from test/data_for_tests/embedding/small_gpt2/merges.txt rename to tests/data_for_tests/embedding/small_gpt2/merges.txt diff --git a/test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin b/tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin similarity index 100% rename from test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin rename to tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin diff --git a/test/data_for_tests/embedding/small_gpt2/vocab.json b/tests/data_for_tests/embedding/small_gpt2/vocab.json similarity index 100% rename from test/data_for_tests/embedding/small_gpt2/vocab.json rename to tests/data_for_tests/embedding/small_gpt2/vocab.json diff --git a/test/data_for_tests/embedding/small_roberta/config.json b/tests/data_for_tests/embedding/small_roberta/config.json similarity index 100% rename from test/data_for_tests/embedding/small_roberta/config.json rename to tests/data_for_tests/embedding/small_roberta/config.json diff --git a/test/data_for_tests/embedding/small_roberta/merges.txt b/tests/data_for_tests/embedding/small_roberta/merges.txt similarity index 100% rename from test/data_for_tests/embedding/small_roberta/merges.txt rename to tests/data_for_tests/embedding/small_roberta/merges.txt diff --git a/test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin b/tests/data_for_tests/embedding/small_roberta/small_pytorch_model.bin similarity index 100% rename from test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin rename to tests/data_for_tests/embedding/small_roberta/small_pytorch_model.bin diff --git a/test/data_for_tests/embedding/small_roberta/vocab.json b/tests/data_for_tests/embedding/small_roberta/vocab.json similarity index 100% rename from test/data_for_tests/embedding/small_roberta/vocab.json rename to tests/data_for_tests/embedding/small_roberta/vocab.json diff --git a/test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt b/tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt similarity index 100% rename from test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt rename to tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt diff --git a/test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt b/tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt similarity index 100% rename from test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt rename to tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt diff --git a/test/data_for_tests/io/BQCorpus/dev.txt b/tests/data_for_tests/io/BQCorpus/dev.txt similarity index 100% rename from test/data_for_tests/io/BQCorpus/dev.txt rename to tests/data_for_tests/io/BQCorpus/dev.txt diff --git a/test/data_for_tests/io/BQCorpus/test.txt b/tests/data_for_tests/io/BQCorpus/test.txt similarity index 100% rename from test/data_for_tests/io/BQCorpus/test.txt rename to tests/data_for_tests/io/BQCorpus/test.txt diff --git a/test/data_for_tests/io/BQCorpus/train.txt b/tests/data_for_tests/io/BQCorpus/train.txt similarity index 100% rename from test/data_for_tests/io/BQCorpus/train.txt rename to tests/data_for_tests/io/BQCorpus/train.txt diff --git a/test/data_for_tests/io/ChnSentiCorp/dev.txt b/tests/data_for_tests/io/ChnSentiCorp/dev.txt similarity index 100% rename from test/data_for_tests/io/ChnSentiCorp/dev.txt rename to tests/data_for_tests/io/ChnSentiCorp/dev.txt diff --git a/test/data_for_tests/io/ChnSentiCorp/test.txt b/tests/data_for_tests/io/ChnSentiCorp/test.txt similarity index 100% rename from test/data_for_tests/io/ChnSentiCorp/test.txt rename to tests/data_for_tests/io/ChnSentiCorp/test.txt diff --git a/test/data_for_tests/io/ChnSentiCorp/train.txt b/tests/data_for_tests/io/ChnSentiCorp/train.txt similarity index 100% rename from test/data_for_tests/io/ChnSentiCorp/train.txt rename to tests/data_for_tests/io/ChnSentiCorp/train.txt diff --git a/test/data_for_tests/io/LCQMC/dev.txt b/tests/data_for_tests/io/LCQMC/dev.txt similarity index 100% rename from test/data_for_tests/io/LCQMC/dev.txt rename to tests/data_for_tests/io/LCQMC/dev.txt diff --git a/test/data_for_tests/io/LCQMC/test.txt b/tests/data_for_tests/io/LCQMC/test.txt similarity index 100% rename from test/data_for_tests/io/LCQMC/test.txt rename to tests/data_for_tests/io/LCQMC/test.txt diff --git a/test/data_for_tests/io/LCQMC/train.txt b/tests/data_for_tests/io/LCQMC/train.txt similarity index 100% rename from test/data_for_tests/io/LCQMC/train.txt rename to tests/data_for_tests/io/LCQMC/train.txt diff --git a/test/data_for_tests/io/MNLI/dev_matched.tsv b/tests/data_for_tests/io/MNLI/dev_matched.tsv similarity index 100% rename from test/data_for_tests/io/MNLI/dev_matched.tsv rename to tests/data_for_tests/io/MNLI/dev_matched.tsv diff --git a/test/data_for_tests/io/MNLI/dev_mismatched.tsv b/tests/data_for_tests/io/MNLI/dev_mismatched.tsv similarity index 100% rename from test/data_for_tests/io/MNLI/dev_mismatched.tsv rename to tests/data_for_tests/io/MNLI/dev_mismatched.tsv diff --git a/test/data_for_tests/io/MNLI/test_matched.tsv b/tests/data_for_tests/io/MNLI/test_matched.tsv similarity index 100% rename from test/data_for_tests/io/MNLI/test_matched.tsv rename to tests/data_for_tests/io/MNLI/test_matched.tsv diff --git a/test/data_for_tests/io/MNLI/test_mismatched.tsv b/tests/data_for_tests/io/MNLI/test_mismatched.tsv similarity index 100% rename from test/data_for_tests/io/MNLI/test_mismatched.tsv rename to tests/data_for_tests/io/MNLI/test_mismatched.tsv diff --git a/test/data_for_tests/io/MNLI/train.tsv b/tests/data_for_tests/io/MNLI/train.tsv similarity index 100% rename from test/data_for_tests/io/MNLI/train.tsv rename to tests/data_for_tests/io/MNLI/train.tsv diff --git a/test/data_for_tests/io/MSRA_NER/dev.conll b/tests/data_for_tests/io/MSRA_NER/dev.conll similarity index 100% rename from test/data_for_tests/io/MSRA_NER/dev.conll rename to tests/data_for_tests/io/MSRA_NER/dev.conll diff --git a/test/data_for_tests/io/MSRA_NER/test.conll b/tests/data_for_tests/io/MSRA_NER/test.conll similarity index 100% rename from test/data_for_tests/io/MSRA_NER/test.conll rename to tests/data_for_tests/io/MSRA_NER/test.conll diff --git a/test/data_for_tests/io/MSRA_NER/train.conll b/tests/data_for_tests/io/MSRA_NER/train.conll similarity index 100% rename from test/data_for_tests/io/MSRA_NER/train.conll rename to tests/data_for_tests/io/MSRA_NER/train.conll diff --git a/test/data_for_tests/io/OntoNotes/dev.txt b/tests/data_for_tests/io/OntoNotes/dev.txt similarity index 100% rename from test/data_for_tests/io/OntoNotes/dev.txt rename to tests/data_for_tests/io/OntoNotes/dev.txt diff --git a/test/data_for_tests/io/OntoNotes/test.txt b/tests/data_for_tests/io/OntoNotes/test.txt similarity index 100% rename from test/data_for_tests/io/OntoNotes/test.txt rename to tests/data_for_tests/io/OntoNotes/test.txt diff --git a/test/data_for_tests/io/OntoNotes/train.txt b/tests/data_for_tests/io/OntoNotes/train.txt similarity index 100% rename from test/data_for_tests/io/OntoNotes/train.txt rename to tests/data_for_tests/io/OntoNotes/train.txt diff --git a/test/data_for_tests/io/QNLI/dev.tsv b/tests/data_for_tests/io/QNLI/dev.tsv similarity index 100% rename from test/data_for_tests/io/QNLI/dev.tsv rename to tests/data_for_tests/io/QNLI/dev.tsv diff --git a/test/data_for_tests/io/QNLI/test.tsv b/tests/data_for_tests/io/QNLI/test.tsv similarity index 100% rename from test/data_for_tests/io/QNLI/test.tsv rename to tests/data_for_tests/io/QNLI/test.tsv diff --git a/test/data_for_tests/io/QNLI/train.tsv b/tests/data_for_tests/io/QNLI/train.tsv similarity index 100% rename from test/data_for_tests/io/QNLI/train.tsv rename to tests/data_for_tests/io/QNLI/train.tsv diff --git a/test/data_for_tests/io/Quora/dev.tsv b/tests/data_for_tests/io/Quora/dev.tsv similarity index 100% rename from test/data_for_tests/io/Quora/dev.tsv rename to tests/data_for_tests/io/Quora/dev.tsv diff --git a/test/data_for_tests/io/Quora/test.tsv b/tests/data_for_tests/io/Quora/test.tsv similarity index 100% rename from test/data_for_tests/io/Quora/test.tsv rename to tests/data_for_tests/io/Quora/test.tsv diff --git a/test/data_for_tests/io/Quora/train.tsv b/tests/data_for_tests/io/Quora/train.tsv similarity index 100% rename from test/data_for_tests/io/Quora/train.tsv rename to tests/data_for_tests/io/Quora/train.tsv diff --git a/test/data_for_tests/io/RTE/dev.tsv b/tests/data_for_tests/io/RTE/dev.tsv similarity index 100% rename from test/data_for_tests/io/RTE/dev.tsv rename to tests/data_for_tests/io/RTE/dev.tsv diff --git a/test/data_for_tests/io/RTE/test.tsv b/tests/data_for_tests/io/RTE/test.tsv similarity index 100% rename from test/data_for_tests/io/RTE/test.tsv rename to tests/data_for_tests/io/RTE/test.tsv diff --git a/test/data_for_tests/io/RTE/train.tsv b/tests/data_for_tests/io/RTE/train.tsv similarity index 100% rename from test/data_for_tests/io/RTE/train.tsv rename to tests/data_for_tests/io/RTE/train.tsv diff --git a/test/data_for_tests/io/SNLI/snli_1.0_dev.jsonl b/tests/data_for_tests/io/SNLI/snli_1.0_dev.jsonl similarity index 100% rename from test/data_for_tests/io/SNLI/snli_1.0_dev.jsonl rename to tests/data_for_tests/io/SNLI/snli_1.0_dev.jsonl diff --git a/test/data_for_tests/io/SNLI/snli_1.0_test.jsonl b/tests/data_for_tests/io/SNLI/snli_1.0_test.jsonl similarity index 100% rename from test/data_for_tests/io/SNLI/snli_1.0_test.jsonl rename to tests/data_for_tests/io/SNLI/snli_1.0_test.jsonl diff --git a/test/data_for_tests/io/SNLI/snli_1.0_train.jsonl b/tests/data_for_tests/io/SNLI/snli_1.0_train.jsonl similarity index 100% rename from test/data_for_tests/io/SNLI/snli_1.0_train.jsonl rename to tests/data_for_tests/io/SNLI/snli_1.0_train.jsonl diff --git a/test/data_for_tests/io/SST-2/dev.tsv b/tests/data_for_tests/io/SST-2/dev.tsv similarity index 100% rename from test/data_for_tests/io/SST-2/dev.tsv rename to tests/data_for_tests/io/SST-2/dev.tsv diff --git a/test/data_for_tests/io/SST-2/test.tsv b/tests/data_for_tests/io/SST-2/test.tsv similarity index 100% rename from test/data_for_tests/io/SST-2/test.tsv rename to tests/data_for_tests/io/SST-2/test.tsv diff --git a/test/data_for_tests/io/SST-2/train.tsv b/tests/data_for_tests/io/SST-2/train.tsv similarity index 100% rename from test/data_for_tests/io/SST-2/train.tsv rename to tests/data_for_tests/io/SST-2/train.tsv diff --git a/test/data_for_tests/io/SST/dev.txt b/tests/data_for_tests/io/SST/dev.txt similarity index 100% rename from test/data_for_tests/io/SST/dev.txt rename to tests/data_for_tests/io/SST/dev.txt diff --git a/test/data_for_tests/io/SST/test.txt b/tests/data_for_tests/io/SST/test.txt similarity index 100% rename from test/data_for_tests/io/SST/test.txt rename to tests/data_for_tests/io/SST/test.txt diff --git a/test/data_for_tests/io/SST/train.txt b/tests/data_for_tests/io/SST/train.txt similarity index 100% rename from test/data_for_tests/io/SST/train.txt rename to tests/data_for_tests/io/SST/train.txt diff --git a/test/data_for_tests/io/THUCNews/dev.txt b/tests/data_for_tests/io/THUCNews/dev.txt similarity index 100% rename from test/data_for_tests/io/THUCNews/dev.txt rename to tests/data_for_tests/io/THUCNews/dev.txt diff --git a/test/data_for_tests/io/THUCNews/test.txt b/tests/data_for_tests/io/THUCNews/test.txt similarity index 100% rename from test/data_for_tests/io/THUCNews/test.txt rename to tests/data_for_tests/io/THUCNews/test.txt diff --git a/test/data_for_tests/io/THUCNews/train.txt b/tests/data_for_tests/io/THUCNews/train.txt similarity index 100% rename from test/data_for_tests/io/THUCNews/train.txt rename to tests/data_for_tests/io/THUCNews/train.txt diff --git a/test/data_for_tests/io/WeiboSenti100k/dev.txt b/tests/data_for_tests/io/WeiboSenti100k/dev.txt similarity index 100% rename from test/data_for_tests/io/WeiboSenti100k/dev.txt rename to tests/data_for_tests/io/WeiboSenti100k/dev.txt diff --git a/test/data_for_tests/io/WeiboSenti100k/test.txt b/tests/data_for_tests/io/WeiboSenti100k/test.txt similarity index 100% rename from test/data_for_tests/io/WeiboSenti100k/test.txt rename to tests/data_for_tests/io/WeiboSenti100k/test.txt diff --git a/test/data_for_tests/io/WeiboSenti100k/train.txt b/tests/data_for_tests/io/WeiboSenti100k/train.txt similarity index 100% rename from test/data_for_tests/io/WeiboSenti100k/train.txt rename to tests/data_for_tests/io/WeiboSenti100k/train.txt diff --git a/test/data_for_tests/io/XNLI/dev.txt b/tests/data_for_tests/io/XNLI/dev.txt similarity index 100% rename from test/data_for_tests/io/XNLI/dev.txt rename to tests/data_for_tests/io/XNLI/dev.txt diff --git a/test/data_for_tests/io/XNLI/test.txt b/tests/data_for_tests/io/XNLI/test.txt similarity index 100% rename from test/data_for_tests/io/XNLI/test.txt rename to tests/data_for_tests/io/XNLI/test.txt diff --git a/test/data_for_tests/io/XNLI/train.txt b/tests/data_for_tests/io/XNLI/train.txt similarity index 100% rename from test/data_for_tests/io/XNLI/train.txt rename to tests/data_for_tests/io/XNLI/train.txt diff --git a/test/data_for_tests/io/ag/test.csv b/tests/data_for_tests/io/ag/test.csv similarity index 100% rename from test/data_for_tests/io/ag/test.csv rename to tests/data_for_tests/io/ag/test.csv diff --git a/test/data_for_tests/io/ag/train.csv b/tests/data_for_tests/io/ag/train.csv similarity index 100% rename from test/data_for_tests/io/ag/train.csv rename to tests/data_for_tests/io/ag/train.csv diff --git a/test/data_for_tests/io/cmrc/dev.json b/tests/data_for_tests/io/cmrc/dev.json similarity index 100% rename from test/data_for_tests/io/cmrc/dev.json rename to tests/data_for_tests/io/cmrc/dev.json diff --git a/test/data_for_tests/io/cmrc/train.json b/tests/data_for_tests/io/cmrc/train.json similarity index 100% rename from test/data_for_tests/io/cmrc/train.json rename to tests/data_for_tests/io/cmrc/train.json diff --git a/test/data_for_tests/io/cnndm/dev.label.jsonl b/tests/data_for_tests/io/cnndm/dev.label.jsonl similarity index 100% rename from test/data_for_tests/io/cnndm/dev.label.jsonl rename to tests/data_for_tests/io/cnndm/dev.label.jsonl diff --git a/test/data_for_tests/io/cnndm/test.label.jsonl b/tests/data_for_tests/io/cnndm/test.label.jsonl similarity index 100% rename from test/data_for_tests/io/cnndm/test.label.jsonl rename to tests/data_for_tests/io/cnndm/test.label.jsonl diff --git a/test/data_for_tests/io/cnndm/train.cnndm.jsonl b/tests/data_for_tests/io/cnndm/train.cnndm.jsonl similarity index 100% rename from test/data_for_tests/io/cnndm/train.cnndm.jsonl rename to tests/data_for_tests/io/cnndm/train.cnndm.jsonl diff --git a/test/data_for_tests/io/cnndm/vocab b/tests/data_for_tests/io/cnndm/vocab similarity index 100% rename from test/data_for_tests/io/cnndm/vocab rename to tests/data_for_tests/io/cnndm/vocab diff --git a/test/data_for_tests/io/conll2003/dev.txt b/tests/data_for_tests/io/conll2003/dev.txt similarity index 100% rename from test/data_for_tests/io/conll2003/dev.txt rename to tests/data_for_tests/io/conll2003/dev.txt diff --git a/test/data_for_tests/io/conll2003/test.txt b/tests/data_for_tests/io/conll2003/test.txt similarity index 100% rename from test/data_for_tests/io/conll2003/test.txt rename to tests/data_for_tests/io/conll2003/test.txt diff --git a/test/data_for_tests/io/conll2003/train.txt b/tests/data_for_tests/io/conll2003/train.txt similarity index 100% rename from test/data_for_tests/io/conll2003/train.txt rename to tests/data_for_tests/io/conll2003/train.txt diff --git a/test/data_for_tests/io/coreference/coreference_dev.json b/tests/data_for_tests/io/coreference/coreference_dev.json similarity index 100% rename from test/data_for_tests/io/coreference/coreference_dev.json rename to tests/data_for_tests/io/coreference/coreference_dev.json diff --git a/test/data_for_tests/io/coreference/coreference_test.json b/tests/data_for_tests/io/coreference/coreference_test.json similarity index 100% rename from test/data_for_tests/io/coreference/coreference_test.json rename to tests/data_for_tests/io/coreference/coreference_test.json diff --git a/test/data_for_tests/io/coreference/coreference_train.json b/tests/data_for_tests/io/coreference/coreference_train.json similarity index 100% rename from test/data_for_tests/io/coreference/coreference_train.json rename to tests/data_for_tests/io/coreference/coreference_train.json diff --git a/test/data_for_tests/io/cws_as/dev.txt b/tests/data_for_tests/io/cws_as/dev.txt similarity index 100% rename from test/data_for_tests/io/cws_as/dev.txt rename to tests/data_for_tests/io/cws_as/dev.txt diff --git a/test/data_for_tests/io/cws_as/test.txt b/tests/data_for_tests/io/cws_as/test.txt similarity index 100% rename from test/data_for_tests/io/cws_as/test.txt rename to tests/data_for_tests/io/cws_as/test.txt diff --git a/test/data_for_tests/io/cws_as/train.txt b/tests/data_for_tests/io/cws_as/train.txt similarity index 100% rename from test/data_for_tests/io/cws_as/train.txt rename to tests/data_for_tests/io/cws_as/train.txt diff --git a/test/data_for_tests/io/cws_cityu/dev.txt b/tests/data_for_tests/io/cws_cityu/dev.txt similarity index 100% rename from test/data_for_tests/io/cws_cityu/dev.txt rename to tests/data_for_tests/io/cws_cityu/dev.txt diff --git a/test/data_for_tests/io/cws_cityu/test.txt b/tests/data_for_tests/io/cws_cityu/test.txt similarity index 100% rename from test/data_for_tests/io/cws_cityu/test.txt rename to tests/data_for_tests/io/cws_cityu/test.txt diff --git a/test/data_for_tests/io/cws_cityu/train.txt b/tests/data_for_tests/io/cws_cityu/train.txt similarity index 100% rename from test/data_for_tests/io/cws_cityu/train.txt rename to tests/data_for_tests/io/cws_cityu/train.txt diff --git a/test/data_for_tests/io/cws_msra/dev.txt b/tests/data_for_tests/io/cws_msra/dev.txt similarity index 100% rename from test/data_for_tests/io/cws_msra/dev.txt rename to tests/data_for_tests/io/cws_msra/dev.txt diff --git a/test/data_for_tests/io/cws_msra/test.txt b/tests/data_for_tests/io/cws_msra/test.txt similarity index 100% rename from test/data_for_tests/io/cws_msra/test.txt rename to tests/data_for_tests/io/cws_msra/test.txt diff --git a/test/data_for_tests/io/cws_msra/train.txt b/tests/data_for_tests/io/cws_msra/train.txt similarity index 100% rename from test/data_for_tests/io/cws_msra/train.txt rename to tests/data_for_tests/io/cws_msra/train.txt diff --git a/test/data_for_tests/io/cws_pku/dev.txt b/tests/data_for_tests/io/cws_pku/dev.txt similarity index 100% rename from test/data_for_tests/io/cws_pku/dev.txt rename to tests/data_for_tests/io/cws_pku/dev.txt diff --git a/test/data_for_tests/io/cws_pku/test.txt b/tests/data_for_tests/io/cws_pku/test.txt similarity index 100% rename from test/data_for_tests/io/cws_pku/test.txt rename to tests/data_for_tests/io/cws_pku/test.txt diff --git a/test/data_for_tests/io/cws_pku/train.txt b/tests/data_for_tests/io/cws_pku/train.txt similarity index 100% rename from test/data_for_tests/io/cws_pku/train.txt rename to tests/data_for_tests/io/cws_pku/train.txt diff --git a/test/data_for_tests/io/dbpedia/test.csv b/tests/data_for_tests/io/dbpedia/test.csv similarity index 100% rename from test/data_for_tests/io/dbpedia/test.csv rename to tests/data_for_tests/io/dbpedia/test.csv diff --git a/test/data_for_tests/io/dbpedia/train.csv b/tests/data_for_tests/io/dbpedia/train.csv similarity index 100% rename from test/data_for_tests/io/dbpedia/train.csv rename to tests/data_for_tests/io/dbpedia/train.csv diff --git a/test/data_for_tests/io/imdb/dev.txt b/tests/data_for_tests/io/imdb/dev.txt similarity index 100% rename from test/data_for_tests/io/imdb/dev.txt rename to tests/data_for_tests/io/imdb/dev.txt diff --git a/test/data_for_tests/io/imdb/test.txt b/tests/data_for_tests/io/imdb/test.txt similarity index 100% rename from test/data_for_tests/io/imdb/test.txt rename to tests/data_for_tests/io/imdb/test.txt diff --git a/test/data_for_tests/io/imdb/train.txt b/tests/data_for_tests/io/imdb/train.txt similarity index 100% rename from test/data_for_tests/io/imdb/train.txt rename to tests/data_for_tests/io/imdb/train.txt diff --git a/test/data_for_tests/io/peopledaily/dev.txt b/tests/data_for_tests/io/peopledaily/dev.txt similarity index 100% rename from test/data_for_tests/io/peopledaily/dev.txt rename to tests/data_for_tests/io/peopledaily/dev.txt diff --git a/test/data_for_tests/io/peopledaily/test.txt b/tests/data_for_tests/io/peopledaily/test.txt similarity index 100% rename from test/data_for_tests/io/peopledaily/test.txt rename to tests/data_for_tests/io/peopledaily/test.txt diff --git a/test/data_for_tests/io/peopledaily/train.txt b/tests/data_for_tests/io/peopledaily/train.txt similarity index 100% rename from test/data_for_tests/io/peopledaily/train.txt rename to tests/data_for_tests/io/peopledaily/train.txt diff --git a/test/data_for_tests/io/weibo_NER/dev.conll b/tests/data_for_tests/io/weibo_NER/dev.conll similarity index 100% rename from test/data_for_tests/io/weibo_NER/dev.conll rename to tests/data_for_tests/io/weibo_NER/dev.conll diff --git a/test/data_for_tests/io/weibo_NER/test.conll b/tests/data_for_tests/io/weibo_NER/test.conll similarity index 100% rename from test/data_for_tests/io/weibo_NER/test.conll rename to tests/data_for_tests/io/weibo_NER/test.conll diff --git a/test/data_for_tests/io/weibo_NER/train.conll b/tests/data_for_tests/io/weibo_NER/train.conll similarity index 100% rename from test/data_for_tests/io/weibo_NER/train.conll rename to tests/data_for_tests/io/weibo_NER/train.conll diff --git a/test/data_for_tests/io/yelp_review_full/dev.csv b/tests/data_for_tests/io/yelp_review_full/dev.csv similarity index 100% rename from test/data_for_tests/io/yelp_review_full/dev.csv rename to tests/data_for_tests/io/yelp_review_full/dev.csv diff --git a/test/data_for_tests/io/yelp_review_full/test.csv b/tests/data_for_tests/io/yelp_review_full/test.csv similarity index 100% rename from test/data_for_tests/io/yelp_review_full/test.csv rename to tests/data_for_tests/io/yelp_review_full/test.csv diff --git a/test/data_for_tests/io/yelp_review_full/train.csv b/tests/data_for_tests/io/yelp_review_full/train.csv similarity index 100% rename from test/data_for_tests/io/yelp_review_full/train.csv rename to tests/data_for_tests/io/yelp_review_full/train.csv diff --git a/test/data_for_tests/io/yelp_review_polarity/dev.csv b/tests/data_for_tests/io/yelp_review_polarity/dev.csv similarity index 100% rename from test/data_for_tests/io/yelp_review_polarity/dev.csv rename to tests/data_for_tests/io/yelp_review_polarity/dev.csv diff --git a/test/data_for_tests/io/yelp_review_polarity/test.csv b/tests/data_for_tests/io/yelp_review_polarity/test.csv similarity index 100% rename from test/data_for_tests/io/yelp_review_polarity/test.csv rename to tests/data_for_tests/io/yelp_review_polarity/test.csv diff --git a/test/data_for_tests/io/yelp_review_polarity/train.csv b/tests/data_for_tests/io/yelp_review_polarity/train.csv similarity index 100% rename from test/data_for_tests/io/yelp_review_polarity/train.csv rename to tests/data_for_tests/io/yelp_review_polarity/train.csv diff --git a/test/data_for_tests/modules/decoder/crf.json b/tests/data_for_tests/modules/decoder/crf.json similarity index 100% rename from test/data_for_tests/modules/decoder/crf.json rename to tests/data_for_tests/modules/decoder/crf.json diff --git a/test/data_for_tests/people.txt b/tests/data_for_tests/people.txt similarity index 100% rename from test/data_for_tests/people.txt rename to tests/data_for_tests/people.txt diff --git a/test/data_for_tests/people_daily_raw.txt b/tests/data_for_tests/people_daily_raw.txt similarity index 100% rename from test/data_for_tests/people_daily_raw.txt rename to tests/data_for_tests/people_daily_raw.txt diff --git a/test/data_for_tests/sample_mnli.tsv b/tests/data_for_tests/sample_mnli.tsv similarity index 100% rename from test/data_for_tests/sample_mnli.tsv rename to tests/data_for_tests/sample_mnli.tsv diff --git a/test/data_for_tests/sample_snli.jsonl b/tests/data_for_tests/sample_snli.jsonl similarity index 100% rename from test/data_for_tests/sample_snli.jsonl rename to tests/data_for_tests/sample_snli.jsonl diff --git a/test/data_for_tests/text_classify.txt b/tests/data_for_tests/text_classify.txt similarity index 100% rename from test/data_for_tests/text_classify.txt rename to tests/data_for_tests/text_classify.txt diff --git a/test/data_for_tests/tutorial_sample_dataset.csv b/tests/data_for_tests/tutorial_sample_dataset.csv similarity index 100% rename from test/data_for_tests/tutorial_sample_dataset.csv rename to tests/data_for_tests/tutorial_sample_dataset.csv diff --git a/test/data_for_tests/zh_sample.conllx b/tests/data_for_tests/zh_sample.conllx similarity index 100% rename from test/data_for_tests/zh_sample.conllx rename to tests/data_for_tests/zh_sample.conllx diff --git a/test/embeddings/__init__.py b/tests/embeddings/__init__.py similarity index 100% rename from test/embeddings/__init__.py rename to tests/embeddings/__init__.py diff --git a/test/embeddings/test_bert_embedding.py b/tests/embeddings/test_bert_embedding.py similarity index 100% rename from test/embeddings/test_bert_embedding.py rename to tests/embeddings/test_bert_embedding.py diff --git a/test/embeddings/test_char_embedding.py b/tests/embeddings/test_char_embedding.py similarity index 100% rename from test/embeddings/test_char_embedding.py rename to tests/embeddings/test_char_embedding.py diff --git a/test/embeddings/test_elmo_embedding.py b/tests/embeddings/test_elmo_embedding.py similarity index 100% rename from test/embeddings/test_elmo_embedding.py rename to tests/embeddings/test_elmo_embedding.py diff --git a/test/embeddings/test_gpt2_embedding.py b/tests/embeddings/test_gpt2_embedding.py similarity index 100% rename from test/embeddings/test_gpt2_embedding.py rename to tests/embeddings/test_gpt2_embedding.py diff --git a/test/embeddings/test_roberta_embedding.py b/tests/embeddings/test_roberta_embedding.py similarity index 100% rename from test/embeddings/test_roberta_embedding.py rename to tests/embeddings/test_roberta_embedding.py diff --git a/test/embeddings/test_stack_embeddings.py b/tests/embeddings/test_stack_embeddings.py similarity index 100% rename from test/embeddings/test_stack_embeddings.py rename to tests/embeddings/test_stack_embeddings.py diff --git a/test/embeddings/test_static_embedding.py b/tests/embeddings/test_static_embedding.py similarity index 100% rename from test/embeddings/test_static_embedding.py rename to tests/embeddings/test_static_embedding.py diff --git a/test/embeddings/test_transformer_embedding.py b/tests/embeddings/test_transformer_embedding.py similarity index 100% rename from test/embeddings/test_transformer_embedding.py rename to tests/embeddings/test_transformer_embedding.py diff --git a/test/io/__init__.py b/tests/io/__init__.py similarity index 100% rename from test/io/__init__.py rename to tests/io/__init__.py diff --git a/test/io/loader/test_classification_loader.py b/tests/io/loader/test_classification_loader.py similarity index 100% rename from test/io/loader/test_classification_loader.py rename to tests/io/loader/test_classification_loader.py diff --git a/test/io/loader/test_conll_loader.py b/tests/io/loader/test_conll_loader.py similarity index 100% rename from test/io/loader/test_conll_loader.py rename to tests/io/loader/test_conll_loader.py diff --git a/test/io/loader/test_coreference_loader.py b/tests/io/loader/test_coreference_loader.py similarity index 100% rename from test/io/loader/test_coreference_loader.py rename to tests/io/loader/test_coreference_loader.py diff --git a/test/io/loader/test_cws_loader.py b/tests/io/loader/test_cws_loader.py similarity index 100% rename from test/io/loader/test_cws_loader.py rename to tests/io/loader/test_cws_loader.py diff --git a/test/io/loader/test_matching_loader.py b/tests/io/loader/test_matching_loader.py similarity index 100% rename from test/io/loader/test_matching_loader.py rename to tests/io/loader/test_matching_loader.py diff --git a/test/io/loader/test_qa_loader.py b/tests/io/loader/test_qa_loader.py similarity index 100% rename from test/io/loader/test_qa_loader.py rename to tests/io/loader/test_qa_loader.py diff --git a/test/io/pipe/test_classification.py b/tests/io/pipe/test_classification.py similarity index 100% rename from test/io/pipe/test_classification.py rename to tests/io/pipe/test_classification.py diff --git a/test/io/pipe/test_conll.py b/tests/io/pipe/test_conll.py similarity index 100% rename from test/io/pipe/test_conll.py rename to tests/io/pipe/test_conll.py diff --git a/test/io/pipe/test_coreference.py b/tests/io/pipe/test_coreference.py similarity index 100% rename from test/io/pipe/test_coreference.py rename to tests/io/pipe/test_coreference.py diff --git a/test/io/pipe/test_cws.py b/tests/io/pipe/test_cws.py similarity index 100% rename from test/io/pipe/test_cws.py rename to tests/io/pipe/test_cws.py diff --git a/test/io/pipe/test_matching.py b/tests/io/pipe/test_matching.py similarity index 100% rename from test/io/pipe/test_matching.py rename to tests/io/pipe/test_matching.py diff --git a/test/io/pipe/test_qa.py b/tests/io/pipe/test_qa.py similarity index 100% rename from test/io/pipe/test_qa.py rename to tests/io/pipe/test_qa.py diff --git a/test/io/pipe/test_summary.py b/tests/io/pipe/test_summary.py similarity index 100% rename from test/io/pipe/test_summary.py rename to tests/io/pipe/test_summary.py diff --git a/test/io/test_embed_loader.py b/tests/io/test_embed_loader.py similarity index 100% rename from test/io/test_embed_loader.py rename to tests/io/test_embed_loader.py diff --git a/test/io/test_model_io.py b/tests/io/test_model_io.py similarity index 100% rename from test/io/test_model_io.py rename to tests/io/test_model_io.py diff --git a/test/models/__init__.py b/tests/models/__init__.py similarity index 100% rename from test/models/__init__.py rename to tests/models/__init__.py diff --git a/test/models/model_runner.py b/tests/models/model_runner.py similarity index 100% rename from test/models/model_runner.py rename to tests/models/model_runner.py diff --git a/test/models/test_bert.py b/tests/models/test_bert.py similarity index 100% rename from test/models/test_bert.py rename to tests/models/test_bert.py diff --git a/test/models/test_biaffine_parser.py b/tests/models/test_biaffine_parser.py similarity index 100% rename from test/models/test_biaffine_parser.py rename to tests/models/test_biaffine_parser.py diff --git a/test/models/test_cnn_text_classification.py b/tests/models/test_cnn_text_classification.py similarity index 100% rename from test/models/test_cnn_text_classification.py rename to tests/models/test_cnn_text_classification.py diff --git a/test/models/test_seq2seq_generator.py b/tests/models/test_seq2seq_generator.py similarity index 100% rename from test/models/test_seq2seq_generator.py rename to tests/models/test_seq2seq_generator.py diff --git a/test/models/test_seq2seq_model.py b/tests/models/test_seq2seq_model.py similarity index 100% rename from test/models/test_seq2seq_model.py rename to tests/models/test_seq2seq_model.py diff --git a/test/models/test_sequence_labeling.py b/tests/models/test_sequence_labeling.py similarity index 100% rename from test/models/test_sequence_labeling.py rename to tests/models/test_sequence_labeling.py diff --git a/test/models/test_snli.py b/tests/models/test_snli.py similarity index 100% rename from test/models/test_snli.py rename to tests/models/test_snli.py diff --git a/test/models/test_star_trans.py b/tests/models/test_star_trans.py similarity index 100% rename from test/models/test_star_trans.py rename to tests/models/test_star_trans.py diff --git a/test/modules/__init__.py b/tests/modules/__init__.py similarity index 100% rename from test/modules/__init__.py rename to tests/modules/__init__.py diff --git a/test/modules/decoder/__init__.py b/tests/modules/decoder/__init__.py similarity index 100% rename from test/modules/decoder/__init__.py rename to tests/modules/decoder/__init__.py diff --git a/test/modules/decoder/test_CRF.py b/tests/modules/decoder/test_CRF.py similarity index 100% rename from test/modules/decoder/test_CRF.py rename to tests/modules/decoder/test_CRF.py diff --git a/test/modules/decoder/test_bert.py b/tests/modules/decoder/test_bert.py similarity index 100% rename from test/modules/decoder/test_bert.py rename to tests/modules/decoder/test_bert.py diff --git a/test/modules/decoder/test_seq2seq_decoder.py b/tests/modules/decoder/test_seq2seq_decoder.py similarity index 100% rename from test/modules/decoder/test_seq2seq_decoder.py rename to tests/modules/decoder/test_seq2seq_decoder.py diff --git a/test/modules/encoder/__init__.py b/tests/modules/encoder/__init__.py similarity index 100% rename from test/modules/encoder/__init__.py rename to tests/modules/encoder/__init__.py diff --git a/test/modules/encoder/test_pooling.py b/tests/modules/encoder/test_pooling.py similarity index 100% rename from test/modules/encoder/test_pooling.py rename to tests/modules/encoder/test_pooling.py diff --git a/test/modules/encoder/test_seq2seq_encoder.py b/tests/modules/encoder/test_seq2seq_encoder.py similarity index 100% rename from test/modules/encoder/test_seq2seq_encoder.py rename to tests/modules/encoder/test_seq2seq_encoder.py diff --git a/test/modules/generator/__init__.py b/tests/modules/generator/__init__.py similarity index 100% rename from test/modules/generator/__init__.py rename to tests/modules/generator/__init__.py diff --git a/test/modules/generator/test_seq2seq_generator.py b/tests/modules/generator/test_seq2seq_generator.py similarity index 100% rename from test/modules/generator/test_seq2seq_generator.py rename to tests/modules/generator/test_seq2seq_generator.py diff --git a/test/modules/test_char_encoder.py b/tests/modules/test_char_encoder.py similarity index 100% rename from test/modules/test_char_encoder.py rename to tests/modules/test_char_encoder.py diff --git a/test/modules/test_other_modules.py b/tests/modules/test_other_modules.py similarity index 100% rename from test/modules/test_other_modules.py rename to tests/modules/test_other_modules.py diff --git a/test/modules/test_utils.py b/tests/modules/test_utils.py similarity index 100% rename from test/modules/test_utils.py rename to tests/modules/test_utils.py diff --git a/test/modules/test_variational_rnn.py b/tests/modules/test_variational_rnn.py similarity index 100% rename from test/modules/test_variational_rnn.py rename to tests/modules/test_variational_rnn.py diff --git a/test/modules/tokenizer/test_bert_tokenizer.py b/tests/modules/tokenizer/test_bert_tokenizer.py similarity index 100% rename from test/modules/tokenizer/test_bert_tokenizer.py rename to tests/modules/tokenizer/test_bert_tokenizer.py diff --git a/test/test_tutorials.py b/tests/test_tutorials.py similarity index 100% rename from test/test_tutorials.py rename to tests/test_tutorials.py -- Gitee From 350772f518a922395a272343e3a346a2c9039a50 Mon Sep 17 00:00:00 2001 From: willqvq Date: Mon, 23 Nov 2020 13:34:05 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=BB=E5=8F=96?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E6=95=B0=E6=8D=AE=E7=9A=84=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/core/test_dataset.py | 2 +- tests/core/test_utils.py | 22 ++++++++--------- tests/embeddings/test_bert_embedding.py | 16 ++++++------- tests/embeddings/test_elmo_embedding.py | 4 ++-- tests/embeddings/test_gpt2_embedding.py | 24 +++++++++---------- tests/embeddings/test_roberta_embedding.py | 24 +++++++++---------- tests/embeddings/test_static_embedding.py | 24 +++++++++---------- tests/io/loader/test_classification_loader.py | 16 ++++++------- tests/io/loader/test_conll_loader.py | 6 ++--- tests/io/loader/test_coreference_loader.py | 2 +- tests/io/loader/test_cws_loader.py | 2 +- tests/io/loader/test_matching_loader.py | 16 ++++++------- tests/io/loader/test_qa_loader.py | 4 ++-- tests/io/pipe/test_classification.py | 22 ++++++++--------- tests/io/pipe/test_conll.py | 10 ++++---- tests/io/pipe/test_coreference.py | 2 +- tests/io/pipe/test_cws.py | 4 ++-- tests/io/pipe/test_matching.py | 16 ++++++------- tests/io/pipe/test_qa.py | 2 +- tests/io/pipe/test_summary.py | 4 ++-- tests/io/test_embed_loader.py | 8 +++---- tests/models/test_bert.py | 22 ++++++++--------- tests/modules/decoder/test_CRF.py | 2 +- .../modules/tokenizer/test_bert_tokenizer.py | 2 +- tests/test_tutorials.py | 4 ++-- 25 files changed, 130 insertions(+), 130 deletions(-) diff --git a/tests/core/test_dataset.py b/tests/core/test_dataset.py index 03f24ad1..94dd3bdb 100644 --- a/tests/core/test_dataset.py +++ b/tests/core/test_dataset.py @@ -228,7 +228,7 @@ class TestDataSetMethods(unittest.TestCase): def split_sent(ins): return ins['raw_sentence'].split() csv_loader = CSVLoader(headers=['raw_sentence', 'label'], sep='\t') - data_bundle = csv_loader.load('test/data_for_tests/tutorial_sample_dataset.csv') + data_bundle = csv_loader.load('tests/data_for_tests/tutorial_sample_dataset.csv') dataset = data_bundle.datasets['train'] dataset.drop(lambda x: len(x['raw_sentence'].split()) == 0, inplace=True) dataset.apply(split_sent, new_field_name='words', is_input=True) diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index f4a29658..f43a526c 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -120,8 +120,8 @@ class TestCache(unittest.TestCase): def test_cache_save(self): try: start_time = time.time() - embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', - 'test/data_for_tests/cws_train') + embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', + 'tests/data_for_tests/cws_train') end_time = time.time() pre_time = end_time - start_time with open('test/demo1.pkl', 'rb') as f: @@ -130,8 +130,8 @@ class TestCache(unittest.TestCase): for i in range(embed.shape[0]): self.assertListEqual(embed[i].tolist(), _embed[i].tolist()) start_time = time.time() - embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', - 'test/data_for_tests/cws_train') + embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', + 'tests/data_for_tests/cws_train') end_time = time.time() read_time = end_time - start_time print("Read using {:.3f}, while prepare using:{:.3f}".format(read_time, pre_time)) @@ -142,7 +142,7 @@ class TestCache(unittest.TestCase): def test_cache_save_overwrite_path(self): try: start_time = time.time() - embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', 'test/data_for_tests/cws_train', + embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', 'tests/data_for_tests/cws_train', _cache_fp='test/demo_overwrite.pkl') end_time = time.time() pre_time = end_time - start_time @@ -152,8 +152,8 @@ class TestCache(unittest.TestCase): for i in range(embed.shape[0]): self.assertListEqual(embed[i].tolist(), _embed[i].tolist()) start_time = time.time() - embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', - 'test/data_for_tests/cws_train', + embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', + 'tests/data_for_tests/cws_train', _cache_fp='test/demo_overwrite.pkl') end_time = time.time() read_time = end_time - start_time @@ -165,8 +165,8 @@ class TestCache(unittest.TestCase): def test_cache_refresh(self): try: start_time = time.time() - embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', - 'test/data_for_tests/cws_train', + embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', + 'tests/data_for_tests/cws_train', _refresh=True) end_time = time.time() pre_time = end_time - start_time @@ -176,8 +176,8 @@ class TestCache(unittest.TestCase): for i in range(embed.shape[0]): self.assertListEqual(embed[i].tolist(), _embed[i].tolist()) start_time = time.time() - embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', - 'test/data_for_tests/cws_train', + embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', + 'tests/data_for_tests/cws_train', _refresh=True) end_time = time.time() read_time = end_time - start_time diff --git a/tests/embeddings/test_bert_embedding.py b/tests/embeddings/test_bert_embedding.py index 2e619bcb..f0104a58 100644 --- a/tests/embeddings/test_bert_embedding.py +++ b/tests/embeddings/test_bert_embedding.py @@ -32,7 +32,7 @@ class TestDownload(unittest.TestCase): class TestBertEmbedding(unittest.TestCase): def test_bert_embedding_1(self): vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1) + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1) requires_grad = embed.requires_grad embed.requires_grad = not requires_grad embed.train() @@ -40,14 +40,14 @@ class TestBertEmbedding(unittest.TestCase): result = embed(words) self.assertEqual(result.size(), (1, 4, 16)) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1) + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1) embed.eval() words = torch.LongTensor([[2, 3, 4, 0]]) result = embed(words) self.assertEqual(result.size(), (1, 4, 16)) # 自动截断而不报错 - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1, + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1, auto_truncate=True) words = torch.LongTensor([[2, 3, 4, 1]*10, @@ -60,7 +60,7 @@ class TestBertEmbedding(unittest.TestCase): try: os.makedirs(bert_save_test, exist_ok=True) vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1, + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1, auto_truncate=True) embed.save(bert_save_test) @@ -76,7 +76,7 @@ class TestBertEmbedding(unittest.TestCase): class TestBertWordPieceEncoder(unittest.TestCase): def test_bert_word_piece_encoder(self): - embed = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1) + embed = BertWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1) ds = DataSet({'words': ["this is a test . [SEP]".split()]}) embed.index_datasets(ds, field_name='words') self.assertTrue(ds.has_field('word_pieces')) @@ -84,7 +84,7 @@ class TestBertWordPieceEncoder(unittest.TestCase): def test_bert_embed_eq_bert_piece_encoder(self): ds = DataSet({'words': ["this is a texta model vocab".split(), 'this is'.split()]}) - encoder = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert') + encoder = BertWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_bert') encoder.eval() encoder.index_datasets(ds, field_name='words') word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1])) @@ -95,7 +95,7 @@ class TestBertWordPieceEncoder(unittest.TestCase): vocab.index_dataset(ds, field_name='words', new_field_name='words') ds.set_input('words') words = torch.LongTensor(ds['words'].get([0, 1])) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', pool_method='first', include_cls_sep=True, pooled_cls=False, min_freq=1) embed.eval() words_res = embed(words) @@ -109,7 +109,7 @@ class TestBertWordPieceEncoder(unittest.TestCase): bert_save_test = 'bert_save_test' try: os.makedirs(bert_save_test, exist_ok=True) - embed = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.0, + embed = BertWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.0, layers='-2') ds = DataSet({'words': ["this is a test . [SEP]".split()]}) embed.index_datasets(ds, field_name='words') diff --git a/tests/embeddings/test_elmo_embedding.py b/tests/embeddings/test_elmo_embedding.py index ed6910b4..7f6f5b35 100644 --- a/tests/embeddings/test_elmo_embedding.py +++ b/tests/embeddings/test_elmo_embedding.py @@ -21,7 +21,7 @@ class TestDownload(unittest.TestCase): class TestRunElmo(unittest.TestCase): def test_elmo_embedding(self): vocab = Vocabulary().add_word_lst("This is a test .".split()) - elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_elmo', layers='0,1') + elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_elmo', layers='0,1') words = torch.LongTensor([[0, 1, 2]]) hidden = elmo_embed(words) print(hidden.size()) @@ -30,7 +30,7 @@ class TestRunElmo(unittest.TestCase): def test_elmo_embedding_layer_assertion(self): vocab = Vocabulary().add_word_lst("This is a test .".split()) try: - elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_elmo', + elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_elmo', layers='0,1,2') except AssertionError as e: print(e) diff --git a/tests/embeddings/test_gpt2_embedding.py b/tests/embeddings/test_gpt2_embedding.py index e8d0d043..070ae528 100644 --- a/tests/embeddings/test_gpt2_embedding.py +++ b/tests/embeddings/test_gpt2_embedding.py @@ -21,7 +21,7 @@ class TestGPT2Embedding(unittest.TestCase): print(embed(words).size()) def test_gpt2_embedding(self): - weight_path = 'test/data_for_tests/embedding/small_gpt2' + weight_path = 'tests/data_for_tests/embedding/small_gpt2' vocab = Vocabulary().add_word_lst("this is a texta sentence".split()) embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1) requires_grad = embed.requires_grad @@ -49,7 +49,7 @@ class TestGPT2Embedding(unittest.TestCase): def test_gpt2_ebembedding_2(self): # 测试only_use_pretrain_vocab与truncate_embed是否正常工作 Embedding = GPT2Embedding - weight_path = 'test/data_for_tests/embedding/small_gpt2' + weight_path = 'tests/data_for_tests/embedding/small_gpt2' vocab = Vocabulary().add_word_lst("this is a texta and".split()) embed1 = Embedding(vocab, model_dir_or_name=weight_path,layers=list(range(3)), only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1) @@ -89,13 +89,13 @@ class TestGPT2Embedding(unittest.TestCase): def test_gpt2_tokenizer(self): from fastNLP.modules.tokenizer import GPT2Tokenizer - tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2') + tokenizer = GPT2Tokenizer.from_pretrained('tests/data_for_tests/embedding/small_gpt2') print(tokenizer.encode("this is a texta a sentence")) print(tokenizer.encode('this is')) def test_gpt2_embed_eq_gpt2_piece_encoder(self): # 主要检查一下embedding的结果与wordpieceencoder的结果是否一致 - weight_path = 'test/data_for_tests/embedding/small_gpt2' + weight_path = 'tests/data_for_tests/embedding/small_gpt2' ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]}) encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path) encoder.eval() @@ -187,7 +187,7 @@ class TestGPT2WordPieceEncoder(unittest.TestCase): print(used_pairs) import json - with open('test/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f: + with open('tests/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f: new_used_vocab = {} for idx, key in enumerate(used_vocab.keys()): new_used_vocab[key] = len(new_used_vocab) @@ -201,12 +201,12 @@ class TestGPT2WordPieceEncoder(unittest.TestCase): json.dump(new_used_vocab, f) - with open('test/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f: + with open('tests/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f: f.write('#version: small\n') for k,v in sorted(sorted(used_pairs.items(), key=lambda kv:kv[1])): f.write('{} {}\n'.format(k[0], k[1])) - new_tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2') + new_tokenizer = GPT2Tokenizer.from_pretrained('tests/data_for_tests/embedding/small_gpt2') new_all_tokens = [] for sent in [sent1, sent2, sent3]: tokens = new_tokenizer.tokenize(sent, add_prefix_space=True) @@ -227,21 +227,21 @@ class TestGPT2WordPieceEncoder(unittest.TestCase): "n_positions": 20, "vocab_size": len(new_used_vocab) } - with open('test/data_for_tests/embedding/small_gpt2/config.json', 'w') as f: + with open('tests/data_for_tests/embedding/small_gpt2/config.json', 'w') as f: json.dump(config, f) # 生成更小的merges.txt与vocab.json, 方法是通过记录tokenizer中的值实现 from fastNLP.modules.encoder.gpt2 import GPT2LMHeadModel, GPT2Config - config = GPT2Config.from_pretrained('test/data_for_tests/embedding/small_gpt2') + config = GPT2Config.from_pretrained('tests/data_for_tests/embedding/small_gpt2') model = GPT2LMHeadModel(config) - torch.save(model.state_dict(), 'test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin') + torch.save(model.state_dict(), 'tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin') print(model(torch.LongTensor([[0,1,2,3]]))) def test_gpt2_word_piece_encoder(self): # 主要检查可以运行 - weight_path = 'test/data_for_tests/embedding/small_gpt2' + weight_path = 'tests/data_for_tests/embedding/small_gpt2' ds = DataSet({'words': ["this is a test sentence".split()]}) embed = GPT2WordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1) embed.index_datasets(ds, field_name='words') @@ -256,7 +256,7 @@ class TestGPT2WordPieceEncoder(unittest.TestCase): @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis") def test_generate(self): - # weight_path = 'test/data_for_tests/embedding/small_gpt2' + # weight_path = 'tests/data_for_tests/embedding/small_gpt2' weight_path = 'en' encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path, language_model=True) diff --git a/tests/embeddings/test_roberta_embedding.py b/tests/embeddings/test_roberta_embedding.py index 7eba2644..d4874a0b 100644 --- a/tests/embeddings/test_roberta_embedding.py +++ b/tests/embeddings/test_roberta_embedding.py @@ -24,7 +24,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase): def test_robert_word_piece_encoder(self): # 可正常运行即可 - weight_path = 'test/data_for_tests/embedding/small_roberta' + weight_path = 'tests/data_for_tests/embedding/small_roberta' encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1) ds = DataSet({'words': ["this is a test . [SEP]".split()]}) encoder.index_datasets(ds, field_name='words') @@ -33,7 +33,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase): def test_roberta_embed_eq_roberta_piece_encoder(self): # 主要检查一下embedding的结果与wordpieceencoder的结果是否一致 - weight_path = 'test/data_for_tests/embedding/small_roberta' + weight_path = 'tests/data_for_tests/embedding/small_roberta' ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]}) encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path) encoder.eval() @@ -120,7 +120,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase): used_vocab.update({t:i for t,i in zip(tokens, token_ids)}) import json - with open('test/data_for_tests/embedding/small_roberta/vocab.json', 'w') as f: + with open('tests/data_for_tests/embedding/small_roberta/vocab.json', 'w') as f: new_used_vocab = {} for token in ['', '', '', '', '']: # 必须为1 new_used_vocab[token] = len(new_used_vocab) @@ -135,7 +135,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase): new_used_vocab[key] = len(new_used_vocab) json.dump(new_used_vocab, f) - with open('test/data_for_tests/embedding/small_roberta/merges.txt', 'w') as f: + with open('tests/data_for_tests/embedding/small_roberta/merges.txt', 'w') as f: f.write('#version: tiny\n') for k,v in sorted(sorted(used_pairs.items(), key=lambda kv:kv[1])): f.write('{} {}\n'.format(k[0], k[1])) @@ -162,10 +162,10 @@ class TestRobertWordPieceEncoder(unittest.TestCase): "type_vocab_size": 1, "vocab_size": len(new_used_vocab) } - with open('test/data_for_tests/embedding/small_roberta/config.json', 'w') as f: + with open('tests/data_for_tests/embedding/small_roberta/config.json', 'w') as f: json.dump(config, f) - new_tokenizer = RobertaTokenizer.from_pretrained('test/data_for_tests/embedding/small_roberta') + new_tokenizer = RobertaTokenizer.from_pretrained('tests/data_for_tests/embedding/small_roberta') new_all_tokens = [] for sent in [sent1, sent2, sent3]: tokens = new_tokenizer.tokenize(sent, add_prefix_space=True) @@ -177,17 +177,17 @@ class TestRobertWordPieceEncoder(unittest.TestCase): # 生成更小的merges.txt与vocab.json, 方法是通过记录tokenizer中的值实现 from fastNLP.modules.encoder.roberta import RobertaModel, BertConfig - config = BertConfig.from_json_file('test/data_for_tests/embedding/small_roberta/config.json') + config = BertConfig.from_json_file('tests/data_for_tests/embedding/small_roberta/config.json') model = RobertaModel(config) - torch.save(model.state_dict(), 'test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin') + torch.save(model.state_dict(), 'tests/data_for_tests/embedding/small_roberta/small_pytorch_model.bin') print(model(torch.LongTensor([[0,1,2,3]]))) def test_save_load(self): bert_save_test = 'roberta_save_test' try: os.makedirs(bert_save_test, exist_ok=True) - embed = RobertaWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_roberta', word_dropout=0.0, + embed = RobertaWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_roberta', word_dropout=0.0, layers='-2') ds = DataSet({'words': ["this is a test . [SEP]".split()]}) embed.index_datasets(ds, field_name='words') @@ -204,7 +204,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase): class TestRobertaEmbedding(unittest.TestCase): def test_roberta_embedding_1(self): - weight_path = 'test/data_for_tests/embedding/small_roberta' + weight_path = 'tests/data_for_tests/embedding/small_roberta' vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInRoberta".split()) embed = RobertaEmbedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1) requires_grad = embed.requires_grad @@ -224,7 +224,7 @@ class TestRobertaEmbedding(unittest.TestCase): def test_roberta_ebembedding_2(self): # 测试only_use_pretrain_vocab与truncate_embed是否正常工作 Embedding = RobertaEmbedding - weight_path = 'test/data_for_tests/embedding/small_roberta' + weight_path = 'tests/data_for_tests/embedding/small_roberta' vocab = Vocabulary().add_word_lst("this is a texta and".split()) embed1 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)), only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1) @@ -266,7 +266,7 @@ class TestRobertaEmbedding(unittest.TestCase): try: os.makedirs(bert_save_test, exist_ok=True) vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split()) - embed = RobertaEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_roberta', + embed = RobertaEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_roberta', word_dropout=0.1, auto_truncate=True) embed.save(bert_save_test) diff --git a/tests/embeddings/test_static_embedding.py b/tests/embeddings/test_static_embedding.py index 2b10a2d0..90519338 100644 --- a/tests/embeddings/test_static_embedding.py +++ b/tests/embeddings/test_static_embedding.py @@ -10,7 +10,7 @@ class TestLoad(unittest.TestCase): def test_norm1(self): # 测试只对可以找到的norm vocab = Vocabulary().add_word_lst(['the', 'a', 'notinfile']) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', only_norm_found_vector=True) self.assertEqual(round(torch.norm(embed(torch.LongTensor([[2]]))).item(), 4), 1) @@ -19,7 +19,7 @@ class TestLoad(unittest.TestCase): def test_norm2(self): # 测试对所有都norm vocab = Vocabulary().add_word_lst(['the', 'a', 'notinfile']) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', normalize=True) self.assertEqual(round(torch.norm(embed(torch.LongTensor([[2]]))).item(), 4), 1) @@ -50,13 +50,13 @@ class TestLoad(unittest.TestCase): v2 = embed_dict[word] for v1i, v2i in zip(v1, v2): self.assertAlmostEqual(v1i, v2i, places=4) - embed_dict = read_static_embed('test/data_for_tests/embedding/small_static_embedding/' + embed_dict = read_static_embed('tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt') # 测试是否只使用pretrain的word vocab = Vocabulary().add_word_lst(['the', 'a', 'notinfile']) vocab.add_word('of', no_create_entry=True) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', only_use_pretrain_word=True) # notinfile应该被置为unk @@ -66,13 +66,13 @@ class TestLoad(unittest.TestCase): # 测试在大小写情况下的使用 vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile']) vocab.add_word('Of', no_create_entry=True) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', only_use_pretrain_word=True) check_word_unk(['The', 'Of', 'notinfile'], vocab, embed) # 这些词应该找不到 check_vector_equal(['a'], vocab, embed, embed_dict) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', only_use_pretrain_word=True, lower=True) check_vector_equal(['The', 'Of', 'a'], vocab, embed, embed_dict, lower=True) @@ -82,7 +82,7 @@ class TestLoad(unittest.TestCase): vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A', 'notinfile2', 'notinfile2']) vocab.add_word('Of', no_create_entry=True) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', only_use_pretrain_word=True, lower=True, min_freq=2, only_train_min_freq=True) @@ -92,12 +92,12 @@ class TestLoad(unittest.TestCase): def test_sequential_index(self): # 当不存在no_create_entry时,words_to_words应该是顺序的 vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A', 'notinfile2', 'notinfile2']) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt') for index,i in enumerate(embed.words_to_words): assert index==i - embed_dict = read_static_embed('test/data_for_tests/embedding/small_static_embedding/' + embed_dict = read_static_embed('tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt') for word, index in vocab: @@ -116,7 +116,7 @@ class TestLoad(unittest.TestCase): vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A']) vocab.add_word_lst(['notinfile2', 'notinfile2'], no_create_entry=True) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt') embed.save(static_test_folder) load_embed = StaticEmbedding.load(static_test_folder) @@ -125,7 +125,7 @@ class TestLoad(unittest.TestCase): # 测试不包含no_create_entry vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A']) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt') embed.save(static_test_folder) load_embed = StaticEmbedding.load(static_test_folder) @@ -134,7 +134,7 @@ class TestLoad(unittest.TestCase): # 测试lower, min_freq vocab = Vocabulary().add_word_lst(['The', 'the', 'the', 'A', 'a', 'B']) - embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/' + embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/' 'glove.6B.50d_test.txt', min_freq=2, lower=True) embed.save(static_test_folder) load_embed = StaticEmbedding.load(static_test_folder) diff --git a/tests/io/loader/test_classification_loader.py b/tests/io/loader/test_classification_loader.py index 6ed8eb15..836e24e4 100644 --- a/tests/io/loader/test_classification_loader.py +++ b/tests/io/loader/test_classification_loader.py @@ -23,14 +23,14 @@ class TestDownload(unittest.TestCase): class TestLoad(unittest.TestCase): def test_process_from_file(self): data_set_dict = { - 'yelp.p': ('test/data_for_tests/io/yelp_review_polarity', YelpPolarityLoader, (6, 6, 6), False), - 'yelp.f': ('test/data_for_tests/io/yelp_review_full', YelpFullLoader, (6, 6, 6), False), - 'sst-2': ('test/data_for_tests/io/SST-2', SST2Loader, (5, 5, 5), True), - 'sst': ('test/data_for_tests/io/SST', SSTLoader, (6, 6, 6), False), - 'imdb': ('test/data_for_tests/io/imdb', IMDBLoader, (6, 6, 6), False), - 'ChnSentiCorp': ('test/data_for_tests/io/ChnSentiCorp', ChnSentiCorpLoader, (6, 6, 6), False), - 'THUCNews': ('test/data_for_tests/io/THUCNews', THUCNewsLoader, (9, 9, 9), False), - 'WeiboSenti100k': ('test/data_for_tests/io/WeiboSenti100k', WeiboSenti100kLoader, (6, 7, 6), False), + 'yelp.p': ('tests/data_for_tests/io/yelp_review_polarity', YelpPolarityLoader, (6, 6, 6), False), + 'yelp.f': ('tests/data_for_tests/io/yelp_review_full', YelpFullLoader, (6, 6, 6), False), + 'sst-2': ('tests/data_for_tests/io/SST-2', SST2Loader, (5, 5, 5), True), + 'sst': ('tests/data_for_tests/io/SST', SSTLoader, (6, 6, 6), False), + 'imdb': ('tests/data_for_tests/io/imdb', IMDBLoader, (6, 6, 6), False), + 'ChnSentiCorp': ('tests/data_for_tests/io/ChnSentiCorp', ChnSentiCorpLoader, (6, 6, 6), False), + 'THUCNews': ('tests/data_for_tests/io/THUCNews', THUCNewsLoader, (9, 9, 9), False), + 'WeiboSenti100k': ('tests/data_for_tests/io/WeiboSenti100k', WeiboSenti100kLoader, (6, 7, 6), False), } for k, v in data_set_dict.items(): path, loader, data_set, warns = v diff --git a/tests/io/loader/test_conll_loader.py b/tests/io/loader/test_conll_loader.py index bf0ebb47..87ea57c3 100644 --- a/tests/io/loader/test_conll_loader.py +++ b/tests/io/loader/test_conll_loader.py @@ -27,12 +27,12 @@ class TestWeiboNER(unittest.TestCase): class TestConll2003Loader(unittest.TestCase): def test_load(self): - Conll2003Loader()._load('test/data_for_tests/conll_2003_example.txt') + Conll2003Loader()._load('tests/data_for_tests/conll_2003_example.txt') class TestConllLoader(unittest.TestCase): def test_conll(self): - db = Conll2003Loader().load('test/data_for_tests/io/conll2003') + db = Conll2003Loader().load('tests/data_for_tests/io/conll2003') print(db) class TestConllLoader(unittest.TestCase): @@ -40,5 +40,5 @@ class TestConllLoader(unittest.TestCase): headers = [ 'raw_words', 'ner', ] - db = ConllLoader(headers = headers,sep="\n").load('test/data_for_tests/io/MSRA_NER') + db = ConllLoader(headers = headers,sep="\n").load('tests/data_for_tests/io/MSRA_NER') print(db) diff --git a/tests/io/loader/test_coreference_loader.py b/tests/io/loader/test_coreference_loader.py index 02f3a1c5..50f27e39 100644 --- a/tests/io/loader/test_coreference_loader.py +++ b/tests/io/loader/test_coreference_loader.py @@ -5,7 +5,7 @@ import unittest class TestCR(unittest.TestCase): def test_load(self): - test_root = "test/data_for_tests/io/coreference/" + test_root = "tests/data_for_tests/io/coreference/" train_path = test_root+"coreference_train.json" dev_path = test_root+"coreference_dev.json" test_path = test_root+"coreference_test.json" diff --git a/tests/io/loader/test_cws_loader.py b/tests/io/loader/test_cws_loader.py index 80ca0406..e17d0e0d 100644 --- a/tests/io/loader/test_cws_loader.py +++ b/tests/io/loader/test_cws_loader.py @@ -19,6 +19,6 @@ class TestRunCWSLoader(unittest.TestCase): for dataset_name in dataset_names: with self.subTest(dataset_name=dataset_name): data_bundle = CWSLoader(dataset_name=dataset_name).load( - f'test/data_for_tests/io/cws_{dataset_name}' + f'tests/data_for_tests/io/cws_{dataset_name}' ) print(data_bundle) diff --git a/tests/io/loader/test_matching_loader.py b/tests/io/loader/test_matching_loader.py index 30ace410..6c7059da 100644 --- a/tests/io/loader/test_matching_loader.py +++ b/tests/io/loader/test_matching_loader.py @@ -25,14 +25,14 @@ class TestMatchingDownload(unittest.TestCase): class TestMatchingLoad(unittest.TestCase): def test_load(self): data_set_dict = { - 'RTE': ('test/data_for_tests/io/RTE', RTELoader, (5, 5, 5), True), - 'SNLI': ('test/data_for_tests/io/SNLI', SNLILoader, (5, 5, 5), False), - 'QNLI': ('test/data_for_tests/io/QNLI', QNLILoader, (5, 5, 5), True), - 'MNLI': ('test/data_for_tests/io/MNLI', MNLILoader, (5, 5, 5, 5, 6), True), - 'Quora': ('test/data_for_tests/io/Quora', QuoraLoader, (2, 2, 2), False), - 'BQCorpus': ('test/data_for_tests/io/BQCorpus', BQCorpusLoader, (5, 5, 5), False), - 'XNLI': ('test/data_for_tests/io/XNLI', CNXNLILoader, (6, 6, 8), False), - 'LCQMC': ('test/data_for_tests/io/LCQMC', LCQMCLoader, (6, 5, 6), False), + 'RTE': ('tests/data_for_tests/io/RTE', RTELoader, (5, 5, 5), True), + 'SNLI': ('tests/data_for_tests/io/SNLI', SNLILoader, (5, 5, 5), False), + 'QNLI': ('tests/data_for_tests/io/QNLI', QNLILoader, (5, 5, 5), True), + 'MNLI': ('tests/data_for_tests/io/MNLI', MNLILoader, (5, 5, 5, 5, 6), True), + 'Quora': ('tests/data_for_tests/io/Quora', QuoraLoader, (2, 2, 2), False), + 'BQCorpus': ('tests/data_for_tests/io/BQCorpus', BQCorpusLoader, (5, 5, 5), False), + 'XNLI': ('tests/data_for_tests/io/XNLI', CNXNLILoader, (6, 6, 8), False), + 'LCQMC': ('tests/data_for_tests/io/LCQMC', LCQMCLoader, (6, 5, 6), False), } for k, v in data_set_dict.items(): path, loader, instance, warns = v diff --git a/tests/io/loader/test_qa_loader.py b/tests/io/loader/test_qa_loader.py index eea067cd..99a504c5 100644 --- a/tests/io/loader/test_qa_loader.py +++ b/tests/io/loader/test_qa_loader.py @@ -5,10 +5,10 @@ from fastNLP.io.loader.qa import CMRC2018Loader class TestCMRC2018Loader(unittest.TestCase): def test__load(self): loader = CMRC2018Loader() - dataset = loader._load('test/data_for_tests/io/cmrc/train.json') + dataset = loader._load('tests/data_for_tests/io/cmrc/train.json') print(dataset) def test_load(self): loader = CMRC2018Loader() - data_bundle = loader.load('test/data_for_tests/io/cmrc/') + data_bundle = loader.load('tests/data_for_tests/io/cmrc/') print(data_bundle) diff --git a/tests/io/pipe/test_classification.py b/tests/io/pipe/test_classification.py index 8ebdb2df..e3200a1a 100644 --- a/tests/io/pipe/test_classification.py +++ b/tests/io/pipe/test_classification.py @@ -20,7 +20,7 @@ class TestClassificationPipe(unittest.TestCase): class TestRunPipe(unittest.TestCase): def test_load(self): for pipe in [IMDBPipe]: - data_bundle = pipe(tokenizer='raw').process_from_file('test/data_for_tests/io/imdb') + data_bundle = pipe(tokenizer='raw').process_from_file('tests/data_for_tests/io/imdb') print(data_bundle) @@ -37,35 +37,35 @@ class TestCNClassificationPipe(unittest.TestCase): class TestRunClassificationPipe(unittest.TestCase): def test_process_from_file(self): data_set_dict = { - 'yelp.p': ('test/data_for_tests/io/yelp_review_polarity', YelpPolarityPipe, + 'yelp.p': ('tests/data_for_tests/io/yelp_review_polarity', YelpPolarityPipe, {'train': 6, 'dev': 6, 'test': 6}, {'words': 1176, 'target': 2}, False), - 'yelp.f': ('test/data_for_tests/io/yelp_review_full', YelpFullPipe, + 'yelp.f': ('tests/data_for_tests/io/yelp_review_full', YelpFullPipe, {'train': 6, 'dev': 6, 'test': 6}, {'words': 1166, 'target': 5}, False), - 'sst-2': ('test/data_for_tests/io/SST-2', SST2Pipe, + 'sst-2': ('tests/data_for_tests/io/SST-2', SST2Pipe, {'train': 5, 'dev': 5, 'test': 5}, {'words': 139, 'target': 2}, True), - 'sst': ('test/data_for_tests/io/SST', SSTPipe, + 'sst': ('tests/data_for_tests/io/SST', SSTPipe, {'train': 354, 'dev': 6, 'test': 6}, {'words': 232, 'target': 5}, False), - 'imdb': ('test/data_for_tests/io/imdb', IMDBPipe, + 'imdb': ('tests/data_for_tests/io/imdb', IMDBPipe, {'train': 6, 'dev': 6, 'test': 6}, {'words': 1670, 'target': 2}, False), - 'ag': ('test/data_for_tests/io/ag', AGsNewsPipe, + 'ag': ('tests/data_for_tests/io/ag', AGsNewsPipe, {'train': 4, 'test': 5}, {'words': 257, 'target': 4}, False), - 'dbpedia': ('test/data_for_tests/io/dbpedia', DBPediaPipe, + 'dbpedia': ('tests/data_for_tests/io/dbpedia', DBPediaPipe, {'train': 14, 'test': 5}, {'words': 496, 'target': 14}, False), - 'ChnSentiCorp': ('test/data_for_tests/io/ChnSentiCorp', ChnSentiCorpPipe, + 'ChnSentiCorp': ('tests/data_for_tests/io/ChnSentiCorp', ChnSentiCorpPipe, {'train': 6, 'dev': 6, 'test': 6}, {'chars': 529, 'bigrams': 1296, 'trigrams': 1483, 'target': 2}, False), - 'Chn-THUCNews': ('test/data_for_tests/io/THUCNews', THUCNewsPipe, + 'Chn-THUCNews': ('tests/data_for_tests/io/THUCNews', THUCNewsPipe, {'train': 9, 'dev': 9, 'test': 9}, {'chars': 1864, 'target': 9}, False), - 'Chn-WeiboSenti100k': ('test/data_for_tests/io/WeiboSenti100k', WeiboSenti100kPipe, + 'Chn-WeiboSenti100k': ('tests/data_for_tests/io/WeiboSenti100k', WeiboSenti100kPipe, {'train': 6, 'dev': 6, 'test': 7}, {'chars': 452, 'target': 2}, False), } diff --git a/tests/io/pipe/test_conll.py b/tests/io/pipe/test_conll.py index ad41ae18..30d5b48f 100644 --- a/tests/io/pipe/test_conll.py +++ b/tests/io/pipe/test_conll.py @@ -21,7 +21,7 @@ class TestRunPipe(unittest.TestCase): for pipe in [Conll2003Pipe, Conll2003NERPipe]: with self.subTest(pipe=pipe): print(pipe) - data_bundle = pipe().process_from_file('test/data_for_tests/conll_2003_example.txt') + data_bundle = pipe().process_from_file('tests/data_for_tests/conll_2003_example.txt') print(data_bundle) @@ -35,18 +35,18 @@ class TestNERPipe(unittest.TestCase): for k, v in data_dict.items(): pipe = v with self.subTest(pipe=pipe): - data_bundle = pipe(bigrams=True, trigrams=True).process_from_file(f'test/data_for_tests/io/{k}') + data_bundle = pipe(bigrams=True, trigrams=True).process_from_file(f'tests/data_for_tests/io/{k}') print(data_bundle) - data_bundle = pipe(encoding_type='bioes').process_from_file(f'test/data_for_tests/io/{k}') + data_bundle = pipe(encoding_type='bioes').process_from_file(f'tests/data_for_tests/io/{k}') print(data_bundle) class TestConll2003Pipe(unittest.TestCase): def test_conll(self): with self.assertWarns(Warning): - data_bundle = Conll2003Pipe().process_from_file('test/data_for_tests/io/conll2003') + data_bundle = Conll2003Pipe().process_from_file('tests/data_for_tests/io/conll2003') print(data_bundle) def test_OntoNotes(self): - data_bundle = OntoNotesNERPipe().process_from_file('test/data_for_tests/io/OntoNotes') + data_bundle = OntoNotesNERPipe().process_from_file('tests/data_for_tests/io/OntoNotes') print(data_bundle) diff --git a/tests/io/pipe/test_coreference.py b/tests/io/pipe/test_coreference.py index 3a492419..784f6954 100644 --- a/tests/io/pipe/test_coreference.py +++ b/tests/io/pipe/test_coreference.py @@ -11,7 +11,7 @@ class TestCR(unittest.TestCase): char_path = None config = Config() - file_root_path = "test/data_for_tests/io/coreference/" + file_root_path = "tests/data_for_tests/io/coreference/" train_path = file_root_path + "coreference_train.json" dev_path = file_root_path + "coreference_dev.json" test_path = file_root_path + "coreference_test.json" diff --git a/tests/io/pipe/test_cws.py b/tests/io/pipe/test_cws.py index f3a95596..ef50907f 100644 --- a/tests/io/pipe/test_cws.py +++ b/tests/io/pipe/test_cws.py @@ -31,11 +31,11 @@ class TestRunCWSPipe(unittest.TestCase): for dataset_name in dataset_names: with self.subTest(dataset_name=dataset_name): data_bundle = CWSPipe(bigrams=True, trigrams=True).\ - process_from_file(f'test/data_for_tests/io/cws_{dataset_name}') + process_from_file(f'tests/data_for_tests/io/cws_{dataset_name}') print(data_bundle) def test_replace_number(self): data_bundle = CWSPipe(bigrams=True, replace_num_alpha=True).\ - process_from_file(f'test/data_for_tests/io/cws_pku') + process_from_file(f'tests/data_for_tests/io/cws_pku') for word in ['<', '>', '']: self.assertNotEqual(data_bundle.get_vocab('chars').to_index(word), 1) diff --git a/tests/io/pipe/test_matching.py b/tests/io/pipe/test_matching.py index 92993690..23f450db 100644 --- a/tests/io/pipe/test_matching.py +++ b/tests/io/pipe/test_matching.py @@ -33,13 +33,13 @@ class TestRunMatchingPipe(unittest.TestCase): def test_load(self): data_set_dict = { - 'RTE': ('test/data_for_tests/io/RTE', RTEPipe, RTEBertPipe, (5, 5, 5), (449, 2), True), - 'SNLI': ('test/data_for_tests/io/SNLI', SNLIPipe, SNLIBertPipe, (5, 5, 5), (110, 3), False), - 'QNLI': ('test/data_for_tests/io/QNLI', QNLIPipe, QNLIBertPipe, (5, 5, 5), (372, 2), True), - 'MNLI': ('test/data_for_tests/io/MNLI', MNLIPipe, MNLIBertPipe, (5, 5, 5, 5, 6), (459, 3), True), - 'BQCorpus': ('test/data_for_tests/io/BQCorpus', BQCorpusPipe, BQCorpusBertPipe, (5, 5, 5), (32, 2), False), - 'XNLI': ('test/data_for_tests/io/XNLI', CNXNLIPipe, CNXNLIBertPipe, (6, 6, 8), (39, 3), False), - 'LCQMC': ('test/data_for_tests/io/LCQMC', LCQMCPipe, LCQMCBertPipe, (6, 5, 6), (36, 2), False), + 'RTE': ('tests/data_for_tests/io/RTE', RTEPipe, RTEBertPipe, (5, 5, 5), (449, 2), True), + 'SNLI': ('tests/data_for_tests/io/SNLI', SNLIPipe, SNLIBertPipe, (5, 5, 5), (110, 3), False), + 'QNLI': ('tests/data_for_tests/io/QNLI', QNLIPipe, QNLIBertPipe, (5, 5, 5), (372, 2), True), + 'MNLI': ('tests/data_for_tests/io/MNLI', MNLIPipe, MNLIBertPipe, (5, 5, 5, 5, 6), (459, 3), True), + 'BQCorpus': ('tests/data_for_tests/io/BQCorpus', BQCorpusPipe, BQCorpusBertPipe, (5, 5, 5), (32, 2), False), + 'XNLI': ('tests/data_for_tests/io/XNLI', CNXNLIPipe, CNXNLIBertPipe, (6, 6, 8), (39, 3), False), + 'LCQMC': ('tests/data_for_tests/io/LCQMC', LCQMCPipe, LCQMCBertPipe, (6, 5, 6), (36, 2), False), } for k, v in data_set_dict.items(): path, pipe1, pipe2, data_set, vocab, warns = v @@ -76,7 +76,7 @@ class TestRunMatchingPipe(unittest.TestCase): def test_spacy(self): data_set_dict = { - 'Quora': ('test/data_for_tests/io/Quora', QuoraPipe, QuoraBertPipe, (2, 2, 2), (93, 2)), + 'Quora': ('tests/data_for_tests/io/Quora', QuoraPipe, QuoraBertPipe, (2, 2, 2), (93, 2)), } for k, v in data_set_dict.items(): path, pipe1, pipe2, data_set, vocab = v diff --git a/tests/io/pipe/test_qa.py b/tests/io/pipe/test_qa.py index ad6581f9..db2245fc 100644 --- a/tests/io/pipe/test_qa.py +++ b/tests/io/pipe/test_qa.py @@ -6,7 +6,7 @@ from fastNLP.io.loader.qa import CMRC2018Loader class CMRC2018PipeTest(unittest.TestCase): def test_process(self): - data_bundle = CMRC2018Loader().load('test/data_for_tests/io/cmrc/') + data_bundle = CMRC2018Loader().load('tests/data_for_tests/io/cmrc/') pipe = CMRC2018BertPipe() data_bundle = pipe.process(data_bundle) diff --git a/tests/io/pipe/test_summary.py b/tests/io/pipe/test_summary.py index 32508a15..03d92214 100644 --- a/tests/io/pipe/test_summary.py +++ b/tests/io/pipe/test_summary.py @@ -27,9 +27,9 @@ from fastNLP.io.pipe.summarization import ExtCNNDMPipe class TestRunExtCNNDMPipe(unittest.TestCase): def test_load(self): - data_dir = 'test/data_for_tests/io/cnndm' + data_dir = 'tests/data_for_tests/io/cnndm' vocab_size = 100000 - VOCAL_FILE = 'test/data_for_tests/io/cnndm/vocab' + VOCAL_FILE = 'tests/data_for_tests/io/cnndm/vocab' sent_max_len = 100 doc_max_timesteps = 50 dbPipe = ExtCNNDMPipe(vocab_size=vocab_size, diff --git a/tests/io/test_embed_loader.py b/tests/io/test_embed_loader.py index 70b367ec..7c8abc77 100644 --- a/tests/io/test_embed_loader.py +++ b/tests/io/test_embed_loader.py @@ -8,8 +8,8 @@ from fastNLP.io import EmbedLoader class TestEmbedLoader(unittest.TestCase): def test_load_with_vocab(self): vocab = Vocabulary() - glove = "test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt" - word2vec = "test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt" + glove = "tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt" + word2vec = "tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt" vocab.add_word('the') vocab.add_word('none') g_m = EmbedLoader.load_with_vocab(glove, vocab) @@ -20,8 +20,8 @@ class TestEmbedLoader(unittest.TestCase): def test_load_without_vocab(self): words = ['the', 'of', 'in', 'a', 'to', 'and'] - glove = "test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt" - word2vec = "test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt" + glove = "tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt" + word2vec = "tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt" g_m, vocab = EmbedLoader.load_without_vocab(glove) self.assertEqual(g_m.shape, (8, 50)) for word in words: diff --git a/tests/models/test_bert.py b/tests/models/test_bert.py index c3ba9454..58178bff 100644 --- a/tests/models/test_bert.py +++ b/tests/models/test_bert.py @@ -11,7 +11,7 @@ from fastNLP.embeddings.bert_embedding import BertEmbedding class TestBert(unittest.TestCase): def test_bert_1(self): vocab = Vocabulary().add_word_lst("this is a test .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=True) model = BertForSequenceClassification(embed, 2) @@ -30,7 +30,7 @@ class TestBert(unittest.TestCase): def test_bert_1_w(self): vocab = Vocabulary().add_word_lst("this is a test .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=False) with self.assertWarns(Warning): @@ -46,7 +46,7 @@ class TestBert(unittest.TestCase): def test_bert_2(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=True) model = BertForMultipleChoice(embed, 2) @@ -62,7 +62,7 @@ class TestBert(unittest.TestCase): def test_bert_2_w(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=False) with self.assertWarns(Warning): @@ -79,7 +79,7 @@ class TestBert(unittest.TestCase): def test_bert_3(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=False) model = BertForTokenClassification(embed, 7) @@ -93,7 +93,7 @@ class TestBert(unittest.TestCase): def test_bert_3_w(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=True) with self.assertWarns(Warning): @@ -108,7 +108,7 @@ class TestBert(unittest.TestCase): def test_bert_4(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=False) model = BertForQuestionAnswering(embed) @@ -126,12 +126,12 @@ class TestBert(unittest.TestCase): from fastNLP.io import CMRC2018BertPipe from fastNLP import Trainer - data_bundle = CMRC2018BertPipe().process_from_file('test/data_for_tests/io/cmrc') + data_bundle = CMRC2018BertPipe().process_from_file('tests/data_for_tests/io/cmrc') data_bundle.rename_field('chars', 'words') train_data = data_bundle.get_dataset('train') vocab = data_bundle.get_vocab('words') - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=False, auto_truncate=True) model = BertForQuestionAnswering(embed) loss = CMRC2018Loss() @@ -142,7 +142,7 @@ class TestBert(unittest.TestCase): def test_bert_5(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=True) model = BertForSentenceMatching(embed) @@ -156,7 +156,7 @@ class TestBert(unittest.TestCase): def test_bert_5_w(self): vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split()) - embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', include_cls_sep=False) with self.assertWarns(Warning): diff --git a/tests/modules/decoder/test_CRF.py b/tests/modules/decoder/test_CRF.py index 55548a41..adac3c40 100644 --- a/tests/modules/decoder/test_CRF.py +++ b/tests/modules/decoder/test_CRF.py @@ -223,7 +223,7 @@ class TestCRF(unittest.TestCase): import torch from fastNLP import seq_len_to_mask - with open('test/data_for_tests/modules/decoder/crf.json', 'r') as f: + with open('tests/data_for_tests/modules/decoder/crf.json', 'r') as f: data = json.load(f) bio_logits = torch.FloatTensor(data['bio_logits']) diff --git a/tests/modules/tokenizer/test_bert_tokenizer.py b/tests/modules/tokenizer/test_bert_tokenizer.py index 3c395164..441e7658 100644 --- a/tests/modules/tokenizer/test_bert_tokenizer.py +++ b/tests/modules/tokenizer/test_bert_tokenizer.py @@ -5,7 +5,7 @@ from fastNLP.modules.tokenizer import BertTokenizer class TestBertTokenizer(unittest.TestCase): def test_run(self): # 测试支持的两种encode方式 - tokenizer = BertTokenizer.from_pretrained('test/data_for_tests/embedding/small_bert') + tokenizer = BertTokenizer.from_pretrained('tests/data_for_tests/embedding/small_bert') tokens1 = tokenizer.encode("This is a demo") tokens2 = tokenizer.encode("This is a demo", add_special_tokens=False) diff --git a/tests/test_tutorials.py b/tests/test_tutorials.py index aa7c4a60..2a224f05 100644 --- a/tests/test_tutorials.py +++ b/tests/test_tutorials.py @@ -85,7 +85,7 @@ class TestTutorial(unittest.TestCase): class TestOldTutorial(unittest.TestCase): def test_fastnlp_10min_tutorial(self): # 从csv读取数据到DataSet - sample_path = "test/data_for_tests/tutorial_sample_dataset.csv" + sample_path = "tests/data_for_tests/tutorial_sample_dataset.csv" dataset = CSVLoader(headers=['raw_sentence', 'label'], sep=' ')._load(sample_path) print(len(dataset)) print(dataset[0]) @@ -183,7 +183,7 @@ class TestOldTutorial(unittest.TestCase): def test_fastnlp_1min_tutorial(self): # tutorials/fastnlp_1min_tutorial.ipynb - data_path = "test/data_for_tests/tutorial_sample_dataset.csv" + data_path = "tests/data_for_tests/tutorial_sample_dataset.csv" ds = CSVLoader(headers=['raw_sentence', 'label'], sep=' ')._load(data_path) print(ds[1]) -- Gitee