diff --git a/.Jenkinsfile b/.Jenkinsfile
index 7c0a64fd39f4402fa0f5f8536788161beca01091..87c286ee7a8b7a14575fc10492e38768c829c3de 100644
--- a/.Jenkinsfile
+++ b/.Jenkinsfile
@@ -29,7 +29,7 @@ pipeline {
steps {
sh 'python -m spacy download en'
sh 'pip install fitlog'
- sh 'pytest ./test --html=test_results.html --self-contained-html'
+ sh 'pytest ./tests --html=test_results.html --self-contained-html'
}
}
}
diff --git a/.travis.yml b/.travis.yml
index 85bac41ea10a0cfcac16a078da7d51ecfd184a11..9c1ff4d3892e790567b7cf65eb16f376b16efd7a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,7 @@ install:
# command to run tests
script:
- python -m spacy download en
- - pytest --cov=fastNLP test/
+ - pytest --cov=fastNLP tests/
after_success:
- bash <(curl -s https://codecov.io/bash)
diff --git a/MANIFEST.in b/MANIFEST.in
index d893b45a65ce7a81a4055c8ae9c92810e3e6ae92..61279be1e084ca572e9127aea03d957d9ee7e070 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
include requirements.txt
include LICENSE
include README.md
-prune test/
+prune tests/
prune reproduction/
prune fastNLP/api
prune fastNLP/automl
\ No newline at end of file
diff --git a/fastNLP/embeddings/bert_embedding.py b/fastNLP/embeddings/bert_embedding.py
index ec2ba26b0f8341bab6b47a7c3bd2d5ef2d0c65bd..c57d2bef2a2e92c69ab3501bb9b16574f5010c0a 100644
--- a/fastNLP/embeddings/bert_embedding.py
+++ b/fastNLP/embeddings/bert_embedding.py
@@ -93,7 +93,7 @@ class BertEmbedding(ContextualEmbedding):
"""
super(BertEmbedding, self).__init__(vocab, word_dropout=word_dropout, dropout=dropout)
- if word_dropout>0:
+ if word_dropout > 0:
assert vocab.unknown != None, "When word_dropout>0, Vocabulary must contain the unknown token."
if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR:
@@ -370,17 +370,29 @@ class _BertWordModel(nn.Module):
include_cls_sep: bool = False, pooled_cls: bool = False, auto_truncate: bool = False, min_freq=2):
super().__init__()
- self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name)
- self.encoder = BertModel.from_pretrained(model_dir_or_name)
- self._max_position_embeddings = self.encoder.config.max_position_embeddings
- # check that encoder_layer_number is valid
- encoder_layer_number = len(self.encoder.encoder.layer)
if isinstance(layers, list):
self.layers = [int(l) for l in layers]
elif isinstance(layers, str):
self.layers = list(map(int, layers.split(',')))
else:
raise TypeError("`layers` only supports str or list[int]")
+ assert len(self.layers) > 0, "There is no layer selected!"
+
+ neg_num_output_layer = -16384
+ pos_num_output_layer = 0
+ for layer in self.layers:
+ if layer < 0:
+ neg_num_output_layer = max(layer, neg_num_output_layer)
+ else:
+ pos_num_output_layer = max(layer, pos_num_output_layer)
+
+ self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name)
+ self.encoder = BertModel.from_pretrained(model_dir_or_name,
+ neg_num_output_layer=neg_num_output_layer,
+ pos_num_output_layer=pos_num_output_layer)
+ self._max_position_embeddings = self.encoder.config.max_position_embeddings
+ # check that encoder_layer_number is valid
+ encoder_layer_number = len(self.encoder.encoder.layer)
for layer in self.layers:
if layer < 0:
assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \
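Note: taken together, the hunk above first parses `layers`, then reduces it to the two bounds handed to `BertModel.from_pretrained`, so the encoder can later skip layers that no requested output depends on. A condensed, standalone restatement of that bound computation (mirroring the diff, not a fastNLP API):

```python
# Standalone sketch of the bound computation in the hunk above.
def output_layer_bounds(layers):
    """Return the negative index closest to the top of the stack and the
    largest non-negative index among the requested layers."""
    assert len(layers) > 0, "There is no layer selected!"
    neg_num_output_layer = -16384  # sentinel: "no negative index requested"
    pos_num_output_layer = 0
    for layer in layers:
        if layer < 0:
            neg_num_output_layer = max(layer, neg_num_output_layer)
        else:
            pos_num_output_layer = max(layer, pos_num_output_layer)
    return neg_num_output_layer, pos_num_output_layer

print(output_layer_bounds([-1, -2]))   # (-1, 0)
print(output_layer_bounds([0, 4, 8]))  # (-16384, 8)
```

The `-16384` sentinel is small enough that any real negative index wins the `max`, so it effectively means "no negative layer was requested".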
diff --git a/fastNLP/embeddings/roberta_embedding.py b/fastNLP/embeddings/roberta_embedding.py
index 90ea10854dd7e9ec8440c8a1625b5ac062b39ba2..ec95abe2a0ba15d73c3361edc7d400e7fce17ac2 100644
--- a/fastNLP/embeddings/roberta_embedding.py
+++ b/fastNLP/embeddings/roberta_embedding.py
@@ -196,20 +196,30 @@ class _RobertaWordModel(nn.Module):
include_cls_sep: bool = False, pooled_cls: bool = False, auto_truncate: bool = False, min_freq=2):
super().__init__()
- self.tokenizer = RobertaTokenizer.from_pretrained(model_dir_or_name)
- self.encoder = RobertaModel.from_pretrained(model_dir_or_name)
- # RobertaEmbedding sets padding_idx to 1 and uses a rather peculiar position computation, hence the -2
- self._max_position_embeddings = self.encoder.config.max_position_embeddings - 2
- # check that encoder_layer_number is valid
- encoder_layer_number = len(self.encoder.encoder.layer)
-
if isinstance(layers, list):
self.layers = [int(l) for l in layers]
elif isinstance(layers, str):
self.layers = list(map(int, layers.split(',')))
else:
raise TypeError("`layers` only supports str or list[int]")
+ assert len(self.layers) > 0, "There is no layer selected!"
+
+ neg_num_output_layer = -16384
+ pos_num_output_layer = 0
+ for layer in self.layers:
+ if layer < 0:
+ neg_num_output_layer = max(layer, neg_num_output_layer)
+ else:
+ pos_num_output_layer = max(layer, pos_num_output_layer)
+ self.tokenizer = RobertaTokenizer.from_pretrained(model_dir_or_name)
+ self.encoder = RobertaModel.from_pretrained(model_dir_or_name,
+ neg_num_output_layer=neg_num_output_layer,
+ pos_num_output_layer=pos_num_output_layer)
+ # RobertaEmbedding sets padding_idx to 1 and uses a rather peculiar position computation, hence the -2
+ self._max_position_embeddings = self.encoder.config.max_position_embeddings - 2
+ # check that encoder_layer_number is valid
+ encoder_layer_number = len(self.encoder.encoder.layer)
for layer in self.layers:
if layer < 0:
assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \
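Note: the `-2` the comment refers to comes from how RoBERTa assigns position ids: with `padding_idx=1`, real tokens receive positions starting at `padding_idx + 1`, so positions 0 and 1 are never given to content tokens. A hypothetical standalone illustration of that scheme (following the usual fairseq-style position computation, not code from this diff):

```python
import torch

def roberta_position_ids(input_ids, padding_idx=1):
    # real tokens get cumulative positions offset by padding_idx;
    # padding tokens keep position padding_idx itself
    mask = input_ids.ne(padding_idx).int()
    return torch.cumsum(mask, dim=1) * mask + padding_idx

ids = torch.tensor([[0, 31414, 232, 2, 1, 1]])  # <s> ... </s> <pad> <pad>
print(roberta_position_ids(ids))  # tensor([[2, 3, 4, 5, 1, 1]])
# Content positions start at 2, so only max_position_embeddings - 2
# positions are actually usable -- hence the subtraction above.
```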
diff --git a/fastNLP/modules/encoder/bert.py b/fastNLP/modules/encoder/bert.py
index 7a9ba57e9eacb4289aab27113382137c54946bf1..8d5d576e8f7adab6afa2eb4eabade94cbc238ff1 100644
--- a/fastNLP/modules/encoder/bert.py
+++ b/fastNLP/modules/encoder/bert.py
@@ -366,19 +366,28 @@ class BertLayer(nn.Module):
class BertEncoder(nn.Module):
- def __init__(self, config):
+ def __init__(self, config, num_output_layer=-1):
super(BertEncoder, self).__init__()
layer = BertLayer(config)
self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])
+ num_output_layer = num_output_layer if num_output_layer >= 0 else (len(self.layer) + num_output_layer)
+ self.num_output_layer = max(min(num_output_layer, len(self.layer)), 0)
+ if self.num_output_layer + 1 < len(self.layer):
+ logger.info(f'The transformer encoder will exit early after layer {self.num_output_layer} '
+ f'(counting from 0)!')
def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True):
all_encoder_layers = []
- for layer_module in self.layer:
+ for idx, layer_module in enumerate(self.layer):
+ if idx > self.num_output_layer:
+ break
hidden_states = layer_module(hidden_states, attention_mask)
if output_all_encoded_layers:
all_encoder_layers.append(hidden_states)
if not output_all_encoded_layers:
all_encoder_layers.append(hidden_states)
+ if len(all_encoder_layers) == 0:
+ all_encoder_layers.append(hidden_states)
return all_encoder_layers
@@ -435,6 +444,9 @@ class BertModel(nn.Module):
self.config = config
self.hidden_size = self.config.hidden_size
self.model_type = 'bert'
+ neg_num_output_layer = kwargs.get('neg_num_output_layer', -1)
+ pos_num_output_layer = kwargs.get('pos_num_output_layer', self.config.num_hidden_layers - 1)
+ self.num_output_layer = max(neg_num_output_layer + self.config.num_hidden_layers, pos_num_output_layer)
if hasattr(config, 'sinusoidal_pos_embds'):
self.model_type = 'distilbert'
elif 'model_type' in kwargs:
@@ -445,7 +457,7 @@ class BertModel(nn.Module):
else:
self.embeddings = BertEmbeddings(config)
- self.encoder = BertEncoder(config)
+ self.encoder = BertEncoder(config, num_output_layer=self.num_output_layer)
if self.model_type != 'distilbert':
self.pooler = BertPooler(config)
else:
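Note: `BertModel` merges the two bounds into a single 0-based cut-off, and `BertEncoder` then clamps it and breaks out of the layer loop once the cut-off is passed. A standalone sketch of that resolution, mirroring the hunks above (not the fastNLP API):

```python
def resolve_cutoff(neg_num_output_layer, pos_num_output_layer, num_hidden_layers):
    # BertModel.__init__: the deeper of the two requested bounds wins
    num_output_layer = max(neg_num_output_layer + num_hidden_layers,
                           pos_num_output_layer)
    # BertEncoder.__init__: map negatives back and clamp into range
    if num_output_layer < 0:
        num_output_layer += num_hidden_layers
    return max(min(num_output_layer, num_hidden_layers), 0)

# layers='-1' -> neg=-1, pos=0: cut-off 11 on a 12-layer model (run everything)
print(resolve_cutoff(-1, 0, 12))       # 11
# layers='4' -> neg=-16384, pos=4: layers 5..11 are skipped entirely
print(resolve_cutoff(-16384, 4, 12))   # 4
```

In `forward`, the loop breaks as soon as `idx > self.num_output_layer`, and the final guard appends the last hidden state so the returned list is never empty. `RobertaModel` (next file) simply forwards `**kwargs` so the same bounds reach `BertModel`.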
diff --git a/fastNLP/modules/encoder/roberta.py b/fastNLP/modules/encoder/roberta.py
index da0ab5378b5d0a4c9fa59637de1ddcc2921fc963..10bdb64ba4887259eae9d141eaebb703d462d7b9 100644
--- a/fastNLP/modules/encoder/roberta.py
+++ b/fastNLP/modules/encoder/roberta.py
@@ -64,8 +64,8 @@ class RobertaModel(BertModel):
undocumented
"""
- def __init__(self, config):
- super().__init__(config)
+ def __init__(self, config, *inputs, **kwargs):
+ super().__init__(config, *inputs, **kwargs)
self.embeddings = RobertaEmbeddings(config)
self.apply(self.init_bert_weights)
diff --git a/fastNLP/modules/encoder/seq2seq_encoder.py b/fastNLP/modules/encoder/seq2seq_encoder.py
index d280582a8d04813e519679e328edf40f142dd81b..5eae1e6d4552cffeeca363d2aa4b93fe289517e1 100644
--- a/fastNLP/modules/encoder/seq2seq_encoder.py
+++ b/fastNLP/modules/encoder/seq2seq_encoder.py
@@ -132,7 +132,7 @@ class TransformerSeq2SeqEncoder(Seq2SeqEncoder):
x = self.input_fc(x)
x = F.dropout(x, p=self.dropout, training=self.training)
- encoder_mask = seq_len_to_mask(seq_len)
+ encoder_mask = seq_len_to_mask(seq_len, max_len=max_src_len)
encoder_mask = encoder_mask.to(device)
for layer in self.layer_stacks:
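Note: without `max_len`, the mask is only as wide as the longest sequence in the current batch; if `x` was padded to a larger `max_src_len` elsewhere, the mask no longer lines up with the attention tensors. A generic sketch of the semantics (fastNLP's own `seq_len_to_mask` accepts `max_len`, as the one-line fix shows):

```python
import torch

def seq_len_to_mask(seq_len, max_len=None):
    # width defaults to the longest length in *this* batch
    max_len = int(max_len) if max_len is not None else int(seq_len.max())
    return torch.arange(max_len)[None, :] < seq_len[:, None]

seq_len = torch.tensor([3, 2])
print(seq_len_to_mask(seq_len).shape)     # torch.Size([2, 3]) -- may mismatch x
print(seq_len_to_mask(seq_len, 5).shape)  # torch.Size([2, 5]) -- matches padding
```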
diff --git a/test/__init__.py b/tests/__init__.py
similarity index 100%
rename from test/__init__.py
rename to tests/__init__.py
diff --git a/test/core/__init__.py b/tests/core/__init__.py
similarity index 100%
rename from test/core/__init__.py
rename to tests/core/__init__.py
diff --git a/test/core/test_batch.py b/tests/core/test_batch.py
similarity index 100%
rename from test/core/test_batch.py
rename to tests/core/test_batch.py
diff --git a/test/core/test_callbacks.py b/tests/core/test_callbacks.py
similarity index 100%
rename from test/core/test_callbacks.py
rename to tests/core/test_callbacks.py
diff --git a/test/core/test_dataset.py b/tests/core/test_dataset.py
similarity index 99%
rename from test/core/test_dataset.py
rename to tests/core/test_dataset.py
index 03f24ad1322081f409984cad12fac2f8f46c32a8..94dd3bdba09ac1f13aae153b9632e5829c2e9101 100644
--- a/test/core/test_dataset.py
+++ b/tests/core/test_dataset.py
@@ -228,7 +228,7 @@ class TestDataSetMethods(unittest.TestCase):
def split_sent(ins):
return ins['raw_sentence'].split()
csv_loader = CSVLoader(headers=['raw_sentence', 'label'], sep='\t')
- data_bundle = csv_loader.load('test/data_for_tests/tutorial_sample_dataset.csv')
+ data_bundle = csv_loader.load('tests/data_for_tests/tutorial_sample_dataset.csv')
dataset = data_bundle.datasets['train']
dataset.drop(lambda x: len(x['raw_sentence'].split()) == 0, inplace=True)
dataset.apply(split_sent, new_field_name='words', is_input=True)
diff --git a/test/core/test_dist_trainer.py b/tests/core/test_dist_trainer.py
similarity index 100%
rename from test/core/test_dist_trainer.py
rename to tests/core/test_dist_trainer.py
diff --git a/test/core/test_field.py b/tests/core/test_field.py
similarity index 100%
rename from test/core/test_field.py
rename to tests/core/test_field.py
diff --git a/test/core/test_instance.py b/tests/core/test_instance.py
similarity index 100%
rename from test/core/test_instance.py
rename to tests/core/test_instance.py
diff --git a/test/core/test_logger.py b/tests/core/test_logger.py
similarity index 100%
rename from test/core/test_logger.py
rename to tests/core/test_logger.py
diff --git a/test/core/test_loss.py b/tests/core/test_loss.py
similarity index 100%
rename from test/core/test_loss.py
rename to tests/core/test_loss.py
diff --git a/test/core/test_metrics.py b/tests/core/test_metrics.py
similarity index 100%
rename from test/core/test_metrics.py
rename to tests/core/test_metrics.py
diff --git a/test/core/test_optimizer.py b/tests/core/test_optimizer.py
similarity index 100%
rename from test/core/test_optimizer.py
rename to tests/core/test_optimizer.py
diff --git a/test/core/test_predictor.py b/tests/core/test_predictor.py
similarity index 100%
rename from test/core/test_predictor.py
rename to tests/core/test_predictor.py
diff --git a/test/core/test_sampler.py b/tests/core/test_sampler.py
similarity index 100%
rename from test/core/test_sampler.py
rename to tests/core/test_sampler.py
diff --git a/test/core/test_tester.py b/tests/core/test_tester.py
similarity index 100%
rename from test/core/test_tester.py
rename to tests/core/test_tester.py
diff --git a/test/core/test_trainer.py b/tests/core/test_trainer.py
similarity index 100%
rename from test/core/test_trainer.py
rename to tests/core/test_trainer.py
diff --git a/test/core/test_utils.py b/tests/core/test_utils.py
similarity index 91%
rename from test/core/test_utils.py
rename to tests/core/test_utils.py
index f4a29658de41da637151ffbdedb4b476be0e56fe..f43a526cb2fa03ba65f170f1c108751c79da6d83 100644
--- a/test/core/test_utils.py
+++ b/tests/core/test_utils.py
@@ -120,8 +120,8 @@ class TestCache(unittest.TestCase):
def test_cache_save(self):
try:
start_time = time.time()
- embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
- 'test/data_for_tests/cws_train')
+ embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
+ 'tests/data_for_tests/cws_train')
end_time = time.time()
pre_time = end_time - start_time
with open('test/demo1.pkl', 'rb') as f:
@@ -130,8 +130,8 @@ class TestCache(unittest.TestCase):
for i in range(embed.shape[0]):
self.assertListEqual(embed[i].tolist(), _embed[i].tolist())
start_time = time.time()
- embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
- 'test/data_for_tests/cws_train')
+ embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
+ 'tests/data_for_tests/cws_train')
end_time = time.time()
read_time = end_time - start_time
print("Read using {:.3f}, while prepare using:{:.3f}".format(read_time, pre_time))
@@ -142,7 +142,7 @@ class TestCache(unittest.TestCase):
def test_cache_save_overwrite_path(self):
try:
start_time = time.time()
- embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', 'test/data_for_tests/cws_train',
+ embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt', 'tests/data_for_tests/cws_train',
_cache_fp='test/demo_overwrite.pkl')
end_time = time.time()
pre_time = end_time - start_time
@@ -152,8 +152,8 @@ class TestCache(unittest.TestCase):
for i in range(embed.shape[0]):
self.assertListEqual(embed[i].tolist(), _embed[i].tolist())
start_time = time.time()
- embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
- 'test/data_for_tests/cws_train',
+ embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
+ 'tests/data_for_tests/cws_train',
_cache_fp='test/demo_overwrite.pkl')
end_time = time.time()
read_time = end_time - start_time
@@ -165,8 +165,8 @@ class TestCache(unittest.TestCase):
def test_cache_refresh(self):
try:
start_time = time.time()
- embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
- 'test/data_for_tests/cws_train',
+ embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
+ 'tests/data_for_tests/cws_train',
_refresh=True)
end_time = time.time()
pre_time = end_time - start_time
@@ -176,8 +176,8 @@ class TestCache(unittest.TestCase):
for i in range(embed.shape[0]):
self.assertListEqual(embed[i].tolist(), _embed[i].tolist())
start_time = time.time()
- embed, vocab, d = process_data_1('test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
- 'test/data_for_tests/cws_train',
+ embed, vocab, d = process_data_1('tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt',
+ 'tests/data_for_tests/cws_train',
_refresh=True)
end_time = time.time()
read_time = end_time - start_time
diff --git a/test/core/test_vocabulary.py b/tests/core/test_vocabulary.py
similarity index 100%
rename from test/core/test_vocabulary.py
rename to tests/core/test_vocabulary.py
diff --git a/test/data_for_tests/config b/tests/data_for_tests/config
similarity index 100%
rename from test/data_for_tests/config
rename to tests/data_for_tests/config
diff --git a/test/data_for_tests/conll_2003_example.txt b/tests/data_for_tests/conll_2003_example.txt
similarity index 100%
rename from test/data_for_tests/conll_2003_example.txt
rename to tests/data_for_tests/conll_2003_example.txt
diff --git a/test/data_for_tests/conll_example.txt b/tests/data_for_tests/conll_example.txt
similarity index 100%
rename from test/data_for_tests/conll_example.txt
rename to tests/data_for_tests/conll_example.txt
diff --git a/test/data_for_tests/cws_pku_utf_8 b/tests/data_for_tests/cws_pku_utf_8
similarity index 100%
rename from test/data_for_tests/cws_pku_utf_8
rename to tests/data_for_tests/cws_pku_utf_8
diff --git a/test/data_for_tests/cws_test b/tests/data_for_tests/cws_test
similarity index 100%
rename from test/data_for_tests/cws_test
rename to tests/data_for_tests/cws_test
diff --git a/test/data_for_tests/cws_train b/tests/data_for_tests/cws_train
similarity index 100%
rename from test/data_for_tests/cws_train
rename to tests/data_for_tests/cws_train
diff --git a/test/data_for_tests/embedding/small_bert/config.json b/tests/data_for_tests/embedding/small_bert/config.json
similarity index 100%
rename from test/data_for_tests/embedding/small_bert/config.json
rename to tests/data_for_tests/embedding/small_bert/config.json
diff --git a/test/data_for_tests/embedding/small_bert/small_pytorch_model.bin b/tests/data_for_tests/embedding/small_bert/small_pytorch_model.bin
similarity index 100%
rename from test/data_for_tests/embedding/small_bert/small_pytorch_model.bin
rename to tests/data_for_tests/embedding/small_bert/small_pytorch_model.bin
diff --git a/test/data_for_tests/embedding/small_bert/vocab.txt b/tests/data_for_tests/embedding/small_bert/vocab.txt
similarity index 100%
rename from test/data_for_tests/embedding/small_bert/vocab.txt
rename to tests/data_for_tests/embedding/small_bert/vocab.txt
diff --git a/test/data_for_tests/embedding/small_elmo/char.dic b/tests/data_for_tests/embedding/small_elmo/char.dic
similarity index 100%
rename from test/data_for_tests/embedding/small_elmo/char.dic
rename to tests/data_for_tests/embedding/small_elmo/char.dic
diff --git a/test/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json b/tests/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json
similarity index 100%
rename from test/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json
rename to tests/data_for_tests/embedding/small_elmo/elmo_1x16_16_32cnn_1xhighway_options.json
diff --git a/test/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl b/tests/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl
similarity index 100%
rename from test/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl
rename to tests/data_for_tests/embedding/small_elmo/elmo_mini_for_testing.pkl
diff --git a/test/data_for_tests/embedding/small_gpt2/config.json b/tests/data_for_tests/embedding/small_gpt2/config.json
similarity index 100%
rename from test/data_for_tests/embedding/small_gpt2/config.json
rename to tests/data_for_tests/embedding/small_gpt2/config.json
diff --git a/test/data_for_tests/embedding/small_gpt2/merges.txt b/tests/data_for_tests/embedding/small_gpt2/merges.txt
similarity index 100%
rename from test/data_for_tests/embedding/small_gpt2/merges.txt
rename to tests/data_for_tests/embedding/small_gpt2/merges.txt
diff --git a/test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin b/tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin
similarity index 100%
rename from test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin
rename to tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin
diff --git a/test/data_for_tests/embedding/small_gpt2/vocab.json b/tests/data_for_tests/embedding/small_gpt2/vocab.json
similarity index 100%
rename from test/data_for_tests/embedding/small_gpt2/vocab.json
rename to tests/data_for_tests/embedding/small_gpt2/vocab.json
diff --git a/test/data_for_tests/embedding/small_roberta/config.json b/tests/data_for_tests/embedding/small_roberta/config.json
similarity index 100%
rename from test/data_for_tests/embedding/small_roberta/config.json
rename to tests/data_for_tests/embedding/small_roberta/config.json
diff --git a/test/data_for_tests/embedding/small_roberta/merges.txt b/tests/data_for_tests/embedding/small_roberta/merges.txt
similarity index 100%
rename from test/data_for_tests/embedding/small_roberta/merges.txt
rename to tests/data_for_tests/embedding/small_roberta/merges.txt
diff --git a/test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin b/tests/data_for_tests/embedding/small_roberta/small_pytorch_model.bin
similarity index 100%
rename from test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin
rename to tests/data_for_tests/embedding/small_roberta/small_pytorch_model.bin
diff --git a/test/data_for_tests/embedding/small_roberta/vocab.json b/tests/data_for_tests/embedding/small_roberta/vocab.json
similarity index 100%
rename from test/data_for_tests/embedding/small_roberta/vocab.json
rename to tests/data_for_tests/embedding/small_roberta/vocab.json
diff --git a/test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt b/tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt
similarity index 100%
rename from test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt
rename to tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt
diff --git a/test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt b/tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt
similarity index 100%
rename from test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt
rename to tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt
diff --git a/test/data_for_tests/io/BQCorpus/dev.txt b/tests/data_for_tests/io/BQCorpus/dev.txt
similarity index 100%
rename from test/data_for_tests/io/BQCorpus/dev.txt
rename to tests/data_for_tests/io/BQCorpus/dev.txt
diff --git a/test/data_for_tests/io/BQCorpus/test.txt b/tests/data_for_tests/io/BQCorpus/test.txt
similarity index 100%
rename from test/data_for_tests/io/BQCorpus/test.txt
rename to tests/data_for_tests/io/BQCorpus/test.txt
diff --git a/test/data_for_tests/io/BQCorpus/train.txt b/tests/data_for_tests/io/BQCorpus/train.txt
similarity index 100%
rename from test/data_for_tests/io/BQCorpus/train.txt
rename to tests/data_for_tests/io/BQCorpus/train.txt
diff --git a/test/data_for_tests/io/ChnSentiCorp/dev.txt b/tests/data_for_tests/io/ChnSentiCorp/dev.txt
similarity index 100%
rename from test/data_for_tests/io/ChnSentiCorp/dev.txt
rename to tests/data_for_tests/io/ChnSentiCorp/dev.txt
diff --git a/test/data_for_tests/io/ChnSentiCorp/test.txt b/tests/data_for_tests/io/ChnSentiCorp/test.txt
similarity index 100%
rename from test/data_for_tests/io/ChnSentiCorp/test.txt
rename to tests/data_for_tests/io/ChnSentiCorp/test.txt
diff --git a/test/data_for_tests/io/ChnSentiCorp/train.txt b/tests/data_for_tests/io/ChnSentiCorp/train.txt
similarity index 100%
rename from test/data_for_tests/io/ChnSentiCorp/train.txt
rename to tests/data_for_tests/io/ChnSentiCorp/train.txt
diff --git a/test/data_for_tests/io/LCQMC/dev.txt b/tests/data_for_tests/io/LCQMC/dev.txt
similarity index 100%
rename from test/data_for_tests/io/LCQMC/dev.txt
rename to tests/data_for_tests/io/LCQMC/dev.txt
diff --git a/test/data_for_tests/io/LCQMC/test.txt b/tests/data_for_tests/io/LCQMC/test.txt
similarity index 100%
rename from test/data_for_tests/io/LCQMC/test.txt
rename to tests/data_for_tests/io/LCQMC/test.txt
diff --git a/test/data_for_tests/io/LCQMC/train.txt b/tests/data_for_tests/io/LCQMC/train.txt
similarity index 100%
rename from test/data_for_tests/io/LCQMC/train.txt
rename to tests/data_for_tests/io/LCQMC/train.txt
diff --git a/test/data_for_tests/io/MNLI/dev_matched.tsv b/tests/data_for_tests/io/MNLI/dev_matched.tsv
similarity index 100%
rename from test/data_for_tests/io/MNLI/dev_matched.tsv
rename to tests/data_for_tests/io/MNLI/dev_matched.tsv
diff --git a/test/data_for_tests/io/MNLI/dev_mismatched.tsv b/tests/data_for_tests/io/MNLI/dev_mismatched.tsv
similarity index 100%
rename from test/data_for_tests/io/MNLI/dev_mismatched.tsv
rename to tests/data_for_tests/io/MNLI/dev_mismatched.tsv
diff --git a/test/data_for_tests/io/MNLI/test_matched.tsv b/tests/data_for_tests/io/MNLI/test_matched.tsv
similarity index 100%
rename from test/data_for_tests/io/MNLI/test_matched.tsv
rename to tests/data_for_tests/io/MNLI/test_matched.tsv
diff --git a/test/data_for_tests/io/MNLI/test_mismatched.tsv b/tests/data_for_tests/io/MNLI/test_mismatched.tsv
similarity index 100%
rename from test/data_for_tests/io/MNLI/test_mismatched.tsv
rename to tests/data_for_tests/io/MNLI/test_mismatched.tsv
diff --git a/test/data_for_tests/io/MNLI/train.tsv b/tests/data_for_tests/io/MNLI/train.tsv
similarity index 100%
rename from test/data_for_tests/io/MNLI/train.tsv
rename to tests/data_for_tests/io/MNLI/train.tsv
diff --git a/test/data_for_tests/io/MSRA_NER/dev.conll b/tests/data_for_tests/io/MSRA_NER/dev.conll
similarity index 100%
rename from test/data_for_tests/io/MSRA_NER/dev.conll
rename to tests/data_for_tests/io/MSRA_NER/dev.conll
diff --git a/test/data_for_tests/io/MSRA_NER/test.conll b/tests/data_for_tests/io/MSRA_NER/test.conll
similarity index 100%
rename from test/data_for_tests/io/MSRA_NER/test.conll
rename to tests/data_for_tests/io/MSRA_NER/test.conll
diff --git a/test/data_for_tests/io/MSRA_NER/train.conll b/tests/data_for_tests/io/MSRA_NER/train.conll
similarity index 100%
rename from test/data_for_tests/io/MSRA_NER/train.conll
rename to tests/data_for_tests/io/MSRA_NER/train.conll
diff --git a/test/data_for_tests/io/OntoNotes/dev.txt b/tests/data_for_tests/io/OntoNotes/dev.txt
similarity index 100%
rename from test/data_for_tests/io/OntoNotes/dev.txt
rename to tests/data_for_tests/io/OntoNotes/dev.txt
diff --git a/test/data_for_tests/io/OntoNotes/test.txt b/tests/data_for_tests/io/OntoNotes/test.txt
similarity index 100%
rename from test/data_for_tests/io/OntoNotes/test.txt
rename to tests/data_for_tests/io/OntoNotes/test.txt
diff --git a/test/data_for_tests/io/OntoNotes/train.txt b/tests/data_for_tests/io/OntoNotes/train.txt
similarity index 100%
rename from test/data_for_tests/io/OntoNotes/train.txt
rename to tests/data_for_tests/io/OntoNotes/train.txt
diff --git a/test/data_for_tests/io/QNLI/dev.tsv b/tests/data_for_tests/io/QNLI/dev.tsv
similarity index 100%
rename from test/data_for_tests/io/QNLI/dev.tsv
rename to tests/data_for_tests/io/QNLI/dev.tsv
diff --git a/test/data_for_tests/io/QNLI/test.tsv b/tests/data_for_tests/io/QNLI/test.tsv
similarity index 100%
rename from test/data_for_tests/io/QNLI/test.tsv
rename to tests/data_for_tests/io/QNLI/test.tsv
diff --git a/test/data_for_tests/io/QNLI/train.tsv b/tests/data_for_tests/io/QNLI/train.tsv
similarity index 100%
rename from test/data_for_tests/io/QNLI/train.tsv
rename to tests/data_for_tests/io/QNLI/train.tsv
diff --git a/test/data_for_tests/io/Quora/dev.tsv b/tests/data_for_tests/io/Quora/dev.tsv
similarity index 100%
rename from test/data_for_tests/io/Quora/dev.tsv
rename to tests/data_for_tests/io/Quora/dev.tsv
diff --git a/test/data_for_tests/io/Quora/test.tsv b/tests/data_for_tests/io/Quora/test.tsv
similarity index 100%
rename from test/data_for_tests/io/Quora/test.tsv
rename to tests/data_for_tests/io/Quora/test.tsv
diff --git a/test/data_for_tests/io/Quora/train.tsv b/tests/data_for_tests/io/Quora/train.tsv
similarity index 100%
rename from test/data_for_tests/io/Quora/train.tsv
rename to tests/data_for_tests/io/Quora/train.tsv
diff --git a/test/data_for_tests/io/RTE/dev.tsv b/tests/data_for_tests/io/RTE/dev.tsv
similarity index 100%
rename from test/data_for_tests/io/RTE/dev.tsv
rename to tests/data_for_tests/io/RTE/dev.tsv
diff --git a/test/data_for_tests/io/RTE/test.tsv b/tests/data_for_tests/io/RTE/test.tsv
similarity index 100%
rename from test/data_for_tests/io/RTE/test.tsv
rename to tests/data_for_tests/io/RTE/test.tsv
diff --git a/test/data_for_tests/io/RTE/train.tsv b/tests/data_for_tests/io/RTE/train.tsv
similarity index 100%
rename from test/data_for_tests/io/RTE/train.tsv
rename to tests/data_for_tests/io/RTE/train.tsv
diff --git a/test/data_for_tests/io/SNLI/snli_1.0_dev.jsonl b/tests/data_for_tests/io/SNLI/snli_1.0_dev.jsonl
similarity index 100%
rename from test/data_for_tests/io/SNLI/snli_1.0_dev.jsonl
rename to tests/data_for_tests/io/SNLI/snli_1.0_dev.jsonl
diff --git a/test/data_for_tests/io/SNLI/snli_1.0_test.jsonl b/tests/data_for_tests/io/SNLI/snli_1.0_test.jsonl
similarity index 100%
rename from test/data_for_tests/io/SNLI/snli_1.0_test.jsonl
rename to tests/data_for_tests/io/SNLI/snli_1.0_test.jsonl
diff --git a/test/data_for_tests/io/SNLI/snli_1.0_train.jsonl b/tests/data_for_tests/io/SNLI/snli_1.0_train.jsonl
similarity index 100%
rename from test/data_for_tests/io/SNLI/snli_1.0_train.jsonl
rename to tests/data_for_tests/io/SNLI/snli_1.0_train.jsonl
diff --git a/test/data_for_tests/io/SST-2/dev.tsv b/tests/data_for_tests/io/SST-2/dev.tsv
similarity index 100%
rename from test/data_for_tests/io/SST-2/dev.tsv
rename to tests/data_for_tests/io/SST-2/dev.tsv
diff --git a/test/data_for_tests/io/SST-2/test.tsv b/tests/data_for_tests/io/SST-2/test.tsv
similarity index 100%
rename from test/data_for_tests/io/SST-2/test.tsv
rename to tests/data_for_tests/io/SST-2/test.tsv
diff --git a/test/data_for_tests/io/SST-2/train.tsv b/tests/data_for_tests/io/SST-2/train.tsv
similarity index 100%
rename from test/data_for_tests/io/SST-2/train.tsv
rename to tests/data_for_tests/io/SST-2/train.tsv
diff --git a/test/data_for_tests/io/SST/dev.txt b/tests/data_for_tests/io/SST/dev.txt
similarity index 100%
rename from test/data_for_tests/io/SST/dev.txt
rename to tests/data_for_tests/io/SST/dev.txt
diff --git a/test/data_for_tests/io/SST/test.txt b/tests/data_for_tests/io/SST/test.txt
similarity index 100%
rename from test/data_for_tests/io/SST/test.txt
rename to tests/data_for_tests/io/SST/test.txt
diff --git a/test/data_for_tests/io/SST/train.txt b/tests/data_for_tests/io/SST/train.txt
similarity index 100%
rename from test/data_for_tests/io/SST/train.txt
rename to tests/data_for_tests/io/SST/train.txt
diff --git a/test/data_for_tests/io/THUCNews/dev.txt b/tests/data_for_tests/io/THUCNews/dev.txt
similarity index 100%
rename from test/data_for_tests/io/THUCNews/dev.txt
rename to tests/data_for_tests/io/THUCNews/dev.txt
diff --git a/test/data_for_tests/io/THUCNews/test.txt b/tests/data_for_tests/io/THUCNews/test.txt
similarity index 100%
rename from test/data_for_tests/io/THUCNews/test.txt
rename to tests/data_for_tests/io/THUCNews/test.txt
diff --git a/test/data_for_tests/io/THUCNews/train.txt b/tests/data_for_tests/io/THUCNews/train.txt
similarity index 100%
rename from test/data_for_tests/io/THUCNews/train.txt
rename to tests/data_for_tests/io/THUCNews/train.txt
diff --git a/test/data_for_tests/io/WeiboSenti100k/dev.txt b/tests/data_for_tests/io/WeiboSenti100k/dev.txt
similarity index 100%
rename from test/data_for_tests/io/WeiboSenti100k/dev.txt
rename to tests/data_for_tests/io/WeiboSenti100k/dev.txt
diff --git a/test/data_for_tests/io/WeiboSenti100k/test.txt b/tests/data_for_tests/io/WeiboSenti100k/test.txt
similarity index 100%
rename from test/data_for_tests/io/WeiboSenti100k/test.txt
rename to tests/data_for_tests/io/WeiboSenti100k/test.txt
diff --git a/test/data_for_tests/io/WeiboSenti100k/train.txt b/tests/data_for_tests/io/WeiboSenti100k/train.txt
similarity index 100%
rename from test/data_for_tests/io/WeiboSenti100k/train.txt
rename to tests/data_for_tests/io/WeiboSenti100k/train.txt
diff --git a/test/data_for_tests/io/XNLI/dev.txt b/tests/data_for_tests/io/XNLI/dev.txt
similarity index 100%
rename from test/data_for_tests/io/XNLI/dev.txt
rename to tests/data_for_tests/io/XNLI/dev.txt
diff --git a/test/data_for_tests/io/XNLI/test.txt b/tests/data_for_tests/io/XNLI/test.txt
similarity index 100%
rename from test/data_for_tests/io/XNLI/test.txt
rename to tests/data_for_tests/io/XNLI/test.txt
diff --git a/test/data_for_tests/io/XNLI/train.txt b/tests/data_for_tests/io/XNLI/train.txt
similarity index 100%
rename from test/data_for_tests/io/XNLI/train.txt
rename to tests/data_for_tests/io/XNLI/train.txt
diff --git a/test/data_for_tests/io/ag/test.csv b/tests/data_for_tests/io/ag/test.csv
similarity index 100%
rename from test/data_for_tests/io/ag/test.csv
rename to tests/data_for_tests/io/ag/test.csv
diff --git a/test/data_for_tests/io/ag/train.csv b/tests/data_for_tests/io/ag/train.csv
similarity index 100%
rename from test/data_for_tests/io/ag/train.csv
rename to tests/data_for_tests/io/ag/train.csv
diff --git a/test/data_for_tests/io/cmrc/dev.json b/tests/data_for_tests/io/cmrc/dev.json
similarity index 100%
rename from test/data_for_tests/io/cmrc/dev.json
rename to tests/data_for_tests/io/cmrc/dev.json
diff --git a/test/data_for_tests/io/cmrc/train.json b/tests/data_for_tests/io/cmrc/train.json
similarity index 100%
rename from test/data_for_tests/io/cmrc/train.json
rename to tests/data_for_tests/io/cmrc/train.json
diff --git a/test/data_for_tests/io/cnndm/dev.label.jsonl b/tests/data_for_tests/io/cnndm/dev.label.jsonl
similarity index 100%
rename from test/data_for_tests/io/cnndm/dev.label.jsonl
rename to tests/data_for_tests/io/cnndm/dev.label.jsonl
diff --git a/test/data_for_tests/io/cnndm/test.label.jsonl b/tests/data_for_tests/io/cnndm/test.label.jsonl
similarity index 100%
rename from test/data_for_tests/io/cnndm/test.label.jsonl
rename to tests/data_for_tests/io/cnndm/test.label.jsonl
diff --git a/test/data_for_tests/io/cnndm/train.cnndm.jsonl b/tests/data_for_tests/io/cnndm/train.cnndm.jsonl
similarity index 100%
rename from test/data_for_tests/io/cnndm/train.cnndm.jsonl
rename to tests/data_for_tests/io/cnndm/train.cnndm.jsonl
diff --git a/test/data_for_tests/io/cnndm/vocab b/tests/data_for_tests/io/cnndm/vocab
similarity index 100%
rename from test/data_for_tests/io/cnndm/vocab
rename to tests/data_for_tests/io/cnndm/vocab
diff --git a/test/data_for_tests/io/conll2003/dev.txt b/tests/data_for_tests/io/conll2003/dev.txt
similarity index 100%
rename from test/data_for_tests/io/conll2003/dev.txt
rename to tests/data_for_tests/io/conll2003/dev.txt
diff --git a/test/data_for_tests/io/conll2003/test.txt b/tests/data_for_tests/io/conll2003/test.txt
similarity index 100%
rename from test/data_for_tests/io/conll2003/test.txt
rename to tests/data_for_tests/io/conll2003/test.txt
diff --git a/test/data_for_tests/io/conll2003/train.txt b/tests/data_for_tests/io/conll2003/train.txt
similarity index 100%
rename from test/data_for_tests/io/conll2003/train.txt
rename to tests/data_for_tests/io/conll2003/train.txt
diff --git a/test/data_for_tests/io/coreference/coreference_dev.json b/tests/data_for_tests/io/coreference/coreference_dev.json
similarity index 100%
rename from test/data_for_tests/io/coreference/coreference_dev.json
rename to tests/data_for_tests/io/coreference/coreference_dev.json
diff --git a/test/data_for_tests/io/coreference/coreference_test.json b/tests/data_for_tests/io/coreference/coreference_test.json
similarity index 100%
rename from test/data_for_tests/io/coreference/coreference_test.json
rename to tests/data_for_tests/io/coreference/coreference_test.json
diff --git a/test/data_for_tests/io/coreference/coreference_train.json b/tests/data_for_tests/io/coreference/coreference_train.json
similarity index 100%
rename from test/data_for_tests/io/coreference/coreference_train.json
rename to tests/data_for_tests/io/coreference/coreference_train.json
diff --git a/test/data_for_tests/io/cws_as/dev.txt b/tests/data_for_tests/io/cws_as/dev.txt
similarity index 100%
rename from test/data_for_tests/io/cws_as/dev.txt
rename to tests/data_for_tests/io/cws_as/dev.txt
diff --git a/test/data_for_tests/io/cws_as/test.txt b/tests/data_for_tests/io/cws_as/test.txt
similarity index 100%
rename from test/data_for_tests/io/cws_as/test.txt
rename to tests/data_for_tests/io/cws_as/test.txt
diff --git a/test/data_for_tests/io/cws_as/train.txt b/tests/data_for_tests/io/cws_as/train.txt
similarity index 100%
rename from test/data_for_tests/io/cws_as/train.txt
rename to tests/data_for_tests/io/cws_as/train.txt
diff --git a/test/data_for_tests/io/cws_cityu/dev.txt b/tests/data_for_tests/io/cws_cityu/dev.txt
similarity index 100%
rename from test/data_for_tests/io/cws_cityu/dev.txt
rename to tests/data_for_tests/io/cws_cityu/dev.txt
diff --git a/test/data_for_tests/io/cws_cityu/test.txt b/tests/data_for_tests/io/cws_cityu/test.txt
similarity index 100%
rename from test/data_for_tests/io/cws_cityu/test.txt
rename to tests/data_for_tests/io/cws_cityu/test.txt
diff --git a/test/data_for_tests/io/cws_cityu/train.txt b/tests/data_for_tests/io/cws_cityu/train.txt
similarity index 100%
rename from test/data_for_tests/io/cws_cityu/train.txt
rename to tests/data_for_tests/io/cws_cityu/train.txt
diff --git a/test/data_for_tests/io/cws_msra/dev.txt b/tests/data_for_tests/io/cws_msra/dev.txt
similarity index 100%
rename from test/data_for_tests/io/cws_msra/dev.txt
rename to tests/data_for_tests/io/cws_msra/dev.txt
diff --git a/test/data_for_tests/io/cws_msra/test.txt b/tests/data_for_tests/io/cws_msra/test.txt
similarity index 100%
rename from test/data_for_tests/io/cws_msra/test.txt
rename to tests/data_for_tests/io/cws_msra/test.txt
diff --git a/test/data_for_tests/io/cws_msra/train.txt b/tests/data_for_tests/io/cws_msra/train.txt
similarity index 100%
rename from test/data_for_tests/io/cws_msra/train.txt
rename to tests/data_for_tests/io/cws_msra/train.txt
diff --git a/test/data_for_tests/io/cws_pku/dev.txt b/tests/data_for_tests/io/cws_pku/dev.txt
similarity index 100%
rename from test/data_for_tests/io/cws_pku/dev.txt
rename to tests/data_for_tests/io/cws_pku/dev.txt
diff --git a/test/data_for_tests/io/cws_pku/test.txt b/tests/data_for_tests/io/cws_pku/test.txt
similarity index 100%
rename from test/data_for_tests/io/cws_pku/test.txt
rename to tests/data_for_tests/io/cws_pku/test.txt
diff --git a/test/data_for_tests/io/cws_pku/train.txt b/tests/data_for_tests/io/cws_pku/train.txt
similarity index 100%
rename from test/data_for_tests/io/cws_pku/train.txt
rename to tests/data_for_tests/io/cws_pku/train.txt
diff --git a/test/data_for_tests/io/dbpedia/test.csv b/tests/data_for_tests/io/dbpedia/test.csv
similarity index 100%
rename from test/data_for_tests/io/dbpedia/test.csv
rename to tests/data_for_tests/io/dbpedia/test.csv
diff --git a/test/data_for_tests/io/dbpedia/train.csv b/tests/data_for_tests/io/dbpedia/train.csv
similarity index 100%
rename from test/data_for_tests/io/dbpedia/train.csv
rename to tests/data_for_tests/io/dbpedia/train.csv
diff --git a/test/data_for_tests/io/imdb/dev.txt b/tests/data_for_tests/io/imdb/dev.txt
similarity index 100%
rename from test/data_for_tests/io/imdb/dev.txt
rename to tests/data_for_tests/io/imdb/dev.txt
diff --git a/test/data_for_tests/io/imdb/test.txt b/tests/data_for_tests/io/imdb/test.txt
similarity index 100%
rename from test/data_for_tests/io/imdb/test.txt
rename to tests/data_for_tests/io/imdb/test.txt
diff --git a/test/data_for_tests/io/imdb/train.txt b/tests/data_for_tests/io/imdb/train.txt
similarity index 100%
rename from test/data_for_tests/io/imdb/train.txt
rename to tests/data_for_tests/io/imdb/train.txt
diff --git a/test/data_for_tests/io/peopledaily/dev.txt b/tests/data_for_tests/io/peopledaily/dev.txt
similarity index 100%
rename from test/data_for_tests/io/peopledaily/dev.txt
rename to tests/data_for_tests/io/peopledaily/dev.txt
diff --git a/test/data_for_tests/io/peopledaily/test.txt b/tests/data_for_tests/io/peopledaily/test.txt
similarity index 100%
rename from test/data_for_tests/io/peopledaily/test.txt
rename to tests/data_for_tests/io/peopledaily/test.txt
diff --git a/test/data_for_tests/io/peopledaily/train.txt b/tests/data_for_tests/io/peopledaily/train.txt
similarity index 100%
rename from test/data_for_tests/io/peopledaily/train.txt
rename to tests/data_for_tests/io/peopledaily/train.txt
diff --git a/test/data_for_tests/io/weibo_NER/dev.conll b/tests/data_for_tests/io/weibo_NER/dev.conll
similarity index 100%
rename from test/data_for_tests/io/weibo_NER/dev.conll
rename to tests/data_for_tests/io/weibo_NER/dev.conll
diff --git a/test/data_for_tests/io/weibo_NER/test.conll b/tests/data_for_tests/io/weibo_NER/test.conll
similarity index 100%
rename from test/data_for_tests/io/weibo_NER/test.conll
rename to tests/data_for_tests/io/weibo_NER/test.conll
diff --git a/test/data_for_tests/io/weibo_NER/train.conll b/tests/data_for_tests/io/weibo_NER/train.conll
similarity index 100%
rename from test/data_for_tests/io/weibo_NER/train.conll
rename to tests/data_for_tests/io/weibo_NER/train.conll
diff --git a/test/data_for_tests/io/yelp_review_full/dev.csv b/tests/data_for_tests/io/yelp_review_full/dev.csv
similarity index 100%
rename from test/data_for_tests/io/yelp_review_full/dev.csv
rename to tests/data_for_tests/io/yelp_review_full/dev.csv
diff --git a/test/data_for_tests/io/yelp_review_full/test.csv b/tests/data_for_tests/io/yelp_review_full/test.csv
similarity index 100%
rename from test/data_for_tests/io/yelp_review_full/test.csv
rename to tests/data_for_tests/io/yelp_review_full/test.csv
diff --git a/test/data_for_tests/io/yelp_review_full/train.csv b/tests/data_for_tests/io/yelp_review_full/train.csv
similarity index 100%
rename from test/data_for_tests/io/yelp_review_full/train.csv
rename to tests/data_for_tests/io/yelp_review_full/train.csv
diff --git a/test/data_for_tests/io/yelp_review_polarity/dev.csv b/tests/data_for_tests/io/yelp_review_polarity/dev.csv
similarity index 100%
rename from test/data_for_tests/io/yelp_review_polarity/dev.csv
rename to tests/data_for_tests/io/yelp_review_polarity/dev.csv
diff --git a/test/data_for_tests/io/yelp_review_polarity/test.csv b/tests/data_for_tests/io/yelp_review_polarity/test.csv
similarity index 100%
rename from test/data_for_tests/io/yelp_review_polarity/test.csv
rename to tests/data_for_tests/io/yelp_review_polarity/test.csv
diff --git a/test/data_for_tests/io/yelp_review_polarity/train.csv b/tests/data_for_tests/io/yelp_review_polarity/train.csv
similarity index 100%
rename from test/data_for_tests/io/yelp_review_polarity/train.csv
rename to tests/data_for_tests/io/yelp_review_polarity/train.csv
diff --git a/test/data_for_tests/modules/decoder/crf.json b/tests/data_for_tests/modules/decoder/crf.json
similarity index 100%
rename from test/data_for_tests/modules/decoder/crf.json
rename to tests/data_for_tests/modules/decoder/crf.json
diff --git a/test/data_for_tests/people.txt b/tests/data_for_tests/people.txt
similarity index 100%
rename from test/data_for_tests/people.txt
rename to tests/data_for_tests/people.txt
diff --git a/test/data_for_tests/people_daily_raw.txt b/tests/data_for_tests/people_daily_raw.txt
similarity index 100%
rename from test/data_for_tests/people_daily_raw.txt
rename to tests/data_for_tests/people_daily_raw.txt
diff --git a/test/data_for_tests/sample_mnli.tsv b/tests/data_for_tests/sample_mnli.tsv
similarity index 100%
rename from test/data_for_tests/sample_mnli.tsv
rename to tests/data_for_tests/sample_mnli.tsv
diff --git a/test/data_for_tests/sample_snli.jsonl b/tests/data_for_tests/sample_snli.jsonl
similarity index 100%
rename from test/data_for_tests/sample_snli.jsonl
rename to tests/data_for_tests/sample_snli.jsonl
diff --git a/test/data_for_tests/text_classify.txt b/tests/data_for_tests/text_classify.txt
similarity index 100%
rename from test/data_for_tests/text_classify.txt
rename to tests/data_for_tests/text_classify.txt
diff --git a/test/data_for_tests/tutorial_sample_dataset.csv b/tests/data_for_tests/tutorial_sample_dataset.csv
similarity index 100%
rename from test/data_for_tests/tutorial_sample_dataset.csv
rename to tests/data_for_tests/tutorial_sample_dataset.csv
diff --git a/test/data_for_tests/zh_sample.conllx b/tests/data_for_tests/zh_sample.conllx
similarity index 100%
rename from test/data_for_tests/zh_sample.conllx
rename to tests/data_for_tests/zh_sample.conllx
diff --git a/test/embeddings/__init__.py b/tests/embeddings/__init__.py
similarity index 100%
rename from test/embeddings/__init__.py
rename to tests/embeddings/__init__.py
diff --git a/test/embeddings/test_bert_embedding.py b/tests/embeddings/test_bert_embedding.py
similarity index 86%
rename from test/embeddings/test_bert_embedding.py
rename to tests/embeddings/test_bert_embedding.py
index 2e619bcb096a9b7033f891a69ea8df0c0103908e..f0104a58cf2779d955c800c06c65a2946c9cbccd 100644
--- a/test/embeddings/test_bert_embedding.py
+++ b/tests/embeddings/test_bert_embedding.py
@@ -32,7 +32,7 @@ class TestDownload(unittest.TestCase):
class TestBertEmbedding(unittest.TestCase):
def test_bert_embedding_1(self):
vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1)
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1)
requires_grad = embed.requires_grad
embed.requires_grad = not requires_grad
embed.train()
@@ -40,14 +40,14 @@ class TestBertEmbedding(unittest.TestCase):
result = embed(words)
self.assertEqual(result.size(), (1, 4, 16))
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1)
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1)
embed.eval()
words = torch.LongTensor([[2, 3, 4, 0]])
result = embed(words)
self.assertEqual(result.size(), (1, 4, 16))
# auto-truncate instead of raising an error
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1,
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1,
auto_truncate=True)
words = torch.LongTensor([[2, 3, 4, 1]*10,
@@ -60,7 +60,7 @@ class TestBertEmbedding(unittest.TestCase):
try:
os.makedirs(bert_save_test, exist_ok=True)
vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1,
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1,
auto_truncate=True)
embed.save(bert_save_test)
@@ -76,7 +76,7 @@ class TestBertEmbedding(unittest.TestCase):
class TestBertWordPieceEncoder(unittest.TestCase):
def test_bert_word_piece_encoder(self):
- embed = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1)
+ embed = BertWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.1)
ds = DataSet({'words': ["this is a test . [SEP]".split()]})
embed.index_datasets(ds, field_name='words')
self.assertTrue(ds.has_field('word_pieces'))
@@ -84,7 +84,7 @@ class TestBertWordPieceEncoder(unittest.TestCase):
def test_bert_embed_eq_bert_piece_encoder(self):
ds = DataSet({'words': ["this is a texta model vocab".split(), 'this is'.split()]})
- encoder = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert')
+ encoder = BertWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_bert')
encoder.eval()
encoder.index_datasets(ds, field_name='words')
word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1]))
@@ -95,7 +95,7 @@ class TestBertWordPieceEncoder(unittest.TestCase):
vocab.index_dataset(ds, field_name='words', new_field_name='words')
ds.set_input('words')
words = torch.LongTensor(ds['words'].get([0, 1]))
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
pool_method='first', include_cls_sep=True, pooled_cls=False, min_freq=1)
embed.eval()
words_res = embed(words)
@@ -109,7 +109,7 @@ class TestBertWordPieceEncoder(unittest.TestCase):
bert_save_test = 'bert_save_test'
try:
os.makedirs(bert_save_test, exist_ok=True)
- embed = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.0,
+ embed = BertWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_bert', word_dropout=0.0,
layers='-2')
ds = DataSet({'words': ["this is a test . [SEP]".split()]})
embed.index_datasets(ds, field_name='words')
diff --git a/test/embeddings/test_char_embedding.py b/tests/embeddings/test_char_embedding.py
similarity index 100%
rename from test/embeddings/test_char_embedding.py
rename to tests/embeddings/test_char_embedding.py
diff --git a/test/embeddings/test_elmo_embedding.py b/tests/embeddings/test_elmo_embedding.py
similarity index 92%
rename from test/embeddings/test_elmo_embedding.py
rename to tests/embeddings/test_elmo_embedding.py
index ed6910b4336284d39e683e78eb14fc99a34ac810..7f6f5b35cb703f2b1ac310d58b4d837238618884 100644
--- a/test/embeddings/test_elmo_embedding.py
+++ b/tests/embeddings/test_elmo_embedding.py
@@ -21,7 +21,7 @@ class TestDownload(unittest.TestCase):
class TestRunElmo(unittest.TestCase):
def test_elmo_embedding(self):
vocab = Vocabulary().add_word_lst("This is a test .".split())
- elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_elmo', layers='0,1')
+ elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_elmo', layers='0,1')
words = torch.LongTensor([[0, 1, 2]])
hidden = elmo_embed(words)
print(hidden.size())
@@ -30,7 +30,7 @@ class TestRunElmo(unittest.TestCase):
def test_elmo_embedding_layer_assertion(self):
vocab = Vocabulary().add_word_lst("This is a test .".split())
try:
- elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_elmo',
+ elmo_embed = ElmoEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_elmo',
layers='0,1,2')
except AssertionError as e:
print(e)
diff --git a/test/embeddings/test_gpt2_embedding.py b/tests/embeddings/test_gpt2_embedding.py
similarity index 92%
rename from test/embeddings/test_gpt2_embedding.py
rename to tests/embeddings/test_gpt2_embedding.py
index e8d0d043432e398ef1bb1c3e3d5641ce14fa86ff..070ae5289facd726cd3fbe7ff56ab353ffd924a7 100644
--- a/test/embeddings/test_gpt2_embedding.py
+++ b/tests/embeddings/test_gpt2_embedding.py
@@ -21,7 +21,7 @@ class TestGPT2Embedding(unittest.TestCase):
print(embed(words).size())
def test_gpt2_embedding(self):
- weight_path = 'test/data_for_tests/embedding/small_gpt2'
+ weight_path = 'tests/data_for_tests/embedding/small_gpt2'
vocab = Vocabulary().add_word_lst("this is a texta sentence".split())
embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1)
requires_grad = embed.requires_grad
@@ -49,7 +49,7 @@ class TestGPT2Embedding(unittest.TestCase):
def test_gpt2_ebembedding_2(self):
# test whether only_use_pretrain_vocab and truncate_embed work correctly
Embedding = GPT2Embedding
- weight_path = 'test/data_for_tests/embedding/small_gpt2'
+ weight_path = 'tests/data_for_tests/embedding/small_gpt2'
vocab = Vocabulary().add_word_lst("this is a texta and".split())
embed1 = Embedding(vocab, model_dir_or_name=weight_path,layers=list(range(3)),
only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1)
@@ -89,13 +89,13 @@ class TestGPT2Embedding(unittest.TestCase):
def test_gpt2_tokenizer(self):
from fastNLP.modules.tokenizer import GPT2Tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2')
+ tokenizer = GPT2Tokenizer.from_pretrained('tests/data_for_tests/embedding/small_gpt2')
print(tokenizer.encode("this is a texta a sentence"))
print(tokenizer.encode('this is'))
def test_gpt2_embed_eq_gpt2_piece_encoder(self):
# mainly checks that the embedding output is consistent with the word piece encoder output
- weight_path = 'test/data_for_tests/embedding/small_gpt2'
+ weight_path = 'tests/data_for_tests/embedding/small_gpt2'
ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]})
encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path)
encoder.eval()
@@ -187,7 +187,7 @@ class TestGPT2WordPieceEncoder(unittest.TestCase):
print(used_pairs)
import json
- with open('test/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f:
+ with open('tests/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f:
new_used_vocab = {}
for idx, key in enumerate(used_vocab.keys()):
new_used_vocab[key] = len(new_used_vocab)
@@ -201,12 +201,12 @@ class TestGPT2WordPieceEncoder(unittest.TestCase):
json.dump(new_used_vocab, f)
- with open('test/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f:
+ with open('tests/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f:
f.write('#version: small\n')
for k,v in sorted(sorted(used_pairs.items(), key=lambda kv:kv[1])):
f.write('{} {}\n'.format(k[0], k[1]))
- new_tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2')
+ new_tokenizer = GPT2Tokenizer.from_pretrained('tests/data_for_tests/embedding/small_gpt2')
new_all_tokens = []
for sent in [sent1, sent2, sent3]:
tokens = new_tokenizer.tokenize(sent, add_prefix_space=True)
@@ -227,21 +227,21 @@ class TestGPT2WordPieceEncoder(unittest.TestCase):
"n_positions": 20,
"vocab_size": len(new_used_vocab)
}
- with open('test/data_for_tests/embedding/small_gpt2/config.json', 'w') as f:
+ with open('tests/data_for_tests/embedding/small_gpt2/config.json', 'w') as f:
json.dump(config, f)
# generate smaller merges.txt and vocab.json by recording the values used in the tokenizer
from fastNLP.modules.encoder.gpt2 import GPT2LMHeadModel, GPT2Config
- config = GPT2Config.from_pretrained('test/data_for_tests/embedding/small_gpt2')
+ config = GPT2Config.from_pretrained('tests/data_for_tests/embedding/small_gpt2')
model = GPT2LMHeadModel(config)
- torch.save(model.state_dict(), 'test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin')
+ torch.save(model.state_dict(), 'tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin')
print(model(torch.LongTensor([[0,1,2,3]])))
def test_gpt2_word_piece_encoder(self):
# mainly checks that it runs
- weight_path = 'test/data_for_tests/embedding/small_gpt2'
+ weight_path = 'tests/data_for_tests/embedding/small_gpt2'
ds = DataSet({'words': ["this is a test sentence".split()]})
embed = GPT2WordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1)
embed.index_datasets(ds, field_name='words')
@@ -256,7 +256,7 @@ class TestGPT2WordPieceEncoder(unittest.TestCase):
@unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
def test_generate(self):
- # weight_path = 'test/data_for_tests/embedding/small_gpt2'
+ # weight_path = 'tests/data_for_tests/embedding/small_gpt2'
weight_path = 'en'
encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path, language_model=True)
diff --git a/test/embeddings/test_roberta_embedding.py b/tests/embeddings/test_roberta_embedding.py
similarity index 92%
rename from test/embeddings/test_roberta_embedding.py
rename to tests/embeddings/test_roberta_embedding.py
index 7eba264422030f429cc83b24219ee3dd04ad76e3..d4874a0beae95b973f78e4636cf6ef297576a5f1 100644
--- a/test/embeddings/test_roberta_embedding.py
+++ b/tests/embeddings/test_roberta_embedding.py
@@ -24,7 +24,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
def test_robert_word_piece_encoder(self):
# just needs to run without errors
- weight_path = 'test/data_for_tests/embedding/small_roberta'
+ weight_path = 'tests/data_for_tests/embedding/small_roberta'
encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1)
ds = DataSet({'words': ["this is a test . [SEP]".split()]})
encoder.index_datasets(ds, field_name='words')
@@ -33,7 +33,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
def test_roberta_embed_eq_roberta_piece_encoder(self):
# mainly checks that the embedding output is consistent with the word piece encoder output
- weight_path = 'test/data_for_tests/embedding/small_roberta'
+ weight_path = 'tests/data_for_tests/embedding/small_roberta'
ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]})
encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path)
encoder.eval()
@@ -120,7 +120,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
used_vocab.update({t:i for t,i in zip(tokens, token_ids)})
import json
- with open('test/data_for_tests/embedding/small_roberta/vocab.json', 'w') as f:
+ with open('tests/data_for_tests/embedding/small_roberta/vocab.json', 'w') as f:
new_used_vocab = {}
for token in ['<s>', '<pad>', '</s>', '<unk>', '<mask>']: # <pad> must be index 1
new_used_vocab[token] = len(new_used_vocab)
@@ -135,7 +135,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
new_used_vocab[key] = len(new_used_vocab)
json.dump(new_used_vocab, f)
- with open('test/data_for_tests/embedding/small_roberta/merges.txt', 'w') as f:
+ with open('tests/data_for_tests/embedding/small_roberta/merges.txt', 'w') as f:
f.write('#version: tiny\n')
for k,v in sorted(sorted(used_pairs.items(), key=lambda kv:kv[1])):
f.write('{} {}\n'.format(k[0], k[1]))
@@ -162,10 +162,10 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
"type_vocab_size": 1,
"vocab_size": len(new_used_vocab)
}
- with open('test/data_for_tests/embedding/small_roberta/config.json', 'w') as f:
+ with open('tests/data_for_tests/embedding/small_roberta/config.json', 'w') as f:
json.dump(config, f)
- new_tokenizer = RobertaTokenizer.from_pretrained('test/data_for_tests/embedding/small_roberta')
+ new_tokenizer = RobertaTokenizer.from_pretrained('tests/data_for_tests/embedding/small_roberta')
new_all_tokens = []
for sent in [sent1, sent2, sent3]:
tokens = new_tokenizer.tokenize(sent, add_prefix_space=True)
@@ -177,17 +177,17 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
        # generate a smaller merges.txt and vocab.json by recording the values held in the tokenizer
from fastNLP.modules.encoder.roberta import RobertaModel, BertConfig
- config = BertConfig.from_json_file('test/data_for_tests/embedding/small_roberta/config.json')
+ config = BertConfig.from_json_file('tests/data_for_tests/embedding/small_roberta/config.json')
model = RobertaModel(config)
- torch.save(model.state_dict(), 'test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin')
+ torch.save(model.state_dict(), 'tests/data_for_tests/embedding/small_roberta/small_pytorch_model.bin')
print(model(torch.LongTensor([[0,1,2,3]])))
def test_save_load(self):
bert_save_test = 'roberta_save_test'
try:
os.makedirs(bert_save_test, exist_ok=True)
- embed = RobertaWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_roberta', word_dropout=0.0,
+ embed = RobertaWordPieceEncoder(model_dir_or_name='tests/data_for_tests/embedding/small_roberta', word_dropout=0.0,
layers='-2')
ds = DataSet({'words': ["this is a test . [SEP]".split()]})
embed.index_datasets(ds, field_name='words')
@@ -204,7 +204,7 @@ class TestRobertWordPieceEncoder(unittest.TestCase):
class TestRobertaEmbedding(unittest.TestCase):
def test_roberta_embedding_1(self):
- weight_path = 'test/data_for_tests/embedding/small_roberta'
+ weight_path = 'tests/data_for_tests/embedding/small_roberta'
vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInRoberta".split())
embed = RobertaEmbedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1)
requires_grad = embed.requires_grad
@@ -224,7 +224,7 @@ class TestRobertaEmbedding(unittest.TestCase):
def test_roberta_ebembedding_2(self):
        # test whether only_use_pretrain_vocab and truncate_embed work correctly
Embedding = RobertaEmbedding
- weight_path = 'test/data_for_tests/embedding/small_roberta'
+ weight_path = 'tests/data_for_tests/embedding/small_roberta'
vocab = Vocabulary().add_word_lst("this is a texta and".split())
embed1 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)),
only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1)
@@ -266,7 +266,7 @@ class TestRobertaEmbedding(unittest.TestCase):
try:
os.makedirs(bert_save_test, exist_ok=True)
vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInBERT".split())
- embed = RobertaEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_roberta',
+ embed = RobertaEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_roberta',
word_dropout=0.1,
auto_truncate=True)
embed.save(bert_save_test)
diff --git a/test/embeddings/test_stack_embeddings.py b/tests/embeddings/test_stack_embeddings.py
similarity index 100%
rename from test/embeddings/test_stack_embeddings.py
rename to tests/embeddings/test_stack_embeddings.py
diff --git a/test/embeddings/test_static_embedding.py b/tests/embeddings/test_static_embedding.py
similarity index 92%
rename from test/embeddings/test_static_embedding.py
rename to tests/embeddings/test_static_embedding.py
index 2b10a2d0084392e220cf4609385f4867b4f322c4..90519338b13427908571b8f2c4e57836b9a63bd5 100644
--- a/test/embeddings/test_static_embedding.py
+++ b/tests/embeddings/test_static_embedding.py
@@ -10,7 +10,7 @@ class TestLoad(unittest.TestCase):
def test_norm1(self):
        # test normalizing only the vectors that can be found
vocab = Vocabulary().add_word_lst(['the', 'a', 'notinfile'])
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt',
only_norm_found_vector=True)
self.assertEqual(round(torch.norm(embed(torch.LongTensor([[2]]))).item(), 4), 1)
@@ -19,7 +19,7 @@ class TestLoad(unittest.TestCase):
def test_norm2(self):
        # test normalizing all vectors
vocab = Vocabulary().add_word_lst(['the', 'a', 'notinfile'])
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt',
normalize=True)
self.assertEqual(round(torch.norm(embed(torch.LongTensor([[2]]))).item(), 4), 1)
@@ -50,13 +50,13 @@ class TestLoad(unittest.TestCase):
v2 = embed_dict[word]
for v1i, v2i in zip(v1, v2):
self.assertAlmostEqual(v1i, v2i, places=4)
- embed_dict = read_static_embed('test/data_for_tests/embedding/small_static_embedding/'
+ embed_dict = read_static_embed('tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt')
        # test that only pretrained words are used
vocab = Vocabulary().add_word_lst(['the', 'a', 'notinfile'])
vocab.add_word('of', no_create_entry=True)
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt',
only_use_pretrain_word=True)
        # notinfile should be mapped to unk
@@ -66,13 +66,13 @@ class TestLoad(unittest.TestCase):
        # test behavior with different casing
vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile'])
vocab.add_word('Of', no_create_entry=True)
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt',
only_use_pretrain_word=True)
        check_word_unk(['The', 'Of', 'notinfile'], vocab, embed)  # these words should not be found
check_vector_equal(['a'], vocab, embed, embed_dict)
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt',
only_use_pretrain_word=True, lower=True)
check_vector_equal(['The', 'Of', 'a'], vocab, embed, embed_dict, lower=True)
@@ -82,7 +82,7 @@ class TestLoad(unittest.TestCase):
vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A', 'notinfile2', 'notinfile2'])
vocab.add_word('Of', no_create_entry=True)
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt',
only_use_pretrain_word=True, lower=True, min_freq=2, only_train_min_freq=True)
@@ -92,12 +92,12 @@ class TestLoad(unittest.TestCase):
def test_sequential_index(self):
        # when no no_create_entry exists, words_to_words should be sequential
vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A', 'notinfile2', 'notinfile2'])
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt')
for index,i in enumerate(embed.words_to_words):
assert index==i
- embed_dict = read_static_embed('test/data_for_tests/embedding/small_static_embedding/'
+ embed_dict = read_static_embed('tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt')
for word, index in vocab:
@@ -116,7 +116,7 @@ class TestLoad(unittest.TestCase):
vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A'])
vocab.add_word_lst(['notinfile2', 'notinfile2'], no_create_entry=True)
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt')
embed.save(static_test_folder)
load_embed = StaticEmbedding.load(static_test_folder)
@@ -125,7 +125,7 @@ class TestLoad(unittest.TestCase):
        # test without no_create_entry
vocab = Vocabulary().add_word_lst(['The', 'a', 'notinfile1', 'A'])
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt')
embed.save(static_test_folder)
load_embed = StaticEmbedding.load(static_test_folder)
@@ -134,7 +134,7 @@ class TestLoad(unittest.TestCase):
        # test lower and min_freq
vocab = Vocabulary().add_word_lst(['The', 'the', 'the', 'A', 'a', 'B'])
- embed = StaticEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_static_embedding/'
+ embed = StaticEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_static_embedding/'
'glove.6B.50d_test.txt', min_freq=2, lower=True)
embed.save(static_test_folder)
load_embed = StaticEmbedding.load(static_test_folder)
diff --git a/test/embeddings/test_transformer_embedding.py b/tests/embeddings/test_transformer_embedding.py
similarity index 100%
rename from test/embeddings/test_transformer_embedding.py
rename to tests/embeddings/test_transformer_embedding.py
diff --git a/test/io/__init__.py b/tests/io/__init__.py
similarity index 100%
rename from test/io/__init__.py
rename to tests/io/__init__.py
diff --git a/test/io/loader/test_classification_loader.py b/tests/io/loader/test_classification_loader.py
similarity index 66%
rename from test/io/loader/test_classification_loader.py
rename to tests/io/loader/test_classification_loader.py
index 6ed8eb157279f8e1941a04dd1a6647759497c4a1..836e24e4c8cd1913c6568cfa2588ac2d7725e821 100644
--- a/test/io/loader/test_classification_loader.py
+++ b/tests/io/loader/test_classification_loader.py
@@ -23,14 +23,14 @@ class TestDownload(unittest.TestCase):
class TestLoad(unittest.TestCase):
def test_process_from_file(self):
data_set_dict = {
- 'yelp.p': ('test/data_for_tests/io/yelp_review_polarity', YelpPolarityLoader, (6, 6, 6), False),
- 'yelp.f': ('test/data_for_tests/io/yelp_review_full', YelpFullLoader, (6, 6, 6), False),
- 'sst-2': ('test/data_for_tests/io/SST-2', SST2Loader, (5, 5, 5), True),
- 'sst': ('test/data_for_tests/io/SST', SSTLoader, (6, 6, 6), False),
- 'imdb': ('test/data_for_tests/io/imdb', IMDBLoader, (6, 6, 6), False),
- 'ChnSentiCorp': ('test/data_for_tests/io/ChnSentiCorp', ChnSentiCorpLoader, (6, 6, 6), False),
- 'THUCNews': ('test/data_for_tests/io/THUCNews', THUCNewsLoader, (9, 9, 9), False),
- 'WeiboSenti100k': ('test/data_for_tests/io/WeiboSenti100k', WeiboSenti100kLoader, (6, 7, 6), False),
+ 'yelp.p': ('tests/data_for_tests/io/yelp_review_polarity', YelpPolarityLoader, (6, 6, 6), False),
+ 'yelp.f': ('tests/data_for_tests/io/yelp_review_full', YelpFullLoader, (6, 6, 6), False),
+ 'sst-2': ('tests/data_for_tests/io/SST-2', SST2Loader, (5, 5, 5), True),
+ 'sst': ('tests/data_for_tests/io/SST', SSTLoader, (6, 6, 6), False),
+ 'imdb': ('tests/data_for_tests/io/imdb', IMDBLoader, (6, 6, 6), False),
+ 'ChnSentiCorp': ('tests/data_for_tests/io/ChnSentiCorp', ChnSentiCorpLoader, (6, 6, 6), False),
+ 'THUCNews': ('tests/data_for_tests/io/THUCNews', THUCNewsLoader, (9, 9, 9), False),
+ 'WeiboSenti100k': ('tests/data_for_tests/io/WeiboSenti100k', WeiboSenti100kLoader, (6, 7, 6), False),
}
for k, v in data_set_dict.items():
path, loader, data_set, warns = v
diff --git a/test/io/loader/test_conll_loader.py b/tests/io/loader/test_conll_loader.py
similarity index 81%
rename from test/io/loader/test_conll_loader.py
rename to tests/io/loader/test_conll_loader.py
index bf0ebb474ddcc45f139faad51c72e0b65ac79cdc..87ea57c3224da68b0e1a689896318aa9a897e700 100644
--- a/test/io/loader/test_conll_loader.py
+++ b/tests/io/loader/test_conll_loader.py
@@ -27,12 +27,12 @@ class TestWeiboNER(unittest.TestCase):
class TestConll2003Loader(unittest.TestCase):
def test_load(self):
- Conll2003Loader()._load('test/data_for_tests/conll_2003_example.txt')
+ Conll2003Loader()._load('tests/data_for_tests/conll_2003_example.txt')
class TestConllLoader(unittest.TestCase):
def test_conll(self):
- db = Conll2003Loader().load('test/data_for_tests/io/conll2003')
+ db = Conll2003Loader().load('tests/data_for_tests/io/conll2003')
print(db)
class TestConllLoader(unittest.TestCase):
@@ -40,5 +40,5 @@ class TestConllLoader(unittest.TestCase):
headers = [
'raw_words', 'ner',
]
- db = ConllLoader(headers = headers,sep="\n").load('test/data_for_tests/io/MSRA_NER')
+ db = ConllLoader(headers = headers,sep="\n").load('tests/data_for_tests/io/MSRA_NER')
print(db)
diff --git a/test/io/loader/test_coreference_loader.py b/tests/io/loader/test_coreference_loader.py
similarity index 93%
rename from test/io/loader/test_coreference_loader.py
rename to tests/io/loader/test_coreference_loader.py
index 02f3a1c5d727390febc1219dfb4dab85815854ef..50f27e39c5908b566053c504938ead9f07ca7fbd 100644
--- a/test/io/loader/test_coreference_loader.py
+++ b/tests/io/loader/test_coreference_loader.py
@@ -5,7 +5,7 @@ import unittest
class TestCR(unittest.TestCase):
def test_load(self):
- test_root = "test/data_for_tests/io/coreference/"
+ test_root = "tests/data_for_tests/io/coreference/"
train_path = test_root+"coreference_train.json"
dev_path = test_root+"coreference_dev.json"
test_path = test_root+"coreference_test.json"
diff --git a/test/io/loader/test_cws_loader.py b/tests/io/loader/test_cws_loader.py
similarity index 92%
rename from test/io/loader/test_cws_loader.py
rename to tests/io/loader/test_cws_loader.py
index 80ca04069bd0555d74f59d80a53c29f4ad82b7c8..e17d0e0d357f845183fb17dc1ff6ec3959bc491a 100644
--- a/test/io/loader/test_cws_loader.py
+++ b/tests/io/loader/test_cws_loader.py
@@ -19,6 +19,6 @@ class TestRunCWSLoader(unittest.TestCase):
for dataset_name in dataset_names:
with self.subTest(dataset_name=dataset_name):
data_bundle = CWSLoader(dataset_name=dataset_name).load(
- f'test/data_for_tests/io/cws_{dataset_name}'
+ f'tests/data_for_tests/io/cws_{dataset_name}'
)
print(data_bundle)
diff --git a/test/io/loader/test_matching_loader.py b/tests/io/loader/test_matching_loader.py
similarity index 67%
rename from test/io/loader/test_matching_loader.py
rename to tests/io/loader/test_matching_loader.py
index 30ace410a561bcdcaf952a2c70e83c0e3313ac39..6c7059da36e182bc0de766ec8d4732c0c645d656 100644
--- a/test/io/loader/test_matching_loader.py
+++ b/tests/io/loader/test_matching_loader.py
@@ -25,14 +25,14 @@ class TestMatchingDownload(unittest.TestCase):
class TestMatchingLoad(unittest.TestCase):
def test_load(self):
data_set_dict = {
- 'RTE': ('test/data_for_tests/io/RTE', RTELoader, (5, 5, 5), True),
- 'SNLI': ('test/data_for_tests/io/SNLI', SNLILoader, (5, 5, 5), False),
- 'QNLI': ('test/data_for_tests/io/QNLI', QNLILoader, (5, 5, 5), True),
- 'MNLI': ('test/data_for_tests/io/MNLI', MNLILoader, (5, 5, 5, 5, 6), True),
- 'Quora': ('test/data_for_tests/io/Quora', QuoraLoader, (2, 2, 2), False),
- 'BQCorpus': ('test/data_for_tests/io/BQCorpus', BQCorpusLoader, (5, 5, 5), False),
- 'XNLI': ('test/data_for_tests/io/XNLI', CNXNLILoader, (6, 6, 8), False),
- 'LCQMC': ('test/data_for_tests/io/LCQMC', LCQMCLoader, (6, 5, 6), False),
+ 'RTE': ('tests/data_for_tests/io/RTE', RTELoader, (5, 5, 5), True),
+ 'SNLI': ('tests/data_for_tests/io/SNLI', SNLILoader, (5, 5, 5), False),
+ 'QNLI': ('tests/data_for_tests/io/QNLI', QNLILoader, (5, 5, 5), True),
+ 'MNLI': ('tests/data_for_tests/io/MNLI', MNLILoader, (5, 5, 5, 5, 6), True),
+ 'Quora': ('tests/data_for_tests/io/Quora', QuoraLoader, (2, 2, 2), False),
+ 'BQCorpus': ('tests/data_for_tests/io/BQCorpus', BQCorpusLoader, (5, 5, 5), False),
+ 'XNLI': ('tests/data_for_tests/io/XNLI', CNXNLILoader, (6, 6, 8), False),
+ 'LCQMC': ('tests/data_for_tests/io/LCQMC', LCQMCLoader, (6, 5, 6), False),
}
for k, v in data_set_dict.items():
path, loader, instance, warns = v
diff --git a/test/io/loader/test_qa_loader.py b/tests/io/loader/test_qa_loader.py
similarity index 66%
rename from test/io/loader/test_qa_loader.py
rename to tests/io/loader/test_qa_loader.py
index eea067cd15b6a1a834f8fc7425d00d5f15f0a20c..99a504c5d73d2e3b7763d6c760af840b0f98c28f 100644
--- a/test/io/loader/test_qa_loader.py
+++ b/tests/io/loader/test_qa_loader.py
@@ -5,10 +5,10 @@ from fastNLP.io.loader.qa import CMRC2018Loader
class TestCMRC2018Loader(unittest.TestCase):
def test__load(self):
loader = CMRC2018Loader()
- dataset = loader._load('test/data_for_tests/io/cmrc/train.json')
+ dataset = loader._load('tests/data_for_tests/io/cmrc/train.json')
print(dataset)
def test_load(self):
loader = CMRC2018Loader()
- data_bundle = loader.load('test/data_for_tests/io/cmrc/')
+ data_bundle = loader.load('tests/data_for_tests/io/cmrc/')
print(data_bundle)
diff --git a/test/io/pipe/test_classification.py b/tests/io/pipe/test_classification.py
similarity index 83%
rename from test/io/pipe/test_classification.py
rename to tests/io/pipe/test_classification.py
index 8ebdb2dfb5902af9e41e153537a8e5019653866e..e3200a1adee4d258a8c36a3fb26639c5f0fc5fc7 100644
--- a/test/io/pipe/test_classification.py
+++ b/tests/io/pipe/test_classification.py
@@ -20,7 +20,7 @@ class TestClassificationPipe(unittest.TestCase):
class TestRunPipe(unittest.TestCase):
def test_load(self):
for pipe in [IMDBPipe]:
- data_bundle = pipe(tokenizer='raw').process_from_file('test/data_for_tests/io/imdb')
+ data_bundle = pipe(tokenizer='raw').process_from_file('tests/data_for_tests/io/imdb')
print(data_bundle)
@@ -37,35 +37,35 @@ class TestCNClassificationPipe(unittest.TestCase):
class TestRunClassificationPipe(unittest.TestCase):
def test_process_from_file(self):
data_set_dict = {
- 'yelp.p': ('test/data_for_tests/io/yelp_review_polarity', YelpPolarityPipe,
+ 'yelp.p': ('tests/data_for_tests/io/yelp_review_polarity', YelpPolarityPipe,
{'train': 6, 'dev': 6, 'test': 6}, {'words': 1176, 'target': 2},
False),
- 'yelp.f': ('test/data_for_tests/io/yelp_review_full', YelpFullPipe,
+ 'yelp.f': ('tests/data_for_tests/io/yelp_review_full', YelpFullPipe,
{'train': 6, 'dev': 6, 'test': 6}, {'words': 1166, 'target': 5},
False),
- 'sst-2': ('test/data_for_tests/io/SST-2', SST2Pipe,
+ 'sst-2': ('tests/data_for_tests/io/SST-2', SST2Pipe,
{'train': 5, 'dev': 5, 'test': 5}, {'words': 139, 'target': 2},
True),
- 'sst': ('test/data_for_tests/io/SST', SSTPipe,
+ 'sst': ('tests/data_for_tests/io/SST', SSTPipe,
{'train': 354, 'dev': 6, 'test': 6}, {'words': 232, 'target': 5},
False),
- 'imdb': ('test/data_for_tests/io/imdb', IMDBPipe,
+ 'imdb': ('tests/data_for_tests/io/imdb', IMDBPipe,
{'train': 6, 'dev': 6, 'test': 6}, {'words': 1670, 'target': 2},
False),
- 'ag': ('test/data_for_tests/io/ag', AGsNewsPipe,
+ 'ag': ('tests/data_for_tests/io/ag', AGsNewsPipe,
{'train': 4, 'test': 5}, {'words': 257, 'target': 4},
False),
- 'dbpedia': ('test/data_for_tests/io/dbpedia', DBPediaPipe,
+ 'dbpedia': ('tests/data_for_tests/io/dbpedia', DBPediaPipe,
{'train': 14, 'test': 5}, {'words': 496, 'target': 14},
False),
- 'ChnSentiCorp': ('test/data_for_tests/io/ChnSentiCorp', ChnSentiCorpPipe,
+ 'ChnSentiCorp': ('tests/data_for_tests/io/ChnSentiCorp', ChnSentiCorpPipe,
{'train': 6, 'dev': 6, 'test': 6},
{'chars': 529, 'bigrams': 1296, 'trigrams': 1483, 'target': 2},
False),
- 'Chn-THUCNews': ('test/data_for_tests/io/THUCNews', THUCNewsPipe,
+ 'Chn-THUCNews': ('tests/data_for_tests/io/THUCNews', THUCNewsPipe,
{'train': 9, 'dev': 9, 'test': 9}, {'chars': 1864, 'target': 9},
False),
- 'Chn-WeiboSenti100k': ('test/data_for_tests/io/WeiboSenti100k', WeiboSenti100kPipe,
+ 'Chn-WeiboSenti100k': ('tests/data_for_tests/io/WeiboSenti100k', WeiboSenti100kPipe,
{'train': 6, 'dev': 6, 'test': 7}, {'chars': 452, 'target': 2},
False),
}
diff --git a/test/io/pipe/test_conll.py b/tests/io/pipe/test_conll.py
similarity index 86%
rename from test/io/pipe/test_conll.py
rename to tests/io/pipe/test_conll.py
index ad41ae18061eb65768084787174126fb5151581d..30d5b48fa43673520a7cbd59df6fe2ab00951ae7 100644
--- a/test/io/pipe/test_conll.py
+++ b/tests/io/pipe/test_conll.py
@@ -21,7 +21,7 @@ class TestRunPipe(unittest.TestCase):
for pipe in [Conll2003Pipe, Conll2003NERPipe]:
with self.subTest(pipe=pipe):
print(pipe)
- data_bundle = pipe().process_from_file('test/data_for_tests/conll_2003_example.txt')
+ data_bundle = pipe().process_from_file('tests/data_for_tests/conll_2003_example.txt')
print(data_bundle)
@@ -35,18 +35,18 @@ class TestNERPipe(unittest.TestCase):
for k, v in data_dict.items():
pipe = v
with self.subTest(pipe=pipe):
- data_bundle = pipe(bigrams=True, trigrams=True).process_from_file(f'test/data_for_tests/io/{k}')
+ data_bundle = pipe(bigrams=True, trigrams=True).process_from_file(f'tests/data_for_tests/io/{k}')
print(data_bundle)
- data_bundle = pipe(encoding_type='bioes').process_from_file(f'test/data_for_tests/io/{k}')
+ data_bundle = pipe(encoding_type='bioes').process_from_file(f'tests/data_for_tests/io/{k}')
print(data_bundle)
class TestConll2003Pipe(unittest.TestCase):
def test_conll(self):
with self.assertWarns(Warning):
- data_bundle = Conll2003Pipe().process_from_file('test/data_for_tests/io/conll2003')
+ data_bundle = Conll2003Pipe().process_from_file('tests/data_for_tests/io/conll2003')
print(data_bundle)
def test_OntoNotes(self):
- data_bundle = OntoNotesNERPipe().process_from_file('test/data_for_tests/io/OntoNotes')
+ data_bundle = OntoNotesNERPipe().process_from_file('tests/data_for_tests/io/OntoNotes')
print(data_bundle)
diff --git a/test/io/pipe/test_coreference.py b/tests/io/pipe/test_coreference.py
similarity index 94%
rename from test/io/pipe/test_coreference.py
rename to tests/io/pipe/test_coreference.py
index 3a492419253dba075d0f728a73ba1d59f2113ff2..784f695418b1cb18c484c2d6b761766dbb774e38 100644
--- a/test/io/pipe/test_coreference.py
+++ b/tests/io/pipe/test_coreference.py
@@ -11,7 +11,7 @@ class TestCR(unittest.TestCase):
char_path = None
config = Config()
- file_root_path = "test/data_for_tests/io/coreference/"
+ file_root_path = "tests/data_for_tests/io/coreference/"
train_path = file_root_path + "coreference_train.json"
dev_path = file_root_path + "coreference_dev.json"
test_path = file_root_path + "coreference_test.json"
diff --git a/test/io/pipe/test_cws.py b/tests/io/pipe/test_cws.py
similarity index 90%
rename from test/io/pipe/test_cws.py
rename to tests/io/pipe/test_cws.py
index f3a95596c7306e52bdeaaabe11857c4a85d1a196..ef50907f3103f4ca1f54e86b06eb3697f5bbad55 100644
--- a/test/io/pipe/test_cws.py
+++ b/tests/io/pipe/test_cws.py
@@ -31,11 +31,11 @@ class TestRunCWSPipe(unittest.TestCase):
for dataset_name in dataset_names:
with self.subTest(dataset_name=dataset_name):
data_bundle = CWSPipe(bigrams=True, trigrams=True).\
- process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
+ process_from_file(f'tests/data_for_tests/io/cws_{dataset_name}')
print(data_bundle)
def test_replace_number(self):
data_bundle = CWSPipe(bigrams=True, replace_num_alpha=True).\
- process_from_file(f'test/data_for_tests/io/cws_pku')
+ process_from_file(f'tests/data_for_tests/io/cws_pku')
        for word in ['<', '>', '<NUM>']:
self.assertNotEqual(data_bundle.get_vocab('chars').to_index(word), 1)
diff --git a/test/io/pipe/test_matching.py b/tests/io/pipe/test_matching.py
similarity index 83%
rename from test/io/pipe/test_matching.py
rename to tests/io/pipe/test_matching.py
index 929936906921f8f4410f05675164cf6184dc89a3..23f450db4ca92ddf7cea11244a160590b1fce510 100644
--- a/test/io/pipe/test_matching.py
+++ b/tests/io/pipe/test_matching.py
@@ -33,13 +33,13 @@ class TestRunMatchingPipe(unittest.TestCase):
def test_load(self):
data_set_dict = {
- 'RTE': ('test/data_for_tests/io/RTE', RTEPipe, RTEBertPipe, (5, 5, 5), (449, 2), True),
- 'SNLI': ('test/data_for_tests/io/SNLI', SNLIPipe, SNLIBertPipe, (5, 5, 5), (110, 3), False),
- 'QNLI': ('test/data_for_tests/io/QNLI', QNLIPipe, QNLIBertPipe, (5, 5, 5), (372, 2), True),
- 'MNLI': ('test/data_for_tests/io/MNLI', MNLIPipe, MNLIBertPipe, (5, 5, 5, 5, 6), (459, 3), True),
- 'BQCorpus': ('test/data_for_tests/io/BQCorpus', BQCorpusPipe, BQCorpusBertPipe, (5, 5, 5), (32, 2), False),
- 'XNLI': ('test/data_for_tests/io/XNLI', CNXNLIPipe, CNXNLIBertPipe, (6, 6, 8), (39, 3), False),
- 'LCQMC': ('test/data_for_tests/io/LCQMC', LCQMCPipe, LCQMCBertPipe, (6, 5, 6), (36, 2), False),
+ 'RTE': ('tests/data_for_tests/io/RTE', RTEPipe, RTEBertPipe, (5, 5, 5), (449, 2), True),
+ 'SNLI': ('tests/data_for_tests/io/SNLI', SNLIPipe, SNLIBertPipe, (5, 5, 5), (110, 3), False),
+ 'QNLI': ('tests/data_for_tests/io/QNLI', QNLIPipe, QNLIBertPipe, (5, 5, 5), (372, 2), True),
+ 'MNLI': ('tests/data_for_tests/io/MNLI', MNLIPipe, MNLIBertPipe, (5, 5, 5, 5, 6), (459, 3), True),
+ 'BQCorpus': ('tests/data_for_tests/io/BQCorpus', BQCorpusPipe, BQCorpusBertPipe, (5, 5, 5), (32, 2), False),
+ 'XNLI': ('tests/data_for_tests/io/XNLI', CNXNLIPipe, CNXNLIBertPipe, (6, 6, 8), (39, 3), False),
+ 'LCQMC': ('tests/data_for_tests/io/LCQMC', LCQMCPipe, LCQMCBertPipe, (6, 5, 6), (36, 2), False),
}
for k, v in data_set_dict.items():
path, pipe1, pipe2, data_set, vocab, warns = v
@@ -76,7 +76,7 @@ class TestRunMatchingPipe(unittest.TestCase):
def test_spacy(self):
data_set_dict = {
- 'Quora': ('test/data_for_tests/io/Quora', QuoraPipe, QuoraBertPipe, (2, 2, 2), (93, 2)),
+ 'Quora': ('tests/data_for_tests/io/Quora', QuoraPipe, QuoraBertPipe, (2, 2, 2), (93, 2)),
}
for k, v in data_set_dict.items():
path, pipe1, pipe2, data_set, vocab = v
diff --git a/test/io/pipe/test_qa.py b/tests/io/pipe/test_qa.py
similarity index 92%
rename from test/io/pipe/test_qa.py
rename to tests/io/pipe/test_qa.py
index ad6581f957bec1c71f03532215071ba569855d73..db2245fcd53eacd2eb2c66226eacc9fecce38e35 100644
--- a/test/io/pipe/test_qa.py
+++ b/tests/io/pipe/test_qa.py
@@ -6,7 +6,7 @@ from fastNLP.io.loader.qa import CMRC2018Loader
class CMRC2018PipeTest(unittest.TestCase):
def test_process(self):
- data_bundle = CMRC2018Loader().load('test/data_for_tests/io/cmrc/')
+ data_bundle = CMRC2018Loader().load('tests/data_for_tests/io/cmrc/')
pipe = CMRC2018BertPipe()
data_bundle = pipe.process(data_bundle)
diff --git a/test/io/pipe/test_summary.py b/tests/io/pipe/test_summary.py
similarity index 96%
rename from test/io/pipe/test_summary.py
rename to tests/io/pipe/test_summary.py
index 32508a15450d3fcb81c3da56c8c7d886f58911a8..03d92214a9ea103b5c0c73a9f2ed36f01516195a 100644
--- a/test/io/pipe/test_summary.py
+++ b/tests/io/pipe/test_summary.py
@@ -27,9 +27,9 @@ from fastNLP.io.pipe.summarization import ExtCNNDMPipe
class TestRunExtCNNDMPipe(unittest.TestCase):
def test_load(self):
- data_dir = 'test/data_for_tests/io/cnndm'
+ data_dir = 'tests/data_for_tests/io/cnndm'
vocab_size = 100000
- VOCAL_FILE = 'test/data_for_tests/io/cnndm/vocab'
+ VOCAL_FILE = 'tests/data_for_tests/io/cnndm/vocab'
sent_max_len = 100
doc_max_timesteps = 50
dbPipe = ExtCNNDMPipe(vocab_size=vocab_size,
diff --git a/test/io/test_embed_loader.py b/tests/io/test_embed_loader.py
similarity index 83%
rename from test/io/test_embed_loader.py
rename to tests/io/test_embed_loader.py
index 70b367ec0332498b61d75ea08c297351d7d948ef..7c8abc77bb63a8e32f7dc6e92f9c1d77b0b4e38b 100644
--- a/test/io/test_embed_loader.py
+++ b/tests/io/test_embed_loader.py
@@ -8,8 +8,8 @@ from fastNLP.io import EmbedLoader
class TestEmbedLoader(unittest.TestCase):
def test_load_with_vocab(self):
vocab = Vocabulary()
- glove = "test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt"
- word2vec = "test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt"
+ glove = "tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt"
+ word2vec = "tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt"
vocab.add_word('the')
vocab.add_word('none')
g_m = EmbedLoader.load_with_vocab(glove, vocab)
@@ -20,8 +20,8 @@ class TestEmbedLoader(unittest.TestCase):
def test_load_without_vocab(self):
words = ['the', 'of', 'in', 'a', 'to', 'and']
- glove = "test/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt"
- word2vec = "test/data_for_tests/embedding/small_static_embedding/word2vec_test.txt"
+ glove = "tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt"
+ word2vec = "tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt"
g_m, vocab = EmbedLoader.load_without_vocab(glove)
self.assertEqual(g_m.shape, (8, 50))
for word in words:
diff --git a/test/io/test_model_io.py b/tests/io/test_model_io.py
similarity index 100%
rename from test/io/test_model_io.py
rename to tests/io/test_model_io.py
diff --git a/test/models/__init__.py b/tests/models/__init__.py
similarity index 100%
rename from test/models/__init__.py
rename to tests/models/__init__.py
diff --git a/test/models/model_runner.py b/tests/models/model_runner.py
similarity index 100%
rename from test/models/model_runner.py
rename to tests/models/model_runner.py
diff --git a/test/models/test_bert.py b/tests/models/test_bert.py
similarity index 85%
rename from test/models/test_bert.py
rename to tests/models/test_bert.py
index c3ba94541c8ff254c72a18b9aaeb1802c76dbe18..58178bffd0369cc2096e48a9b46d9c7c0e55a102 100644
--- a/test/models/test_bert.py
+++ b/tests/models/test_bert.py
@@ -11,7 +11,7 @@ from fastNLP.embeddings.bert_embedding import BertEmbedding
class TestBert(unittest.TestCase):
def test_bert_1(self):
vocab = Vocabulary().add_word_lst("this is a test .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=True)
model = BertForSequenceClassification(embed, 2)
@@ -30,7 +30,7 @@ class TestBert(unittest.TestCase):
def test_bert_1_w(self):
vocab = Vocabulary().add_word_lst("this is a test .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=False)
with self.assertWarns(Warning):
@@ -46,7 +46,7 @@ class TestBert(unittest.TestCase):
def test_bert_2(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=True)
model = BertForMultipleChoice(embed, 2)
@@ -62,7 +62,7 @@ class TestBert(unittest.TestCase):
def test_bert_2_w(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=False)
with self.assertWarns(Warning):
@@ -79,7 +79,7 @@ class TestBert(unittest.TestCase):
def test_bert_3(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=False)
model = BertForTokenClassification(embed, 7)
@@ -93,7 +93,7 @@ class TestBert(unittest.TestCase):
def test_bert_3_w(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=True)
with self.assertWarns(Warning):
@@ -108,7 +108,7 @@ class TestBert(unittest.TestCase):
def test_bert_4(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=False)
model = BertForQuestionAnswering(embed)
@@ -126,12 +126,12 @@ class TestBert(unittest.TestCase):
from fastNLP.io import CMRC2018BertPipe
from fastNLP import Trainer
- data_bundle = CMRC2018BertPipe().process_from_file('test/data_for_tests/io/cmrc')
+ data_bundle = CMRC2018BertPipe().process_from_file('tests/data_for_tests/io/cmrc')
data_bundle.rename_field('chars', 'words')
train_data = data_bundle.get_dataset('train')
vocab = data_bundle.get_vocab('words')
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=False, auto_truncate=True)
model = BertForQuestionAnswering(embed)
loss = CMRC2018Loss()
@@ -142,7 +142,7 @@ class TestBert(unittest.TestCase):
def test_bert_5(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=True)
model = BertForSentenceMatching(embed)
@@ -156,7 +156,7 @@ class TestBert(unittest.TestCase):
def test_bert_5_w(self):
vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
- embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert',
+ embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
include_cls_sep=False)
with self.assertWarns(Warning):
diff --git a/test/models/test_biaffine_parser.py b/tests/models/test_biaffine_parser.py
similarity index 100%
rename from test/models/test_biaffine_parser.py
rename to tests/models/test_biaffine_parser.py
diff --git a/test/models/test_cnn_text_classification.py b/tests/models/test_cnn_text_classification.py
similarity index 100%
rename from test/models/test_cnn_text_classification.py
rename to tests/models/test_cnn_text_classification.py
diff --git a/test/models/test_seq2seq_generator.py b/tests/models/test_seq2seq_generator.py
similarity index 100%
rename from test/models/test_seq2seq_generator.py
rename to tests/models/test_seq2seq_generator.py
diff --git a/test/models/test_seq2seq_model.py b/tests/models/test_seq2seq_model.py
similarity index 100%
rename from test/models/test_seq2seq_model.py
rename to tests/models/test_seq2seq_model.py
diff --git a/test/models/test_sequence_labeling.py b/tests/models/test_sequence_labeling.py
similarity index 100%
rename from test/models/test_sequence_labeling.py
rename to tests/models/test_sequence_labeling.py
diff --git a/test/models/test_snli.py b/tests/models/test_snli.py
similarity index 100%
rename from test/models/test_snli.py
rename to tests/models/test_snli.py
diff --git a/test/models/test_star_trans.py b/tests/models/test_star_trans.py
similarity index 100%
rename from test/models/test_star_trans.py
rename to tests/models/test_star_trans.py
diff --git a/test/modules/__init__.py b/tests/modules/__init__.py
similarity index 100%
rename from test/modules/__init__.py
rename to tests/modules/__init__.py
diff --git a/test/modules/decoder/__init__.py b/tests/modules/decoder/__init__.py
similarity index 100%
rename from test/modules/decoder/__init__.py
rename to tests/modules/decoder/__init__.py
diff --git a/test/modules/decoder/test_CRF.py b/tests/modules/decoder/test_CRF.py
similarity index 99%
rename from test/modules/decoder/test_CRF.py
rename to tests/modules/decoder/test_CRF.py
index 55548a41dc6be0068417f72ba880d63d905acb30..adac3c4019c905c944302f29944a17010dbd1a38 100644
--- a/test/modules/decoder/test_CRF.py
+++ b/tests/modules/decoder/test_CRF.py
@@ -223,7 +223,7 @@ class TestCRF(unittest.TestCase):
import torch
from fastNLP import seq_len_to_mask
- with open('test/data_for_tests/modules/decoder/crf.json', 'r') as f:
+ with open('tests/data_for_tests/modules/decoder/crf.json', 'r') as f:
data = json.load(f)
bio_logits = torch.FloatTensor(data['bio_logits'])
diff --git a/test/modules/decoder/test_bert.py b/tests/modules/decoder/test_bert.py
similarity index 100%
rename from test/modules/decoder/test_bert.py
rename to tests/modules/decoder/test_bert.py
diff --git a/test/modules/decoder/test_seq2seq_decoder.py b/tests/modules/decoder/test_seq2seq_decoder.py
similarity index 100%
rename from test/modules/decoder/test_seq2seq_decoder.py
rename to tests/modules/decoder/test_seq2seq_decoder.py
diff --git a/test/modules/encoder/__init__.py b/tests/modules/encoder/__init__.py
similarity index 100%
rename from test/modules/encoder/__init__.py
rename to tests/modules/encoder/__init__.py
diff --git a/test/modules/encoder/test_pooling.py b/tests/modules/encoder/test_pooling.py
similarity index 100%
rename from test/modules/encoder/test_pooling.py
rename to tests/modules/encoder/test_pooling.py
diff --git a/test/modules/encoder/test_seq2seq_encoder.py b/tests/modules/encoder/test_seq2seq_encoder.py
similarity index 100%
rename from test/modules/encoder/test_seq2seq_encoder.py
rename to tests/modules/encoder/test_seq2seq_encoder.py
diff --git a/test/modules/generator/__init__.py b/tests/modules/generator/__init__.py
similarity index 100%
rename from test/modules/generator/__init__.py
rename to tests/modules/generator/__init__.py
diff --git a/test/modules/generator/test_seq2seq_generator.py b/tests/modules/generator/test_seq2seq_generator.py
similarity index 100%
rename from test/modules/generator/test_seq2seq_generator.py
rename to tests/modules/generator/test_seq2seq_generator.py
diff --git a/test/modules/test_char_encoder.py b/tests/modules/test_char_encoder.py
similarity index 100%
rename from test/modules/test_char_encoder.py
rename to tests/modules/test_char_encoder.py
diff --git a/test/modules/test_other_modules.py b/tests/modules/test_other_modules.py
similarity index 100%
rename from test/modules/test_other_modules.py
rename to tests/modules/test_other_modules.py
diff --git a/test/modules/test_utils.py b/tests/modules/test_utils.py
similarity index 100%
rename from test/modules/test_utils.py
rename to tests/modules/test_utils.py
diff --git a/test/modules/test_variational_rnn.py b/tests/modules/test_variational_rnn.py
similarity index 100%
rename from test/modules/test_variational_rnn.py
rename to tests/modules/test_variational_rnn.py
diff --git a/test/modules/tokenizer/test_bert_tokenizer.py b/tests/modules/tokenizer/test_bert_tokenizer.py
similarity index 87%
rename from test/modules/tokenizer/test_bert_tokenizer.py
rename to tests/modules/tokenizer/test_bert_tokenizer.py
index 3c395164bc5b985b93e2b8f7a489f4fbd36c1d74..441e76580d397611103af601df7fecfb64a312e9 100644
--- a/test/modules/tokenizer/test_bert_tokenizer.py
+++ b/tests/modules/tokenizer/test_bert_tokenizer.py
@@ -5,7 +5,7 @@ from fastNLP.modules.tokenizer import BertTokenizer
class TestBertTokenizer(unittest.TestCase):
def test_run(self):
        # test the two supported encode modes
- tokenizer = BertTokenizer.from_pretrained('test/data_for_tests/embedding/small_bert')
+ tokenizer = BertTokenizer.from_pretrained('tests/data_for_tests/embedding/small_bert')
tokens1 = tokenizer.encode("This is a demo")
tokens2 = tokenizer.encode("This is a demo", add_special_tokens=False)
diff --git a/test/test_tutorials.py b/tests/test_tutorials.py
similarity index 98%
rename from test/test_tutorials.py
rename to tests/test_tutorials.py
index aa7c4a607b1fd34db7fc33e8315422c8901c0644..2a224f05d1c0943595b6ad7609d24e6b44e72256 100644
--- a/test/test_tutorials.py
+++ b/tests/test_tutorials.py
@@ -85,7 +85,7 @@ class TestTutorial(unittest.TestCase):
class TestOldTutorial(unittest.TestCase):
def test_fastnlp_10min_tutorial(self):
        # read data from csv into a DataSet
- sample_path = "test/data_for_tests/tutorial_sample_dataset.csv"
+ sample_path = "tests/data_for_tests/tutorial_sample_dataset.csv"
dataset = CSVLoader(headers=['raw_sentence', 'label'], sep=' ')._load(sample_path)
print(len(dataset))
print(dataset[0])
@@ -183,7 +183,7 @@ class TestOldTutorial(unittest.TestCase):
def test_fastnlp_1min_tutorial(self):
# tutorials/fastnlp_1min_tutorial.ipynb
- data_path = "test/data_for_tests/tutorial_sample_dataset.csv"
+ data_path = "tests/data_for_tests/tutorial_sample_dataset.csv"
ds = CSVLoader(headers=['raw_sentence', 'label'], sep=' ')._load(data_path)
print(ds[1])