From a1ba7e578ce4b34e80fadfc480e87e92b3ebf93b Mon Sep 17 00:00:00 2001
From: Chijunlong <2869897445@qq.com>
Date: Tue, 28 Dec 2021 11:01:04 +0800
Subject: [PATCH] update mindtext.embeddings/index.rst and installation.rst
add mindtext.dataset.test1.md
---
.../mindtext.dataset.test1.md | 56 ++++++++++++++++++-
.../source/apis/mindtext.embeddings/index.rst | 4 --
docs/source/user/installation.rst | 9 +--
3 files changed, 60 insertions(+), 9 deletions(-)
diff --git a/docs/source/apis/mindtext.dataset/mindtext.dataset.test1.md b/docs/source/apis/mindtext.dataset/mindtext.dataset.test1.md
index 2cb259c..c886cd1 100644
--- a/docs/source/apis/mindtext.dataset/mindtext.dataset.test1.md
+++ b/docs/source/apis/mindtext.dataset/mindtext.dataset.test1.md
@@ -1 +1,55 @@
-# mindtext.dataset.test1
\ No newline at end of file
+# mindtext.dataset.test1
+
+Dataset的构建
+=
+
+Example SST-2数据集Dataset构建
+
+ from mindtext.dataset.classification import SST2Dataset
+
+ dataset = SST2Dataset(paths='./mindtext/dataset/SST-2',
+ tokenizer="./mindtext/pretrain/roberta-base",
+ max_length=128,
+ truncation_strategy=True,
+ batch_size=32)
+
+ ds = dataset()
+ ds = dataset.from_cache( columns_list=['input_ids', 'attention_mask','label'],
+ test_columns_list=['input_ids', 'attention_mask'],
+ batch_size=32
+ )
+
+ train_dataset = ds['train']
+mindtext.dataset.base_dataset.Dataset
+--
+> class mindtext.dataset.base_dataset.Dataset( vocab (Vocabulary, Optional): Convert tokens to indices, default None.
+> name (str, Optional): Dataset name, default None.
+> label_map (Dict[str, int], Optional): Dataset label map, default None.)
+
+通过base_dataset中基类Dataset来构建文本分类、文本匹配和生成任务对应的数据集
+
+>\_\_init\_\_(self, vocab: Vocabulary = None, name: str = None,
+ label_map: Dict[str, int] = None)
+
+参数
+> + vocab(Vocabulary): 词表,默认为None
+> + name(str): 下游任务数据集Dataset名称,默认为None
+> + label_map(Dict[str, int], Optional): Dataset标签映射,默认为None
+>
+
+mindtext.dataset.base_dataset.CLSBaseDataset
+--
+
+文本分类Dataset的基类
+
+Example
+>class SST2Dataset(CLSBaseDataset):
+
+mindtext.dataset.base_dataset.PairCLSBaseDataset
+--
+
+文本匹配Dataset基类
+
+Example
+
+>class LCQMCDataset(PairCLSBaseDataset):
\ No newline at end of file
diff --git a/docs/source/apis/mindtext.embeddings/index.rst b/docs/source/apis/mindtext.embeddings/index.rst
index 1af9f7c..87dc07d 100644
--- a/docs/source/apis/mindtext.embeddings/index.rst
+++ b/docs/source/apis/mindtext.embeddings/index.rst
@@ -9,11 +9,7 @@ mindtext.embeddings
.. toctree::
:maxdepth: 1
- mindtext.embeddings.bert_embedding
mindtext.embeddings.char_embedding
mindtext.embeddings.embedding
- mindtext.embeddings.luke_embedding
- mindtext.embeddings.region_embedding
- mindtext.embeddings.roberta_embedding
mindtext.embeddings.static_embedding
diff --git a/docs/source/user/installation.rst b/docs/source/user/installation.rst
index 5a12c6e..663ffd0 100644
--- a/docs/source/user/installation.rst
+++ b/docs/source/user/installation.rst
@@ -19,10 +19,11 @@ mindtext 依赖如下包::
numpy==1.21.2
mindspore_gpu==1.3.0
-其中PyTorch的安装可能与操作系统及 CUDA 的版本相关,请参见 `PyTorch 官网 `_ 。
-在依赖包安装完成的情况,您可以在命令行执行如下指令完成安装
+其中 ``mindspore`` 的安装可能与操作系统及 ``CUDA`` 的版本相关,请参见 `MindSpore 官网 <https://www.mindspore.cn/install>`_ 。
+在依赖包安装完成的情况下,您可以在命令行执行如下指令完成 mindtext 的安装:
.. code:: shell
- >>> pip install mindtext
- >>> python -m spacy download en
+ >>> git clone https://gitee.com/mindspore/mindtext.git
+ >>> cd mindtext
+ >>> python setup.py install
--
Gitee