diff --git a/tutorials/training/source_zh_cn/advanced_use/images/classical_nlp_loss.png b/tutorials/training/source_zh_cn/advanced_use/images/classical_nlp_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..47745d5c5254253d814ad67d8d8daaaad56ff6b9 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/classical_nlp_loss.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/nlp_loss.png b/tutorials/training/source_zh_cn/advanced_use/images/nlp_loss.png index 5746e3fe07ba2d50cec099143ebd5fcd27cb235d..920c4d270cef33660ef7d0867d412494f1415c5a 100644 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/nlp_loss.png and b/tutorials/training/source_zh_cn/advanced_use/images/nlp_loss.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb b/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb index 8a56a3bd1c4530bd9284435016d21a28577f114f..65fd4cd5575d5a38108b8b867031cd256d5fac62 100644 --- a/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb +++ b/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb @@ -530,7 +530,7 @@ "cell_type": "code", "execution_count": 16, "metadata": { - "scrolled": false, + "scrolled": true, "tags": [] }, "outputs": [ @@ -538,21 +538,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "epoch: 25 step: 20 time: 0.580, loss is 3.154\n", - "epoch: 50 step: 20 time: 0.532, loss is 2.945\n", - "epoch: 75 step: 20 time: 0.643, loss is 0.226\n", - "epoch: 100 step: 20 time: 0.562, loss is 0.016\n", - "epoch: 125 step: 20 time: 0.516, loss is 0.002\n", - "epoch: 150 step: 20 time: 0.658, loss is 0.004\n", - "epoch: 175 step: 20 time: 0.554, loss is 0.000\n", - "epoch: 200 step: 20 time: 0.504, loss is 5.146\n", - "epoch: 225 step: 20 time: 0.493, loss is 0.001\n", - "epoch: 250 step: 20 time: 0.549, loss is 0.091\n", - "epoch: 275 step: 20 time: 0.502, loss is 0.018\n", - "epoch: 300 step: 20 
time: 0.616, loss is 0.000\n", - "epoch: 325 step: 20 time: 0.545, loss is 0.004\n", - "epoch: 350 step: 20 time: 0.499, loss is 0.003\n", - "Total time used: 211.59854197502136\n" + "epoch: 25 step: 20 time: 0.592, loss is 3.154\n", + "epoch: 50 step: 20 time: 0.614, loss is 2.944\n", + "epoch: 75 step: 20 time: 0.572, loss is 0.224\n", + "epoch: 100 step: 20 time: 0.562, loss is 0.015\n", + "epoch: 125 step: 20 time: 0.545, loss is 0.009\n", + "epoch: 150 step: 20 time: 0.599, loss is 0.003\n", + "epoch: 175 step: 20 time: 0.586, loss is 0.002\n", + "epoch: 200 step: 20 time: 0.552, loss is 0.045\n", + "epoch: 225 step: 20 time: 0.590, loss is 0.001\n", + "epoch: 250 step: 20 time: 0.643, loss is 0.001\n", + "epoch: 275 step: 20 time: 0.562, loss is 0.001\n", + "epoch: 300 step: 20 time: 0.584, loss is 0.001\n", + "epoch: 325 step: 20 time: 0.566, loss is 0.000\n", + "epoch: 350 step: 20 time: 0.578, loss is 0.000\n", + "Total time used: 206.29734826087952\n" ] } ], @@ -570,7 +570,7 @@ "\n", "ms.set_seed(42)\n", "window_size = 2\n", - "embeding_dim = 10\n", + "embedding_dim = 10\n", "hidden_dim = 128\n", "word_dict, sample = GenerateWordDictAndSample(corpus, window=window_size)\n", "train_x,train_y = GenerateTrainData(sample, word_dict)\n", @@ -579,12 +579,12 @@ " \"around\": train_x,\n", " \"center\": train_y\n", "},shuffle=False).batch(3)\n", - "net = CBOW(len(word_dict), embeding_dim, window_size, 3, 4, hidden_dim)\n", + "net = CBOW(len(word_dict), embedding_dim, window_size, 3, 4, hidden_dim)\n", "net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')\n", "net_opt = nn.Momentum(net.trainable_params(), 0.01, 0.9)\n", "loss_monitor = LossMonitorWithCollection(500)\n", "model = Model(net, net_loss, net_opt)\n", - "model.train(350, train_loader, callbacks=[loss_monitor])" + "model.train(350, train_loader, callbacks=[loss_monitor], dataset_sink_mode=False)" ] }, { @@ -631,24 +631,24 @@ { "data": { "text/plain": [ - "array([ 2.2985651e+00, 
-5.9529102e-01, -1.4730859e+00, -8.7775314e-01,\n", - " 1.5892730e+00, 1.2431808e+00, 4.6158323e-01, 1.4650096e-02,\n", - " -1.5554352e+00, 1.7425102e+00, 8.0435050e-01, 2.9033291e-01,\n", - " 4.5533547e-01, -3.3123956e+00, -5.0230217e-03, 1.4551424e+00,\n", - " -2.2800403e+00, 2.8021520e-01, -6.8450904e-01, 3.2497895e+00,\n", - " -6.8959081e-01, -1.9466689e+00, -9.2352051e-01, 1.0398046e+00,\n", - " -8.0794984e-01, 9.3039703e-01, 1.2520922e+00, 7.7605700e-01,\n", - " -2.2833204e-01, -6.5232110e-01, -8.0807763e-01, 3.1698236e+00,\n", - " -3.1225615e+00, 1.6280267e+00, -2.9284138e-02, -6.4399004e-01,\n", - " -8.4129974e-02, 1.6495787e-02, 6.7466281e-02, 4.9414963e-01,\n", - " -9.5305812e-01, 9.5651263e-01, -2.4311234e-01, -2.3619778e-01,\n", - " 1.6919808e+00, 1.3713958e+00, -1.8677666e+00, 2.2548558e-01,\n", - " -2.4450117e-01, 1.6238995e-01, -2.4916198e-02, -4.4683918e-01,\n", - " 1.2642978e+00, 2.4357845e-01, -7.0971340e-02, 1.9800837e-01,\n", - " -2.3628614e+00, 1.9120242e-02, 9.1549194e-01, 2.4992308e-01,\n", - " 4.3245099e-02, 6.1443973e-02, -2.5041349e+00, -2.1672893e-01,\n", - " 2.4449271e-01, 2.0587114e-03, 3.3346687e-02, 2.3458574e-02,\n", - " 1.0729489e-01, 6.5198225e-01, 4.1606693e-04, -9.2881303e-03],\n", + "array([ 1.52044818e-01, 1.71521559e-01, 2.35021308e-01, -3.95286232e-01,\n", + " -3.71680595e-03, 7.96886325e-01, -4.04954888e-02, 1.55393332e-01,\n", + " 4.11805660e-02, 7.79824018e-01, 2.96543002e-01, -2.21819162e-01,\n", + " -4.67430688e-02, 4.66759771e-01, 2.75283188e-01, 1.35858059e-01,\n", + " -3.23841363e-01, -2.31937021e-01, -4.68942285e-01, -1.96520030e-01,\n", + " 2.16065589e-02, 1.23866223e-01, -9.68078300e-02, 1.69127151e-01,\n", + " -8.90062153e-01, 2.56734312e-01, 8.37369189e-02, -1.15734830e-01,\n", + " -1.34410933e-01, -3.12207133e-01, -8.90189946e-01, 1.97006428e+00,\n", + " -2.49193460e-02, 2.25960299e-01, -3.90179232e-02, -3.03875893e-01,\n", + " 2.02030335e-02, -7.07065910e-02, -4.81521547e-01, 5.04257262e-01,\n", + " 
-1.32081115e+00, 2.83502758e-01, 2.80248702e-01, 1.63375765e-01,\n", + " -6.91465080e-01, 6.82975233e-01, -2.67829001e-01, 2.29658693e-01,\n", + " 2.78859794e-01, -1.04206935e-01, -5.57148576e-01, 4.41706657e-01,\n", + " -6.76973104e-01, 2.47751385e-01, -2.96468334e-03, -1.66827604e-01,\n", + " -3.47717047e-01, -9.04396921e-03, -7.69433856e-01, 4.33617719e-02,\n", + " -2.09145937e-02, -1.55236557e-01, -2.16777384e-01, -2.26556376e-01,\n", + " -6.16374731e-01, 2.05871137e-03, -3.08128931e-02, -1.63372140e-02,\n", + " 1.46710426e-01, 2.31793106e-01, 4.16066934e-04, -9.28813033e-03],\n", " dtype=float32)" ] }, @@ -661,6 +661,157 @@ "net.embedding.weight.asnumpy()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 经典版词向量嵌入层\n", + "\n", + "这里我们利用经典的词向量嵌入层来搭建一个经典的CBOW神经网络,并与量子版本进行对比。\n", + "\n", + "首先,搭建经典的CBOW神经网络,其中的参数跟量子版本的类似。" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "class CBOWClassical(nn.Cell):\n", + " def __init__(self, num_embedding, embedding_dim, window, hidden_dim):\n", + " super(CBOWClassical, self).__init__()\n", + " self.dim = 2 * window * embedding_dim\n", + " self.embedding = nn.Embedding(num_embedding, embedding_dim, True)\n", + " self.dense1 = nn.Dense(self.dim, hidden_dim)\n", + " self.dense2 = nn.Dense(hidden_dim, num_embedding)\n", + " self.relu = ops.ReLU()\n", + " self.reshape = ops.Reshape()\n", + "\n", + " def construct(self, x):\n", + " embed = self.embedding(x)\n", + " embed = self.reshape(embed, (-1, self.dim))\n", + " out = self.dense1(embed)\n", + " out = self.relu(out)\n", + " out = self.dense2(out)\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "生成适用于经典CBOW神经网络的数据集。" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train_x shape: (58, 4)\n", + "train_y shape: (58,)\n" + ] + } + ], + 
"source": [ + "train_x = []\n", + "train_y = []\n", + "for i in sample:\n", + " around, center = i\n", + " train_y.append(word_dict[center])\n", + " train_x.append([])\n", + " for j in around:\n", + " train_x[-1].append(word_dict[j])\n", + "train_x = np.array(train_x).astype(np.int32)\n", + "train_y = np.array(train_y).astype(np.int32)\n", + "print(\"train_x shape: \", train_x.shape)\n", + "print(\"train_y shape: \", train_y.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "我们对经典CBOW网络进行训练。" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 25 step: 20 time: 0.008, loss is 3.155\n", + "epoch: 50 step: 20 time: 0.026, loss is 3.027\n", + "epoch: 75 step: 20 time: 0.010, loss is 3.010\n", + "epoch: 100 step: 20 time: 0.009, loss is 2.955\n", + "epoch: 125 step: 20 time: 0.008, loss is 0.630\n", + "epoch: 150 step: 20 time: 0.008, loss is 0.059\n", + "epoch: 175 step: 20 time: 0.009, loss is 0.008\n", + "epoch: 200 step: 20 time: 0.008, loss is 0.003\n", + "epoch: 225 step: 20 time: 0.017, loss is 0.001\n", + "epoch: 250 step: 20 time: 0.008, loss is 0.001\n", + "epoch: 275 step: 20 time: 0.016, loss is 0.000\n", + "epoch: 300 step: 20 time: 0.008, loss is 0.000\n", + "epoch: 325 step: 20 time: 0.016, loss is 0.000\n", + "epoch: 350 step: 20 time: 0.008, loss is 0.000\n", + "Total time used: 5.06074857711792\n" + ] + } + ], + "source": [ + "train_loader = ds.NumpySlicesDataset({\n", + " \"around\": train_x,\n", + " \"center\": train_y\n", + "},shuffle=False).batch(3)\n", + "net = CBOWClassical(len(word_dict), embedding_dim, window_size, hidden_dim)\n", + "net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')\n", + "net_opt = nn.Momentum(net.trainable_params(), 0.01, 0.9)\n", + "loss_monitor = LossMonitorWithCollection(500)\n", + "model = Model(net, net_loss, net_opt)\n", + "model.train(350, 
train_loader, callbacks=[loss_monitor], dataset_sink_mode=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "打印收敛过程中的损失函数值:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.plot(loss_monitor.loss,'.')\n", + "plt.xlabel('Steps')\n", + "plt.ylabel('Loss')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "得到收敛图为\n", + "\n", + "![classical nlp loss](https://gitee.com/mindspore/docs/raw/master/tutorials/training/source_zh_cn/advanced_use/images/classical_nlp_loss.png)\n", + "\n", + "由上可知,通过量子模拟得到的量子版词嵌入模型也能很好地完成嵌入任务。当数据集大到经典计算机算力难以承受时,量子计算机将能够轻松处理这类问题。" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ @@ -693,4 +844,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file