diff --git a/assignment-2/handout-1/numpy_fnn.py b/assignment-2/handout-1/numpy_fnn.py
index c2f5bf9d301a40d3390cd35eef2a9b1d0c2c4249..b04cde615c3da3644a52b0ba8ae29dd342eb7f24 100644
--- a/assignment-2/handout-1/numpy_fnn.py
+++ b/assignment-2/handout-1/numpy_fnn.py
@@ -160,3 +160,5 @@ class NumpyModel:
self.W1 -= learning_rate * self.W1_grad
self.W2 -= learning_rate * self.W2_grad
self.W3 -= learning_rate * self.W3_grad
+
+
\ No newline at end of file
diff --git a/assignment-2/submission/19307130211/README.md b/assignment-2/submission/19307130211/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bcbe1fc6a17e98a4a5fd9ec9df13c372ac4455da
--- /dev/null
+++ b/assignment-2/submission/19307130211/README.md
@@ -0,0 +1,255 @@
+# Assignment 2
+
+Name: 陈洋
+
+Student ID: 19307130211
+
+
+
+### 1. Operator Derivations
+
+We first derive the operators required by the FNN and implement the corresponding code.
+
+##### Matmul:
+
+The forward computation of Matmul (with $X$ of shape $(N,d)$ and $W$ of shape $(d,d')$) is:
+$$
+Y_{ij}=\sum_{1\leq k\leq d} X_{ik}\times W_{kj}
+$$
+Then, using equation B.21 from [神经网络与深度学习-邱锡鹏](https://nndl.github.io/nndl-book.pdf),
+
+we obtain
+$$
+\frac{\partial Y}{\partial X}=\frac{\partial(X W)}{\partial X}=W^T\\\\
+\frac{\partial Y}{\partial W}=\frac{\partial(X W)}{\partial W}=X^T
+$$
+By the chain rule, with grad_y known:
+
+$grad\_x=grad\_y\times W^T$, i.e. `grad_x=np.matmul(grad_y,W.T)`
+
+$grad\_w=X^T\times grad\_y$, i.e. `grad_W=np.matmul(x.T,grad_y)`
+
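+As a quick sanity check (not part of the handout), the analytic gradients can be compared against finite differences; the helper below is a hypothetical sketch that assumes the `Matmul` class from numpy_fnn.py is importable:
+
+~~~python
+import numpy as np
+from numpy_fnn import Matmul  # assumes numpy_fnn.py is on the path
+
+def check_matmul_grad(N=4, d=3, d_out=2, eps=1e-6):
+    x, W = np.random.randn(N, d), np.random.randn(d, d_out)
+    op = Matmul()
+    y = op.forward(x, W)
+    grad_y = np.random.randn(*y.shape)          # stand-in upstream gradient
+    grad_x, grad_W = op.backward(grad_y)
+    # numerical derivative of sum(Y * grad_y) w.r.t. x[0, 0]
+    x_pert = x.copy(); x_pert[0, 0] += eps
+    num = ((np.matmul(x_pert, W) - y) * grad_y).sum() / eps
+    return np.allclose(num, grad_x[0, 0], atol=1e-4)
+~~~
+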
+##### ReLU:
+
+The forward computation of ReLU is:
+$$
+Y_{ij}= \begin{cases} X_{ij}, & X_{ij}\geq 0 \\\\ 0, & X_{ij}<0 \end{cases}
+$$
+so
+$$
+\frac{\partial Y_{ij}}{\partial X_{ij}}= \begin{cases} 1, & X_{ij}\geq 0 \\\\ 0, & X_{ij}<0 \end{cases}
+$$
+Since grad_y is given, this yields
+
+$grad\_x_{ij}=grad\_y_{ij}$ where $X_{ij}\geq 0$ and $0$ elsewhere, i.e. `grad_x=np.where(x>0,1,0)*grad_y`
+
+##### Log:
+
+The forward computation of Log is
+$$
+Y_{ij}=\ln(X_{ij}+\epsilon)
+$$
+so
+$$
+\frac {\partial Y_{ij}}{\partial X_{ij}}= \frac {1}{(X_{ij}+\epsilon)}
+$$
+With grad_y given,
+
+$grad\_x_{ij}=grad\_y_{ij}\cdot \frac{1}{X_{ij}+\epsilon}$
+
+which corresponds to the code:
+
+~~~python
+mask=1/(x+self.epsilon)  # compute 1/(X + epsilon)
+grad_x=mask*grad_y
+~~~
+
+##### Softmax:
+
+The forward computation of Softmax is
+$$
+Y_{ij}=\frac{\exp\{X_{ij}\}}{\sum_{k=1}^d\exp\{X_{ik} \}}
+$$
+The corresponding code is
+
+~~~python
+x_exp=np.exp(x)  # exponentiate each element of X
+out=x_exp / np.sum(x_exp,axis=1,keepdims=True)  # sum over each row (axis=1), keeping dimensions
+~~~
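+
+Note (an aside, not a change to the submitted code): `np.exp` can overflow for large logits; a common, mathematically equivalent variant subtracts the row-wise maximum first:
+
+~~~python
+x_shift = x - np.max(x, axis=1, keepdims=True)  # shift logits for numerical stability
+x_exp = np.exp(x_shift)
+out = x_exp / np.sum(x_exp, axis=1, keepdims=True)
+~~~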
+
+Its gradient is derived as follows:
+
+Softmax acts on each row independently, so we consider a single row $Y_i=\mathrm{softmax}(X_i)$; following the derivation in [神经网络与深度学习-邱锡鹏](https://nndl.github.io/nndl-book.pdf),
+$$
+\frac{\partial Y_{ik}}{\partial X_{il}}=Y_{ik}(\delta_{kl}-Y_{il})
+$$
+
+So the Jacobian matrix of the softmax of each row is:
+$$
+J=diag(Y_i)-Y_i\times Y_i^T
+$$
+The corresponding code is
+
+~~~Python
+J=np.diag(temp)-np.outer(temp,temp)
+# the gradient of row i is grad_Y_i @ J, so grad_X_i is computed as:
+t=np.dot(grad_y[i],Jacobs[i])
+~~~
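+
+Equivalently (a vectorized sketch, not the submitted loop-based code), the per-row Jacobian-vector products can be computed in one shot, assuming `out` holds the cached softmax output and `grad_y` the upstream gradient:
+
+~~~python
+# grad_x[i] = grad_y[i] @ (diag(out[i]) - outer(out[i], out[i]))
+#           = out[i] * (grad_y[i] - grad_y[i] @ out[i])
+grad_x = out * (grad_y - np.sum(grad_y * out, axis=1, keepdims=True))
+~~~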
+
+### 2. Completing and Implementing the Network
+
+Reading torch_mnist.py gives the structure of the network:
+
+`input -> fully connected -> ReLU -> fully connected -> ReLU -> fully connected -> softmax -> Log`
+
+So the operators implemented above only need to be called in the same order to meet the requirements of the assignment, as shown below.
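+
+The completed `forward` in numpy_fnn.py chains the operators in exactly this order:
+
+~~~python
+x = self.matmul_1.forward(x, self.W1)
+x = self.relu_1.forward(x)
+x = self.matmul_2.forward(x, self.W2)
+x = self.relu_2.forward(x)
+x = self.matmul_3.forward(x, self.W3)
+x = self.softmax.forward(x)
+x = self.log.forward(x)
+~~~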
+
+##### 1. Experiment
+
+After filling in the code, numpy_mnist.py was run for the first time, with the results below,
+
+using epoch=3 and learning_rate=0.1.
+
+~~~shell
+[0] Accuracy: 0.9487
+[1] Accuracy: 0.9647
+[2] Accuracy: 0.9690
+~~~
+
+
+
+##### 2. Implementing mini_batch
+
+The original mini_batch() function uses the PyTorch DataLoader to shuffle and batch the data, so our own mini_batch() must provide these two functions.
+
+First the data and the labels are collected into separate lists, and numpy.random.choice() is used to obtain a shuffled index array:
+
+~~~python
+idx=np.random.choice(Num,Num,replace=False)
+~~~
+
+Because this index array is used for fancy indexing, the lists are first converted with np.array(). The training loop therefore needs a small change, since the batches are already NumPy arrays:
+
+~~~python
+#y_pred = model.forward(x.numpy())
+y_pred = model.forward(x)
+~~~
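+
+For reference, the shuffle-and-split logic in numpy_mnist.py could equivalently be written with np.random.permutation and np.array_split; this is only an alternative sketch (batch sizes may differ by one element), not the submitted implementation:
+
+~~~python
+import numpy as np
+
+def mini_batch_sketch(data, label, batch_size=128):
+    idx = np.random.permutation(len(data))                 # shuffled indices
+    data, label = data[idx], label[idx]
+    n_batches = int(np.ceil(len(data) / batch_size))       # number of chunks
+    return list(zip(np.array_split(data, n_batches),
+                    np.array_split(label, n_batches)))
+~~~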
+
+##### 3. Further Discussion of the Model
+
+Increasing the number of epochs to 10, 15, and 20:
+
+
+
+
+
+ Figures: epoch = 10, 15, and 20, respectively
+
+
+
+The more epochs, the smoother the final part of the curve. The strong oscillations in the early part of the curves are suspected to be caused by a learning rate that is too large, so the learning rate is examined next.
+
+Trying different learning rates, learning_rate = 0.05 and 0.01 (epoch = 30):
+
+| epoch | learning_rate=0.1 | learning_rate=0.05 | learning_rate=0.01 |
+| ----- | ----------------- | ------------------ | ------------------ |
+| 0 | 0.9526 | 0.9283 | 0.8705 |
+| 5 | 0.9784 | 0.9725 | 0.9377 |
+| 10 | 0.9786 | 0.9783 | 0.9530 |
+| 15 | 0.9807 | 0.9799 | 0.9623 |
+| 20 | 0.9819 | 0.9810 | 0.9688 |
+| 25 | 0.9816 | 0.9803 | 0.9714 |
+| 30 | 0.9819 | 0.9802 | 0.9728 |
+
+With epoch = 20 the curves did not show clear convergence, so epoch was increased to 30.
+
+
+
+
+
+ Figures: learning_rate = 0.1, 0.05, and 0.01, respectively
+
+
+
+After lowering the learning rate, convergence becomes noticeably slower; with learning_rate = 0.1 the model reaches convergence quickly and its curve ends up smoother, which is the opposite of the earlier guess.
+
+### 3. Extensions
+
+##### 1. Momentum
+
+Momentum adds the notion of inertia to gradient descent: updates speed up along dimensions whose gradient direction stays the same and slow down along dimensions whose gradient direction keeps changing, which accelerates convergence and reduces oscillation:
+$$
+m_t=\beta_1\cdot m_{t-1}+(1-\beta_1)\cdot g_t\\\\
+w_{t+1}=w_t-\eta\cdot m_t
+$$
+where $m_t$ is the recomputed descent direction.
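+
+This is exactly what `optimizeM` in numpy_fnn.py does (shown here for W1; W2 and W3 are updated in the same way):
+
+~~~python
+self.W1_mt = beta_1 * self.W1_mt + (1 - beta_1) * self.W1_grad
+self.W1 -= learning_rate * self.W1_mt
+~~~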
+
+##### 2.Adam
+
+Adam = Adaptive + Momentum. Compared with Momentum, Adam adds an adaptive part: different parameters should be updated to different degrees depending on their importance. For frequently updated parameters we have already accumulated plenty of knowledge about them, do not want them to be dominated by any single sample, and prefer a smaller learning rate; for rarely updated parameters the opposite holds and a larger learning rate is preferred.
+
+Update frequency is measured with a second-order moment, namely the sum of the squares of all gradients seen so far along that dimension:
+$$
+V_t=\sum_{\tau=1}^t(g_\tau^2)
+$$
+This alone is problematic, because the update step is
+$$
+\eta_t=\alpha\cdot m_t/\sqrt{V_t}
+$$
+and since $V_t$ only ever increases, the effective learning rate decays monotonically towards zero and training effectively stops early. It is therefore replaced by an exponential moving average:
+$$
+V_t=\beta_2\cdot V_{t-1}+(1-\beta_2)\cdot g_t^2
+$$
+Together with the Momentum formula for $m_t$, the formulas above give the update:
+$$
+w_{t+1}=w_t-\eta_t
+$$
+(Reference: https://zhuanlan.zhihu.com/p/32230623)
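+
+Putting the pieces together, `optimizeA` in numpy_fnn.py performs the following per-weight update (shown for W1; note that, unlike textbook Adam, no bias correction of $m_t$ and $V_t$ is applied):
+
+~~~python
+self.W1_mt = beta_1 * self.W1_mt + (1 - beta_1) * self.W1_grad
+self.W1_vt = beta_2 * self.W1_vt + (1 - beta_2) * self.W1_grad * self.W1_grad
+self.W1 -= learning_rate * self.W1_mt / (self.W1_vt ** 0.5 + self.epsilon)
+~~~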
+
+##### 3. Implementation
+
+See the two new functions optimizeM and optimizeA in numpy_fnn.py, which implement the Momentum and Adam update rules respectively.
+
+To make these functions easier to implement, the following fields were added to the model's initialization:
+
+~~~python
+ self.W1_mt=0
+ self.W2_mt=0
+ self.W3_mt=0
+
+ self.W1_vt=0
+ self.W2_vt=0
+ self.W3_vt=0
+
+ self.epsilon = 1e-8
+~~~
+
+##### 4. Experiments
+
+Experimental results, with epoch = 20, beta_1 = 0.9, beta_2 = 0.999, and learning_rate = 0.001 for Adam.
+
+| epoch | Accuracy(normal) | Accuracy(M) | Accuracy(A) |
+| ----- | ---------------- | ----------- | ----------- |
+| 0 | 0.9470 | 0.9420 | 0.9678 |
+| 5 | 0.9780 | 0.9770 | 0.9791 |
+| 10 | 0.9819 | 0.9795 | 0.9795 |
+| 15 | 0.9816 | 0.9822 | 0.9804 |
+| 20 | 0.9827 | 0.9828 | 0.9795 |
+
+
+
+
+
+ Figures: plain SGD, Momentum, and Adam, respectively
+
+
+
+The results show that Momentum offers no clear advantage over the original method. Adam, in contrast, oscillates less at the start of training and converges faster, quickly reaching an accuracy close to the peak level of about 0.98; later in training, however, it oscillates more strongly than the other two methods and its final result is also worse.
+
+The paper ([https://arxiv.org/pdf/1711.05101.pdf](https://arxiv.org/pdf/1711.05101.pdf)) suggests that an important reason is that L2 regularization is not as effective with Adam as with SGD, for two reasons:
+
+* L2 regularization and weight decay are not equivalent in adaptive-learning-rate algorithms such as Adam
+
+* optimizing an L2-regularized loss with Adam is not effective
+
+See the paper for details.
+
diff --git a/assignment-2/submission/19307130211/img/Figure_2.png b/assignment-2/submission/19307130211/img/Figure_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..76883341d0701a95d0f04e1d8af039a4e53c659a
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_2.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_3.png b/assignment-2/submission/19307130211/img/Figure_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..1252c8914e49c6dacad6bc5974addb86d2401ff1
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_3.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_4.png b/assignment-2/submission/19307130211/img/Figure_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..80dadd7dfb75da22953522acd50ae0713a02a8ec
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_4.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_5.png b/assignment-2/submission/19307130211/img/Figure_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..b1c7f8a838462db83e1b6475aa393605ad8c3c86
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_5.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_6.png b/assignment-2/submission/19307130211/img/Figure_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ff98409b32c15d6e7b7cf76466f60c956138546
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_6.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_7.png b/assignment-2/submission/19307130211/img/Figure_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..b8c1ed7a636d52445ce20afbdd7be22affb50b7b
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_7.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_8.png b/assignment-2/submission/19307130211/img/Figure_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..8d4b2c7762d9f0f1a43dda63519e5a183dd2e16c
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_8.png differ
diff --git a/assignment-2/submission/19307130211/img/Figure_9.png b/assignment-2/submission/19307130211/img/Figure_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..03937c6469bae49aa6a455761b21987f375ec527
Binary files /dev/null and b/assignment-2/submission/19307130211/img/Figure_9.png differ
diff --git a/assignment-2/submission/19307130211/img/figure_1.png b/assignment-2/submission/19307130211/img/figure_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..fb0f751a7d141129b7a60f8c3f9e7cfa4da56464
Binary files /dev/null and b/assignment-2/submission/19307130211/img/figure_1.png differ
diff --git a/assignment-2/submission/19307130211/img/image-20210429001857144.png b/assignment-2/submission/19307130211/img/image-20210429001857144.png
new file mode 100644
index 0000000000000000000000000000000000000000..66e9de7caab5e6827c4370ef9d642b3048d98966
Binary files /dev/null and b/assignment-2/submission/19307130211/img/image-20210429001857144.png differ
diff --git a/assignment-2/submission/19307130211/img/image-20210429002059041.png b/assignment-2/submission/19307130211/img/image-20210429002059041.png
new file mode 100644
index 0000000000000000000000000000000000000000..c75d5682f1da59bea01919241bac6bd6e5cb80fc
Binary files /dev/null and b/assignment-2/submission/19307130211/img/image-20210429002059041.png differ
diff --git a/assignment-2/submission/19307130211/img/image-20210429010513776.png b/assignment-2/submission/19307130211/img/image-20210429010513776.png
new file mode 100644
index 0000000000000000000000000000000000000000..9441ed68d0b2e868f4329c4e5b4d7221e261eda4
Binary files /dev/null and b/assignment-2/submission/19307130211/img/image-20210429010513776.png differ
diff --git a/assignment-2/submission/19307130211/img/image-20210429183140858.png b/assignment-2/submission/19307130211/img/image-20210429183140858.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3cecd075a9061b0c833ab21906d416ebb73eef3
Binary files /dev/null and b/assignment-2/submission/19307130211/img/image-20210429183140858.png differ
diff --git a/assignment-2/submission/19307130211/img/image-20210430110239276.png b/assignment-2/submission/19307130211/img/image-20210430110239276.png
new file mode 100644
index 0000000000000000000000000000000000000000..e0cdc6b860e4faa860e6f7b986269c2cfcc6b400
Binary files /dev/null and b/assignment-2/submission/19307130211/img/image-20210430110239276.png differ
diff --git a/assignment-2/submission/19307130211/img/image-20210430110418797.png b/assignment-2/submission/19307130211/img/image-20210430110418797.png
new file mode 100644
index 0000000000000000000000000000000000000000..b49ea3c0b2c1869c87ec5594208f0c61c9ff1cb3
Binary files /dev/null and b/assignment-2/submission/19307130211/img/image-20210430110418797.png differ
diff --git a/assignment-2/submission/19307130211/numpy_fnn.py b/assignment-2/submission/19307130211/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..3761d8341357b106d67b8e109cc077bed07c88a2
--- /dev/null
+++ b/assignment-2/submission/19307130211/numpy_fnn.py
@@ -0,0 +1,237 @@
+import numpy as np
+
+
+class NumpyOp:
+
+ def __init__(self):
+ self.memory = {}
+ self.epsilon = 1e-12
+
+
+
+class Matmul(NumpyOp):
+
+ def forward(self, x, W):
+ """
+ x: shape(N, d)
+ w: shape(d, d')
+ """
+ self.memory['x'] = x
+ self.memory['W'] = W
+ h = np.matmul(x, W)
+ return h
+
+ def backward(self, grad_y):
+ """
+ grad_y: shape(N, d')
+ """
+
+ ####################
+ # code 1 #
+ ####################
+ x=self.memory['x']
+ W=self.memory['W']
+ grad_x=np.matmul(grad_y,W.T)
+ grad_W=np.matmul(x.T,grad_y)
+ return grad_x, grad_W
+
+
+class Relu(NumpyOp):
+
+ def forward(self, x):
+ self.memory['x'] = x
+ return np.where(x > 0, x, np.zeros_like(x))
+
+ def backward(self, grad_y):
+ """
+ grad_y: same shape as x
+ """
+
+ ####################
+ # code 2 #
+ ####################
+ x=self.memory['x']
+        # the derivative is 1 where x > 0 and 0 elsewhere
+ mask=np.where(x>0,np.ones_like(x),np.zeros_like(x))
+ grad_x=mask*grad_y
+ return grad_x
+
+
+class Log(NumpyOp):
+
+ def forward(self, x):
+ """
+ x: shape(N, c)
+ """
+
+ out = np.log(x + self.epsilon)
+ self.memory['x'] = x
+
+ return out
+
+ def backward(self, grad_y):
+ """
+ grad_y: same shape as x
+ """
+
+ ####################
+ # code 3 #
+ ####################
+ # grad_x = grad_y * np.reciprocal(self.memory['x'] + self.epsilon)
+ x=self.memory['x']
+ mask=1/(x+self.epsilon)
+ grad_x=mask*grad_y
+ return grad_x
+
+
+class Softmax(NumpyOp):
+ """
+ softmax over last dimension
+ """
+
+ def forward(self, x):
+ """
+ x: shape(N, c)
+ """
+ ####################
+ # code 4 #
+ ####################
+ self.memory['x']=x
+ x_exp=np.exp(x)
+ out=x_exp / np.sum(x_exp,axis=1,keepdims=True)
+ return out
+
+ def backward(self, grad_y):
+ """
+ grad_y: same shape as x
+ """
+ ####################
+ # code 5 #
+ ####################
+ x=self.memory['x']
+ softmax=self.forward(x)
+ N=x.shape[0]
+ Jacobs=[]
+
+ for i in range(N):
+ temp=softmax[i]
+            J=np.diag(temp)-np.outer(temp,temp)  # Jacobian of the softmax for row i
+ Jacobs.append(J)
+ Jacobs=np.array(Jacobs)
+
+ grad_x=[]
+ for i in range(N):
+            t=np.dot(grad_y[i],Jacobs[i])  # multiply the row gradient by its Jacobian
+ grad_x.append(t)
+ grad_x=np.array(grad_x)
+ return grad_x
+
+
+class NumpyLoss:
+
+ def __init__(self):
+ self.target = None
+
+ def get_loss(self, pred, target):
+ self.target = target
+ return (-pred * target).sum(axis=1).mean()
+
+ def backward(self):
+ return -self.target / self.target.shape[0]
+
+
+class NumpyModel:
+ def __init__(self):
+ self.W1 = np.random.normal(size=(28 * 28, 256))
+ self.W2 = np.random.normal(size=(256, 64))
+ self.W3 = np.random.normal(size=(64, 10))
+
+        # extra state for the extended optimizers (Momentum / Adam)
+ self.W1_mt=0
+ self.W2_mt=0
+ self.W3_mt=0
+
+ self.W1_vt=0
+ self.W2_vt=0
+ self.W3_vt=0
+
+ self.epsilon = 1e-8
+
+        # operators used in forward and backward
+ self.matmul_1 = Matmul()
+ self.relu_1 = Relu()
+ self.matmul_2 = Matmul()
+ self.relu_2 = Relu()
+ self.matmul_3 = Matmul()
+ self.softmax = Softmax()
+ self.log = Log()
+
+        # these are updated in backward: softmax_grad, log_grad, etc. hold each operator's backward gradient (the partial derivative of the loss w.r.t. the operator's input)
+ self.x1_grad, self.W1_grad = None, None
+ self.relu_1_grad = None
+ self.x2_grad, self.W2_grad = None, None
+ self.relu_2_grad = None
+ self.x3_grad, self.W3_grad = None, None
+ self.softmax_grad = None
+ self.log_grad = None
+
+ def forward(self, x):
+ x = x.reshape(-1, 28 * 28)
+
+ ####################
+ # code 6 #
+ ####################
+        # architecture: input X -> fully connected -> ReLU -> fully connected -> ReLU -> fully connected -> softmax -> log -> loss
+ x=self.matmul_1.forward(x, self.W1)
+ x=self.relu_1.forward(x)
+ x=self.matmul_2.forward(x, self.W2)
+ x=self.relu_2.forward(x)
+ x=self.matmul_3.forward(x, self.W3)
+ x=self.softmax.forward(x)
+ x=self.log.forward(x)
+ return x
+
+ def backward(self, y):
+
+ ####################
+ # code 7 #
+ ####################
+ self.log_grad=self.log.backward(y)
+ self.softmax_grad=self.softmax.backward(self.log_grad)
+ self.x3_grad,self.W3_grad=self.matmul_3.backward(self.softmax_grad)
+ self.relu_2_grad=self.relu_2.backward(self.x3_grad)
+ self.x2_grad,self.W2_grad=self.matmul_2.backward(self.relu_2_grad)
+ self.relu_1_grad=self.relu_1.backward(self.x2_grad)
+ self.x1_grad,self.W1_grad=self.matmul_1.backward(self.relu_1_grad)
+
+ def optimize(self, learning_rate):
+ self.W1 -= learning_rate * self.W1_grad
+ self.W2 -= learning_rate * self.W2_grad
+ self.W3 -= learning_rate * self.W3_grad
+
+ def optimizeM(self,learning_rate,beta_1=0.9):
+ self.W1_mt=beta_1*self.W1_mt+(1-beta_1)*self.W1_grad
+ self.W2_mt=beta_1*self.W2_mt+(1-beta_1)*self.W2_grad
+ self.W3_mt=beta_1*self.W3_mt+(1-beta_1)*self.W3_grad
+
+ self.W1 -= learning_rate * self.W1_mt
+ self.W2 -= learning_rate * self.W2_mt
+ self.W3 -= learning_rate * self.W3_mt
+
+
+ def optimizeA(self,learning_rate=0.001,beta_1=0.9,beta_2=0.999):
+ self.W1_mt=beta_1*self.W1_mt+(1-beta_1)*self.W1_grad
+ self.W2_mt=beta_1*self.W2_mt+(1-beta_1)*self.W2_grad
+ self.W3_mt=beta_1*self.W3_mt+(1-beta_1)*self.W3_grad
+
+ self.W1_vt=beta_2*self.W1_vt+(1-beta_2)*self.W1_grad*self.W1_grad
+ self.W2_vt=beta_2*self.W2_vt+(1-beta_2)*self.W2_grad*self.W2_grad
+ self.W3_vt=beta_2*self.W3_vt+(1-beta_2)*self.W3_grad*self.W3_grad
+
+ self.W1-=learning_rate*self.W1_mt/(self.W1_vt**0.5+self.epsilon)
+ self.W2-=learning_rate*self.W2_mt/(self.W2_vt**0.5+self.epsilon)
+ self.W3-=learning_rate*self.W3_mt/(self.W3_vt**0.5+self.epsilon)
+
+
\ No newline at end of file
diff --git a/assignment-2/submission/19307130211/numpy_mnist.py b/assignment-2/submission/19307130211/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0cfc53b8cd7904e6ca1031461b3f8563a1a12f7
--- /dev/null
+++ b/assignment-2/submission/19307130211/numpy_mnist.py
@@ -0,0 +1,76 @@
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+
+def mini_batch(dataset,batch_size=128,numpy=False):
+
+
+    # collect the data and labels
+ data=[]
+ label=[]
+ for each in dataset:
+ data.append(np.array(each[0]))
+ label.append(np.array(each[1]))
+
+    # build a shuffled index
+ Num=dataset.__len__()
+ idx=np.random.choice(Num,Num,replace=False)
+    # shuffle the data
+ data=np.array(data)[idx,]
+ label=np.array(label)[idx,]
+
+    # split into batches
+ result=[]
+ i=0
+ while i*batch_size <= Num:
+ start=batch_size*i
+ if (i+1)*batch_size<=Num:
+ end=(i+1)*batch_size
+ else:
+ end=Num
+ result.append((data[start:end],label[start:end]))
+ i=i+1
+ return result
+
+def numpy_run():
+ train_dataset, test_dataset = download_mnist()
+
+ model = NumpyModel()
+ numpy_loss = NumpyLoss()
+ model.W1, model.W2, model.W3 = get_torch_initialization()
+
+ train_loss = []
+
+ epoch_number = 30
+ learning_rate = 0.1
+
+ opt_m=False
+ opt_v=False
+
+ for epoch in range(epoch_number):
+ for x, y in mini_batch(train_dataset):
+ y = one_hot(y)
+
+ y_pred = model.forward(x)
+ loss = numpy_loss.get_loss(y_pred, y)
+
+ model.backward(numpy_loss.backward())
+ if opt_m:
+ model.optimizeM(learning_rate)
+ elif opt_v:
+ model.optimizeA()
+ else:
+ model.optimize(learning_rate)
+
+ train_loss.append(loss.item())
+
+ x, y = batch(test_dataset)[0]
+ accuracy = np.mean((model.forward(x).argmax(axis=1) == y))
+ print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+ plot_curve(train_loss)
+
+
+if __name__ == "__main__":
+ numpy_run()