diff --git a/assignment-2/submission/16307130040/README.md b/assignment-2/submission/16307130040/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..74274e457388b56b4227c995ac2586137ddf1d19
--- /dev/null
+++ b/assignment-2/submission/16307130040/README.md
@@ -0,0 +1,148 @@
+# Lab Report 2
+
+### 1. Results
+
+After replacing the `mini_batch` function, the model trains successfully:
+
+![](./img/Figure_1.png)
+
+```shell
+[0] Accuracy: 0.9453
+[1] Accuracy: 0.9656
+[2] Accuracy: 0.9689
+```
+
+### 2. Replacing mini_batch
+
+```python
+def mini_batch(dataset, batch_size=128, numpy=True):
+    data = []
+    label = []
+    for each in dataset:
+        data.append(np.array(each[0]))
+        label.append(each[1])
+    data = np.array(data)
+    label = np.array(label)
+
+    # shuffle samples and labels with the same random permutation
+    m = data.shape[0]
+    permutation = np.random.permutation(m)
+    data = data[permutation]
+    label = label[permutation]
+
+    # slice into full batches of batch_size; the remainder is dropped
+    n = m // batch_size
+    mini_batches = []
+    for i in range(n):
+        mini_batches.append([data[i * batch_size:(i + 1) * batch_size],
+                             label[i * batch_size:(i + 1) * batch_size]])
+
+    return mini_batches
+```
+
+The overall structure follows the `batch` function in `utils.py`. The first half is the same as `batch`: the samples and labels in `dataset` are collected into `data` and `label`. The two arrays are then shuffled with a shared random permutation and sliced into batches of `batch_size`; each batch of data and labels is appended to one large list, which is returned. Samples that do not fill a complete final batch are dropped.
+
+### 3. Deriving the backpropagation formulas
+
+#### 1. Matmul
+
+![](./img/matmul.jpg)
+
+Let the output y = x·W be an l-dimensional row vector for a single d-dimensional sample x, so W has shape d×l.
+
+**dx:** For each element x_i of x, the partial derivatives of y_1, y_2, ..., y_l with respect to x_i are w_i1, w_i2, ..., w_il, so
+
+dL/dx_i = (dL/dy_1)·w_i1 + (dL/dy_2)·w_i2 + … + (dL/dy_l)·w_il
+
+Therefore dx = dy · W^T.
+
+**dW:** For an element w_ij of W, the partial derivative of y_j with respect to w_ij is x_i, so
+
+dL/dw_ij = (dL/dy_j)·x_i
+
+Therefore dW = x^T · dy. For a batch of samples, x^T · dy sums the contribution of every sample; the 1/N averaging over the batch is already contained in the loss gradient.
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: shape(N, d')
+    """
+    # dx = dy · W^T, dW = x^T · dy
+    grad_x = np.matmul(grad_y, self.memory['W'].T)
+    grad_W = np.matmul(self.memory['x'].T, grad_y)
+
+    return grad_x, grad_W
+```
+
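+As a quick sanity check on these formulas, the analytic gradients can be compared against finite differences. The following is only a minimal illustrative sketch (it assumes `numpy` and that the `Matmul` class from `numpy_fnn.py` is importable), not part of the graded implementation:
+
+```python
+import numpy as np
+from numpy_fnn import Matmul
+
+np.random.seed(0)
+x = np.random.randn(4, 3)
+W = np.random.randn(3, 5)
+
+op = Matmul()
+y = op.forward(x, W)
+grad_y = np.random.randn(*y.shape)      # stand-in upstream gradient dL/dy
+grad_x, grad_W = op.backward(grad_y)
+
+# numerical dL/dW[0, 0] for the scalar L = sum(grad_y * y)
+eps = 1e-6
+W_eps = W.copy()
+W_eps[0, 0] += eps
+num = (np.sum(grad_y * np.matmul(x, W_eps)) - np.sum(grad_y * y)) / eps
+print(np.allclose(num, grad_W[0, 0], atol=1e-4))   # expected: True
+```
+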
+#### 2. ReLU
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: same shape as x
+    """
+    x = self.memory['x']
+    grad_x = grad_y.copy()
+    grad_x[x <= 0] = 0
+    return grad_x
+```
+
+When x_i <= 0, dy_i/dx_i = 0, so dL/dx_i = 0.
+
+When x_i > 0, dy_i/dx_i = 1, so dL/dx_i = dL/dy_i.
+
+As shown above, wherever x_i > 0 the corresponding entry of dx simply copies dy, and wherever x_i <= 0 the corresponding entry of dx is set to 0.
+
+#### 3. Log
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: same shape as x
+    """
+    # clamp x away from 0 before dividing; np.maximum returns a copy,
+    # so the cached input is not modified in place
+    x = np.maximum(self.memory['x'], self.epsilon)
+    grad_x = grad_y * (1 / x)
+
+    return grad_x
+```
+
+dy_i/dx_i = 1/x_i, so dL/dx_i = (dL/dy_i) · 1/x_i.
+
+Therefore dx = dy · (1/x), element-wise.
+
+#### 4. Softmax
+
+![](./img/softmax.jpg)
+
+Suppose x and y = softmax(x) are both l-dimensional vectors (one sample).
+
+For dy_j/dx_i: if i = j, then dy_j/dx_i = y_j - (y_j)^2; if i != j, then dy_j/dx_i = -y_i·y_j.
+
+Let D = diag(y) - y^T·y, where the second term is the outer product of y with itself (`np.outer(y, y)`); then dy_j/dx_i = D_ij.
+
+It follows that dL/dx_i = (dL/dy_1)·D_i1 + (dL/dy_2)·D_i2 + … + (dL/dy_l)·D_il
+
+Therefore dx = dy · D.
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: same shape as x
+    """
+    y = self.memory['y']
+    grad_x = []
+    for grad_y1, y1 in zip(grad_y, y):
+        # per-sample Jacobian: D = diag(y) - outer(y, y)
+        D = np.diag(y1) - np.outer(y1, y1)
+        grad_x1 = np.dot(grad_y1, D)
+        grad_x.append(grad_x1)
+    grad_x = np.array(grad_x)
+    return grad_x
+```
+
+In the actual implementation, a batch contains more than one sample, so dx is generated sample by sample in a loop, as shown above.
+
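+For reference, the per-sample loop can also be replaced by a vectorized version that builds all the Jacobians at once. This is only an equivalent sketch of the derivation above, not the submitted implementation:
+
+```python
+import numpy as np
+
+def softmax_backward_vectorized(grad_y, y):
+    # batched Jacobians D_n = diag(y_n) - outer(y_n, y_n), shape (N, c, c)
+    N, c = y.shape
+    D = np.zeros((N, c, c))
+    D[:, np.arange(c), np.arange(c)] = y
+    D -= np.einsum('ni,nj->nij', y, y)
+    # dx_n = dy_n · D_n for every sample n
+    return np.einsum('nj,njk->nk', grad_y, D)
+```
+
+Because D is symmetric, dy · D can also be expanded to the closed form y ⊙ (dy − (dy·y)), where dy·y is the per-sample scalar sum_j (dL/dy_j)·y_j; that form avoids building the Jacobians at all.
+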
diff --git a/assignment-2/submission/16307130040/img/Figure_1.png b/assignment-2/submission/16307130040/img/Figure_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..dab6049f889917dcbf2e93d6203b3a6579908777
Binary files a/assignment-2/submission/16307130040/img/Figure_1.png and b/assignment-2/submission/16307130040/img/Figure_1.png differ
diff --git a/assignment-2/submission/16307130040/img/matmul.jpg b/assignment-2/submission/16307130040/img/matmul.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dd071796bbe85141e48275be4c38358eefa4112f
Binary files a/assignment-2/submission/16307130040/img/matmul.jpg and b/assignment-2/submission/16307130040/img/matmul.jpg differ
diff --git a/assignment-2/submission/16307130040/img/softmax.jpg b/assignment-2/submission/16307130040/img/softmax.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2daa913899b5f8401693ffb777ff3e27ed24cf09
Binary files a/assignment-2/submission/16307130040/img/softmax.jpg and b/assignment-2/submission/16307130040/img/softmax.jpg differ
diff --git a/assignment-2/submission/16307130040/numpy_fnn.py b/assignment-2/submission/16307130040/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..277f81a3f11fb44523777ef4bddcb998454bcdc3
--- /dev/null
+++ b/assignment-2/submission/16307130040/numpy_fnn.py
@@ -0,0 +1,184 @@
+import numpy as np
+
+
+class NumpyOp:
+
+    def __init__(self):
+        self.memory = {}
+        self.epsilon = 1e-12
+
+
+class Matmul(NumpyOp):
+
+    def forward(self, x, W):
+        """
+        x: shape(N, d)
+        W: shape(d, d')
+        """
+        self.memory['x'] = x
+        self.memory['W'] = W
+        h = np.matmul(x, W)
+        return h
+
+    def backward(self, grad_y):
+        """
+        grad_y: shape(N, d')
+        """
+        # dx = dy · W^T, dW = x^T · dy
+        grad_x = np.matmul(grad_y, self.memory['W'].T)
+        grad_W = np.matmul(self.memory['x'].T, grad_y)
+
+        return grad_x, grad_W
+
+
+class Relu(NumpyOp):
+
+    def forward(self, x):
+        self.memory['x'] = x
+        return np.where(x > 0, x, np.zeros_like(x))
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        # pass the gradient through where x > 0, zero it elsewhere
+        x = self.memory['x']
+        grad_x = grad_y.copy()
+        grad_x[x <= 0] = 0
+
+        return grad_x
+
+
+class Log(NumpyOp):
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        out = np.log(x + self.epsilon)
+        self.memory['x'] = x
+
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        # clamp x away from 0 before dividing; np.maximum returns a copy,
+        # so the cached input is not modified in place
+        x = np.maximum(self.memory['x'], self.epsilon)
+        grad_x = grad_y * (1 / x)
+
+        return grad_x
+
+
+class Softmax(NumpyOp):
+    """
+    softmax over last dimension
+    """
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        # subtract the row-wise max for numerical stability
+        shift_x = x - np.max(x, axis=1).reshape(-1, 1)
+        y = np.exp(shift_x) / np.sum(np.exp(shift_x), axis=1).reshape(-1, 1)
+        self.memory['y'] = y
+
+        return y
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        y = self.memory['y']
+        grad_x = []
+        for grad_y1, y1 in zip(grad_y, y):
+            # per-sample Jacobian: D = diag(y) - outer(y, y)
+            D = np.diag(y1) - np.outer(y1, y1)
+            grad_x1 = np.dot(grad_y1, D)
+            grad_x.append(grad_x1)
+        grad_x = np.array(grad_x)
+
+        return grad_x
+
+
+class NumpyLoss:
+
+    def __init__(self):
+        self.target = None
+
+    def get_loss(self, pred, target):
+        self.target = target
+        return (-pred * target).sum(axis=1).mean()
+
+    def backward(self):
+        return -self.target / self.target.shape[0]
+
+
+class NumpyModel:
+    def __init__(self):
+        self.W1 = np.random.normal(size=(28 * 28, 256))
+        self.W2 = np.random.normal(size=(256, 64))
+        self.W3 = np.random.normal(size=(64, 10))
+
+        # operators used in forward and backward
+        self.matmul_1 = Matmul()
+        self.relu_1 = Relu()
+        self.matmul_2 = Matmul()
+        self.relu_2 = Relu()
+        self.matmul_3 = Matmul()
+        self.softmax = Softmax()
+        self.log = Log()
+
+        # updated in backward; softmax_grad, log_grad, etc. hold the gradients of
+        # the loss with respect to the corresponding operator's input
+        self.x1_grad, self.W1_grad = None, None
+        self.relu_1_grad = None
+        self.x2_grad, self.W2_grad = None, None
+        self.relu_2_grad = None
+        self.x3_grad, self.W3_grad = None, None
+        self.softmax_grad = None
+        self.log_grad = None
+
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+
+        x = self.matmul_1.forward(x, self.W1)
+        x = self.relu_1.forward(x)
+        x = self.matmul_2.forward(x, self.W2)
+        x = self.relu_2.forward(x)
+        x = self.matmul_3.forward(x, self.W3)
+
+        x = self.softmax.forward(x)
+        x = self.log.forward(x)
+
+        return x
+
+    def backward(self, y):
+        # propagate the loss gradient back through the operators in reverse order
+        y = self.log.backward(y)
+        self.log_grad = y
+        y = self.softmax.backward(y)
+        self.softmax_grad = y
+        y, self.W3_grad = self.matmul_3.backward(y)
+        self.x3_grad = y
+        y = self.relu_2.backward(y)
+        y, self.W2_grad = self.matmul_2.backward(y)
+        self.x2_grad = y
+        y = self.relu_1.backward(y)
+        y, self.W1_grad = self.matmul_1.backward(y)
+        self.x1_grad = y
+
+    def optimize(self, learning_rate):
+        self.W1 -= learning_rate * self.W1_grad
+        self.W2 -= learning_rate * self.W2_grad
+        self.W3 -= learning_rate * self.W3_grad
diff --git a/assignment-2/submission/16307130040/numpy_mnist.py b/assignment-2/submission/16307130040/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..a688f7c64114bf150ffff2b903dfc74688bda4ad
--- /dev/null
+++ b/assignment-2/submission/16307130040/numpy_mnist.py
@@ -0,0 +1,59 @@
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+
+def mini_batch(dataset, batch_size=128, numpy=True):
+    # replacement for utils.mini_batch (see README, section 2)
+    data = []
+    label = []
+    for each in dataset:
+        data.append(np.array(each[0]))
+        label.append(each[1])
+    data = np.array(data)
+    label = np.array(label)
+
+    # shuffle samples and labels with the same random permutation
+    m = data.shape[0]
+    permutation = np.random.permutation(m)
+    data = data[permutation]
+    label = label[permutation]
+
+    # slice into full batches of batch_size; the remainder is dropped
+    n = m // batch_size
+    mini_batches = []
+    for i in range(n):
+        mini_batches.append([data[i * batch_size:(i + 1) * batch_size],
+                             label[i * batch_size:(i + 1) * batch_size]])
+
+    return mini_batches
+
+
+def numpy_run():
+    train_dataset, test_dataset = download_mnist()
+
+    model = NumpyModel()
+    numpy_loss = NumpyLoss()
+    model.W1, model.W2, model.W3 = get_torch_initialization()
+
+    train_loss = []
+
+    epoch_number = 3
+    learning_rate = 0.1
+
+    for epoch in range(epoch_number):
+        for x, y in mini_batch(train_dataset):
+            y = one_hot(y)
+
+            y_pred = model.forward(x)
+            loss = numpy_loss.get_loss(y_pred, y)
+
+            model.backward(numpy_loss.backward())
+            model.optimize(learning_rate)
+
+            train_loss.append(loss.item())
+
+        x, y = batch(test_dataset)[0]
+        accuracy = np.mean(model.forward(x).argmax(axis=1) == y)
+        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+    plot_curve(train_loss)
+
+
+if __name__ == "__main__":
+    numpy_run()
diff --git a/assignment-2/submission/16307130040/torch_mnist.py b/assignment-2/submission/16307130040/torch_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a5649bbfa750b3520b4b895de7260c3aa8ea7cd
--- /dev/null
+++ b/assignment-2/submission/16307130040/torch_mnist.py
@@ -0,0 +1,64 @@
+import torch
+from utils import mini_batch, batch, download_mnist, get_torch_initialization, one_hot, plot_curve
+
+
+class TorchModel:
+
+    def __init__(self):
+        self.W1 = torch.randn((28 * 28, 256), requires_grad=True)
+        self.W2 = torch.randn((256, 64), requires_grad=True)
+        self.W3 = torch.randn((64, 10), requires_grad=True)
+
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+        x = torch.relu(torch.matmul(x, self.W1))
+        x = torch.relu(torch.matmul(x, self.W2))
+        x = torch.matmul(x, self.W3)
+        self.softmax = torch.softmax(x, 1)
+        self.log = torch.log(self.softmax)
+        self.softmax.retain_grad()  # for test only
+        self.log.retain_grad()  # for test only
+        return self.log
+
+    def optimize(self, learning_rate):
+        with torch.no_grad():
+            self.W1 -= learning_rate * self.W1.grad
+            self.W2 -= learning_rate * self.W2.grad
+            self.W3 -= learning_rate * self.W3.grad
+
+        self.W1.grad = None
+        self.W2.grad = None
+        self.W3.grad = None
+
+
+def torch_run():
+    train_dataset, test_dataset = download_mnist()
+
+    model = TorchModel()
+    model.W1.data, model.W2.data, model.W3.data = get_torch_initialization(numpy=False)
+
+    train_loss = []
+
+    epoch_number = 3
+    learning_rate = 0.1
+
+    for epoch in range(epoch_number):
+        for x, y in mini_batch(train_dataset, numpy=False):
+            y = one_hot(y, numpy=False)
+
+            y_pred = model.forward(x)
+            loss = (-y_pred * y).sum(dim=1).mean()
+            loss.backward()
+            model.optimize(learning_rate)
+
+            train_loss.append(loss.item())
+
+        x, y = batch(test_dataset, numpy=False)[0]
+        accuracy = model.forward(x).argmax(dim=1).eq(y).float().mean().item()
+        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+    plot_curve(train_loss)
+
+
+if __name__ == "__main__":
+    torch_run()