diff --git a/assignment-2/submission/17307130285/.keep b/assignment-2/submission/17307130285/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-2/submission/17307130285/README.assets/J.png b/assignment-2/submission/17307130285/README.assets/J.png
new file mode 100644
index 0000000000000000000000000000000000000000..a3624f4a592f14eb27b5bc1c5c266bfca818f5bf
Binary files /dev/null and b/assignment-2/submission/17307130285/README.assets/J.png differ
diff --git a/assignment-2/submission/17307130285/README.assets/ex2.png b/assignment-2/submission/17307130285/README.assets/ex2.png
new file mode 100644
index 0000000000000000000000000000000000000000..f5fc30c1ddeca9fe0fc15b72c429a36c86c1e532
Binary files /dev/null and b/assignment-2/submission/17307130285/README.assets/ex2.png differ
diff --git a/assignment-2/submission/17307130285/README.assets/ex3.png b/assignment-2/submission/17307130285/README.assets/ex3.png
new file mode 100644
index 0000000000000000000000000000000000000000..38a66f94bed72963d219addc75cbc8bc09854eca
Binary files /dev/null and b/assignment-2/submission/17307130285/README.assets/ex3.png differ
diff --git a/assignment-2/submission/17307130285/README.assets/sm.png b/assignment-2/submission/17307130285/README.assets/sm.png
new file mode 100644
index 0000000000000000000000000000000000000000..866a2db67c4c6b809ba4cb691ae513e1e841a13a
Binary files /dev/null and b/assignment-2/submission/17307130285/README.assets/sm.png differ
diff --git a/assignment-2/submission/17307130285/README.assets/softmax.png b/assignment-2/submission/17307130285/README.assets/softmax.png
new file mode 100644
index 0000000000000000000000000000000000000000..992cd9b2e9af2353e39c04bd04668ce91dcf18b1
Binary files /dev/null and b/assignment-2/submission/17307130285/README.assets/softmax.png differ
diff --git a/assignment-2/submission/17307130285/README.assets/torch_batch.png b/assignment-2/submission/17307130285/README.assets/torch_batch.png
new file mode 100644
index 0000000000000000000000000000000000000000..daaf0ee9430d06749c6b0f06aea554c3437132a4
Binary files /dev/null and b/assignment-2/submission/17307130285/README.assets/torch_batch.png differ
diff --git a/assignment-2/submission/17307130285/README.md b/assignment-2/submission/17307130285/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4af83b6606f4c7d14a52e57dbda8d5850f993137
--- /dev/null
+++ b/assignment-2/submission/17307130285/README.md
@@ -0,0 +1,197 @@
+# Assignment 2
+
+**17307130285 王茹**
+
+## Formulas
+
+#### matmul
+
+$$
+Y = XW
+$$
+Backward pass, where `grad_y` denotes $\frac{\partial L}{\partial Y}$ for the loss $L$:
+$$
+\frac{\partial L}{\partial X} = \frac{\partial L}{\partial Y} W^{T}
+$$
+
+$$
+\frac{\partial L}{\partial W} = X^{T} \frac{\partial L}{\partial Y}
+$$
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: shape(N, d')
+    """
+
+    ####################
+    #      code 1      #
+    ####################
+    grad_W = np.matmul(self.memory['x'].T, grad_y)
+    grad_x = np.matmul(grad_y, self.memory['W'].T)
+
+    return grad_x, grad_W
+```
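+
+As a sanity check for the matmul gradients above, the analytic gradient with respect to `W` can be compared against central finite differences. The snippet below is only an illustrative sketch, not part of the submitted files; the scalar loss `L = Y.sum()` and the helper name `num_grad_W` are assumptions made for this example.
+
+```python
+import numpy as np
+
+rng = np.random.default_rng(0)
+X = rng.normal(size=(4, 3))
+W = rng.normal(size=(3, 2))
+
+# For the scalar loss L = sum(X @ W), the upstream gradient dL/dY is all ones.
+grad_y = np.ones((4, 2))
+grad_W = X.T @ grad_y          # analytic dL/dW, same formula as the backward code above
+
+# Central finite differences on each entry of W.
+eps = 1e-6
+num_grad_W = np.zeros_like(W)
+for i in range(W.shape[0]):
+    for j in range(W.shape[1]):
+        W_plus, W_minus = W.copy(), W.copy()
+        W_plus[i, j] += eps
+        W_minus[i, j] -= eps
+        num_grad_W[i, j] = ((X @ W_plus).sum() - (X @ W_minus).sum()) / (2 * eps)
+
+print(np.abs(grad_W - num_grad_W).max())  # should be tiny (~1e-9 or smaller)
+```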
+
+#### Relu
+
+$$
+Y_{ij}=\begin{cases}
+X_{ij}&X_{ij}\ge0\\\\
+0&\text{otherwise}
+\end{cases}
+$$
+Derivative:
+$$
+\frac{\partial Y_{ij}}{\partial X_{ij}}=\begin{cases}
+1&X_{ij}>0\\\\
+0&\text{otherwise}
+\end{cases}
+$$
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: same shape as x
+    """
+
+    ####################
+    #      code 2      #
+    ####################
+    # Where x > 0, pass grad_y through unchanged; elsewhere the gradient is 0.
+    x = self.memory['x']
+    grad_x = np.where(x > 0, grad_y, np.zeros_like(grad_y))
+
+    return grad_x
+```
+
+#### Log
+
+$$
+Y=\ln(X+\epsilon)
+$$
+Derivative:
+$$
+\frac{\partial Y}{\partial X} = \frac{1}{X+\epsilon}
+$$
+
+```python
+def backward(self, grad_y):
+    """
+    grad_y: same shape as x
+    """
+
+    ####################
+    #      code 3      #
+    ####################
+    grad_x = np.multiply(1. / (self.memory['x'] + self.epsilon), grad_y)
+    return grad_x
+```
+
+#### Softmax
+
+![sm](README.assets/sm.png)
+
+![softmax](README.assets/softmax.png)
+
+The forward pass subtracts the row-wise maximum before exponentiating, which keeps the computation numerically stable without changing the result. The backward pass multiplies `grad_y` by the per-sample Jacobian $\mathrm{diag}(s) - ss^{T}$, where $s$ is the softmax output.
+
+```python
+def forward(self, x):
+    """
+    x: shape(N, c)
+    """
+
+    ####################
+    #      code 4      #
+    ####################
+    out = []
+    for i in range(x.shape[0]):
+        t = x[i]
+        t = t - max(t)
+        t = np.exp(t)
+        out.append(t / sum(t))
+    out = np.array(out)
+    self.memory['out'] = out
+    return out
+
+def backward(self, grad_y):
+    """
+    grad_y: same shape as x
+    """
+
+    ####################
+    #      code 5      #
+    ####################
+    out = self.memory['out']
+    j = np.array([np.diag(i) - np.outer(i, i) for i in out])
+
+    grad_y = grad_y[:, np.newaxis, :]
+    grad_x = np.matmul(grad_y, j).squeeze(axis=1)
+
+    return grad_x
+```
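+
+The per-sample Jacobian loop above is easy to follow, but it materializes an $N \times c \times c$ array. For reference, an equivalent vectorized form (a sketch only, not the submitted implementation) follows from $J^{T}g = s \odot \big(g - (g \cdot s)\big)$:
+
+```python
+import numpy as np
+
+def softmax_backward_vectorized(out, grad_y):
+    # out: softmax output s, shape (N, c); grad_y: upstream gradient g, shape (N, c).
+    # Row-wise this computes s * (g - <g, s>), which equals g @ (diag(s) - s s^T).
+    return out * (grad_y - np.sum(grad_y * out, axis=1, keepdims=True))
+```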
+
+## Experiments
+
+- mini_batch
+
+```python
+def mini_batch(dataset, batch_size=32):
+    x_train = np.array([np.array(d[0]) for d in dataset])
+    y_train = np.array([d[1] for d in dataset])
+    # Shuffle the dataset.
+    size = x_train.shape[0]
+    idx = np.arange(size)
+    np.random.shuffle(idx)
+    # Round up so the last, possibly smaller batch is kept when batch_size
+    # does not divide the dataset size evenly.
+    num = int(np.ceil(size / batch_size)) * batch_size
+    batches = [(x_train[idx[i:i + batch_size]], y_train[idx[i:i + batch_size]]) for i in range(0, num, batch_size)]
+    return batches
+```
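+
+A quick way to check the batching logic (a hypothetical snippet, assuming the `mini_batch` function above and `numpy` as `np` are in scope) is to confirm that every sample appears exactly once across the returned batches:
+
+```python
+fake_dataset = [(np.zeros((28, 28)), i % 10) for i in range(100)]
+batches = mini_batch(fake_dataset, batch_size=32)
+print(len(batches))                          # ceil(100 / 32) = 4 batches
+print(sum(x.shape[0] for x, _ in batches))   # 100 samples in total, none dropped
+```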
+
+### Before implementing mini_batch with NumPy
+
+#### Experiment 1
+
+- epoch num: 3
+- learning rate: 0.1
+- batch size: 128
+
+- [0] Accuracy: 0.9465
+- [1] Accuracy: 0.9606
+- [2] Accuracy: 0.9674
+
+![torch_batch](README.assets/torch_batch.png)
+
+### After implementing mini_batch with NumPy
+
+#### Experiment 2
+
+- epoch num: 3
+- learning rate: 0.1
+- batch size: 128
+
+- [0] Accuracy: 0.9492
+- [1] Accuracy: 0.9642
+- [2] Accuracy: 0.9708
+
+![ex2](README.assets/ex2.png)
+
+#### Experiment 3
+
+- epoch num: 3
+- learning rate: 0.1
+- batch size: 32
+
+- [0] Accuracy: 0.9646
+- [1] Accuracy: 0.9738
+- [2] Accuracy: 0.9754
+
+![ex3](README.assets/ex3.png)
+
diff --git a/assignment-2/submission/17307130285/numpy_fnn.py b/assignment-2/submission/17307130285/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..a394d6c9cae87be907101173e2cc70592f6fb015
--- /dev/null
+++ b/assignment-2/submission/17307130285/numpy_fnn.py
@@ -0,0 +1,198 @@
+import numpy as np
+
+
+class NumpyOp:
+
+    def __init__(self):
+        self.memory = {}
+        self.epsilon = 1e-12
+
+
+class Matmul(NumpyOp):
+
+    def forward(self, x, W):
+        """
+        x: shape(N, d)
+        W: shape(d, d')
+        """
+        self.memory['x'] = x
+        self.memory['W'] = W
+        h = np.matmul(x, W)
+        return h
+
+    def backward(self, grad_y):
+        """
+        grad_y: shape(N, d')
+        """
+
+        ####################
+        #      code 1      #
+        ####################
+        grad_W = np.matmul(self.memory['x'].T, grad_y)
+        grad_x = np.matmul(grad_y, self.memory['W'].T)
+
+        return grad_x, grad_W
+
+
+class Relu(NumpyOp):
+
+    def forward(self, x):
+        self.memory['x'] = x
+        return np.where(x > 0, x, np.zeros_like(x))
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+
+        ####################
+        #      code 2      #
+        ####################
+        # Where x > 0, pass grad_y through unchanged; elsewhere the gradient is 0.
+        x = self.memory['x']
+        grad_x = np.where(x > 0, grad_y, np.zeros_like(grad_y))
+
+        return grad_x
+
+
+class Log(NumpyOp):
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+
+        out = np.log(x + self.epsilon)
+        self.memory['x'] = x
+
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+
+        ####################
+        #      code 3      #
+        ####################
+        grad_x = np.multiply(1. / (self.memory['x'] + self.epsilon), grad_y)
+        return grad_x
+
+
+class Softmax(NumpyOp):
+    """
+    softmax over last dimension
+    """
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+
+        ####################
+        #      code 4      #
+        ####################
+        out = []
+        for i in range(x.shape[0]):
+            t = x[i]
+            t = t - max(t)
+            t = np.exp(t)
+            out.append(t / sum(t))
+        out = np.array(out)
+        self.memory['out'] = out
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+
+        ####################
+        #      code 5      #
+        ####################
+        out = self.memory['out']
+        j = np.array([np.diag(i) - np.outer(i, i) for i in out])
+
+        grad_y = grad_y[:, np.newaxis, :]
+        grad_x = np.matmul(grad_y, j).squeeze(axis=1)
+
+        return grad_x
+
+
+class NumpyLoss:
+
+    def __init__(self):
+        self.target = None
+
+    def get_loss(self, pred, target):
+        self.target = target
+        return (-pred * target).sum(axis=1).mean()
+
+    def backward(self):
+        return -self.target / self.target.shape[0]
+
+
+class NumpyModel:
+    def __init__(self):
+        self.W1 = np.random.normal(size=(28 * 28, 256))
+        self.W2 = np.random.normal(size=(256, 64))
+        self.W3 = np.random.normal(size=(64, 10))
+
+        # The following operators are used in both forward and backward.
+        self.matmul_1 = Matmul()
+        self.relu_1 = Relu()
+        self.matmul_2 = Matmul()
+        self.relu_2 = Relu()
+        self.matmul_3 = Matmul()
+        self.softmax = Softmax()
+        self.log = Log()
+
+        # The following variables are updated in backward. softmax_grad, log_grad, etc.
+        # are the gradients propagated back through each operator (partial derivatives
+        # of the loss with respect to that operator's input).
+        self.x1_grad, self.W1_grad = None, None
+        self.relu_1_grad = None
+        self.x2_grad, self.W2_grad = None, None
+        self.relu_2_grad = None
+        self.x3_grad, self.W3_grad = None, None
+        self.softmax_grad = None
+        self.log_grad = None
+
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+
+        ####################
+        #      code 6      #
+        ####################
+        x = self.relu_1.forward(self.matmul_1.forward(x, self.W1))
+        x = self.relu_2.forward(self.matmul_2.forward(x, self.W2))
+        x = self.matmul_3.forward(x, self.W3)
+        x = self.log.forward(self.softmax.forward(x))
+
+        return x
+
+    def backward(self, y):
+
+        ####################
+        #      code 7      #
+        ####################
+        self.log_grad = self.log.backward(y)
+        self.softmax_grad = self.softmax.backward(self.log_grad)
+        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
+        self.relu_2_grad = self.relu_2.backward(self.x3_grad)
+        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
+        self.relu_1_grad = self.relu_1.backward(self.x2_grad)
+        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)
+
+    def optimize(self, learning_rate):
+        self.W1 -= learning_rate * self.W1_grad
+        self.W2 -= learning_rate * self.W2_grad
+        self.W3 -= learning_rate * self.W3_grad
diff --git a/assignment-2/submission/17307130285/numpy_mnist.py b/assignment-2/submission/17307130285/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..10ad4b609c44b30aef08c24917032b9b488ce4f0
--- /dev/null
+++ b/assignment-2/submission/17307130285/numpy_mnist.py
@@ -0,0 +1,51 @@
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+
+def mini_batch(dataset, batch_size=32):
+    x_train = np.array([np.array(d[0]) for d in dataset])
+    y_train = np.array([d[1] for d in dataset])
+    # Shuffle the dataset.
+    size = x_train.shape[0]
+    idx = np.arange(size)
+    np.random.shuffle(idx)
+    # Round up so the last, possibly smaller batch is kept when batch_size
+    # does not divide the dataset size evenly.
+    num = int(np.ceil(size / batch_size)) * batch_size
+    batches = [(x_train[idx[i:i + batch_size]], y_train[idx[i:i + batch_size]]) for i in range(0, num, batch_size)]
+    return batches
+
+
+def numpy_run():
+    train_dataset, test_dataset = download_mnist()
+
+    model = NumpyModel()
+    numpy_loss = NumpyLoss()
+    model.W1, model.W2, model.W3 = get_torch_initialization()
+
+    train_loss = []
+
+    epoch_number = 3
+    learning_rate = 0.1
+
+    for epoch in range(epoch_number):
+        for x, y in mini_batch(train_dataset):
+            y = one_hot(y)
+
+            y_pred = model.forward(x)
+            loss = numpy_loss.get_loss(y_pred, y)
+
+            model.backward(numpy_loss.backward())
+            model.optimize(learning_rate)
+
+            train_loss.append(loss.item())
+
+        x, y = batch(test_dataset)[0]
+        accuracy = np.mean(model.forward(x).argmax(axis=1) == y)
+        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+    plot_curve(train_loss)
+
+
+if __name__ == "__main__":
+    numpy_run()