diff --git a/assignment-1/submission/18307130104/README.md b/assignment-1/submission/18307130104/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..547377052cdbe1f8ecf6c88d8f20086418b3842e
--- /dev/null
+++ b/assignment-1/submission/18307130104/README.md
@@ -0,0 +1,195 @@
+
+
+18307130104
+
+# 课程报告
+
+这是 prml 的 assignment-1 课程报告,我的代码在 [source.py](./source.py) 中。
+
+在 assignment-1 中,用 python 实现了一个 KNN 类,调用该类下 fit() 函数可以对数据进行训练,调用 predict() 可以对多组目标数据进行预测,返回一个 list 对应每组数据的结果。
+
+## KNN 类实现
+
+### fit() 函数
+
+fit() 函数的任务有两个:将用于训练的点按照类别分别进行储存;选择合适的 K,也就是用于预测的最近点个数。
+
+接下来说明 K 数值的选择方法。对于输入的数据,选择前 $\frac 3 4$ 的数据作为训练集,后 $\frac 1 4$ 的数据作为验证集。依次将 K 取为 1~14,分别计算模型在验证集上的正确率,选取其中正确率最高的 K 作为模型的 K 值保存下来。
+
+选择 1~14 是因为数据集的规模为 2000(其中 1600 个样本用于训练)。如果数据规模发生变化,这一范围也可以相应调整,不过这一范围对大部分数据规模都比较适用。
+
+### predict() 函数
+
+predict() 函数会根据模型中存储的数据和选定的 K 对给定数据进行预测。
+
+采用欧氏距离作为两个点的距离数值(实现中比较的是距离的平方,排序结果与欧氏距离一致),选择距离最近的 K 个点进行投票,获票最多的类别就是预测结果。对于获票相同的情况选择编号比较小的类别。
+
+## 测试与展示
+
+```shell
+python source.py g  # 生成数据
+python source.py    # 进行测试
+python source.py d  # 生成展示图片
+```
+
+generate() 和 display() 函数均从示例代码中获得。其中 display() 函数中增加了对某种类别预测结果为空的判断防止报错。
+
+> 需要保证运行环境中有 img 文件夹,否则程序无法正确运行。(由于不能用 os 包所以不知道怎么判断是否存在文件夹)
+>
+> 另外,如果使用 wsl 等环境会导致输出图像有重叠。
+
+## 探究性实验
+
+### 实验1
+
+采用以下参数生成 3 组数据。
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+73 & 0 \\\\
+0 & 22
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+1 & 2
+\end{array}\right]
+\end{array}
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+21.2 & 0 \\\\
+0 & 32.1
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+16 & -5
+\end{array}\right]
+\end{array}
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 5 \\\\
+5 & 10
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+10 & 22
+\end{array}\right]
+\end{array}
+$$
+
+训练数据,测试数据,测试结果如下三图
+
+

+
+程序输出如下(之后的实验输出均采用如下的输出格式)
+
+| K | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
+| ------ | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| 正确率 | 0.93 | 0.92 | 0.93 | 0.94 | 0.94 | 0.95 | 0.95 | 0.95 | 0.95 | 0.95 | 0.96 | 0.96 | 0.96 | 0.96 |
+
+| 选取 K | 正确率 |
+| -- | -- |
+| 14 | 0.96 |
+
+将实验1作为基准,对不同数据集上的模型效果进行对比。这组数据的特点在于虽然不同种类之间的点有交集,但是区分仍然非常明显,比较符合实际中的分类问题的特征。
+
+### 实验2
+
+采用以下参数生成 3 组数据。
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+6 & 0 \\\\
+0 & 4
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+1 & 2
+\end{array}\right]
+\end{array}
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+5 & 0 \\\\
+0 & 7
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+16 & -5
+\end{array}\right]
+\end{array}
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+9 & 5 \\\\
+0 & 5
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+10 & 22
+\end{array}\right]
+\end{array}
+$$
+
+训练数据,测试数据,测试结果如下三图
+
+

+
+程序输出如下
+
+| K | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
+| ------ | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| 正确率 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
+
+
+| 选取 K | 正确率 |
+| -- | -- |
+|1|acc = 1.0|
+
+实验2的数据集中各类数据的协方差比较小,数据分布更加集中,不同类别的集中区域之间交叉很少,因此模型的准确率非常高。这种情况下的分类问题非常简单,模型表现优秀也在预期之中。
+
+### 实验3
+
+采用以下参数生成 3 组数据。
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+73 & 0 \\\\
+0 & 22
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+13 & -2
+\end{array}\right]
+\end{array}
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+21.2 & 0 \\\\
+0 & 32.1
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+16 & -5
+\end{array}\right]
+\end{array}
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 5 \\\\
+5 & 10
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+18 & -7
+\end{array}\right]
+\end{array}
+$$
+
+训练数据,测试数据,测试结果如下三图
+
+

+
+程序输出如下
+
+| K | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
+| ------ | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| 正确率 | 0.67 | 0.76 | 0.73 | 0.75 | 0.73 | 0.74 | 0.74 | 0.76 | 0.74 | 0.75 | 0.74 | 0.74 | 0.74 | 0.74 |
+
+| 选取 K | 正确率 |
+| -- | -- |
+|2|acc = 0.71|
+
+相比于实验1,虽然数据的协方差没有变化,但是各类数据的中心点比较靠近,表现为数据集中区域的重合部分非常大,不同类别非常难以区分。可以看到正确率也有非常大幅度的下滑。
+
+如果再加大协方差,测试准确率也会进一步下降。
+
+## 结论
+
+可以看到对于数据集中不同类别区分度比较大的情况,KNN 有着非常优秀的表现;而对于不同类别数据重叠比较严重的情况,KNN 的效果则并不理想。
diff --git a/assignment-1/submission/18307130104/img/test-1.png b/assignment-1/submission/18307130104/img/test-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..dee425303e2612d99294b8b67d9d3caa62c45d19
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test-1.png differ
diff --git a/assignment-1/submission/18307130104/img/test-2.png b/assignment-1/submission/18307130104/img/test-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..af4d06ba839eb16dc7864f7f9ccbbb0fc6288353
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test-2.png differ
diff --git a/assignment-1/submission/18307130104/img/test-3.png b/assignment-1/submission/18307130104/img/test-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..df464c743c65e3df01aab75decd3d46b5040f981
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test-3.png differ
diff --git a/assignment-1/submission/18307130104/img/test.png b/assignment-1/submission/18307130104/img/test.png
new file mode 100644
index 0000000000000000000000000000000000000000..d4880f0b716a928bb8c98e57db1450c4005e4de0
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test.png differ
diff --git a/assignment-1/submission/18307130104/img/test_res-1.png b/assignment-1/submission/18307130104/img/test_res-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..b59def122bd96d11caef1846cbed617f2ff1e77a
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test_res-1.png differ
diff --git a/assignment-1/submission/18307130104/img/test_res-2.png b/assignment-1/submission/18307130104/img/test_res-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..af4d06ba839eb16dc7864f7f9ccbbb0fc6288353
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test_res-2.png differ
diff --git a/assignment-1/submission/18307130104/img/test_res-3.png b/assignment-1/submission/18307130104/img/test_res-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..4e2e9e4b84105a214163ef8001950c84551cc868
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test_res-3.png differ
diff --git a/assignment-1/submission/18307130104/img/test_res.png b/assignment-1/submission/18307130104/img/test_res.png
new file mode 100644
index 0000000000000000000000000000000000000000..3398f85d9e2c12e1aed1d46aef32b0ebfe40a736
Binary files /dev/null and b/assignment-1/submission/18307130104/img/test_res.png differ
diff --git a/assignment-1/submission/18307130104/img/train-1.png b/assignment-1/submission/18307130104/img/train-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ac4cafb374d598c4c81167a7a3e249856803f82
Binary files /dev/null and b/assignment-1/submission/18307130104/img/train-1.png differ
diff --git a/assignment-1/submission/18307130104/img/train-2.png b/assignment-1/submission/18307130104/img/train-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..5574b29d7ef53b5e38af79f85a1ae4ebcbb8f137
Binary files /dev/null and b/assignment-1/submission/18307130104/img/train-2.png differ
diff --git a/assignment-1/submission/18307130104/img/train-3.png b/assignment-1/submission/18307130104/img/train-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..4e5eb59db4f0293e1e59fa4747e61112138f3153
Binary files /dev/null and b/assignment-1/submission/18307130104/img/train-3.png differ
diff --git a/assignment-1/submission/18307130104/img/train.png b/assignment-1/submission/18307130104/img/train.png
new file mode 100644
index 0000000000000000000000000000000000000000..1906f232e9f385005824fea153d1c8c649000cdb
Binary files /dev/null and b/assignment-1/submission/18307130104/img/train.png differ
diff --git a/assignment-1/submission/18307130104/source.py b/assignment-1/submission/18307130104/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..060fd3ca6cbd1b0c81fe22fb1c051eb7e275651d
--- /dev/null
+++ b/assignment-1/submission/18307130104/source.py
@@ -0,0 +1,147 @@
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+# k-nearest-neighbour classifier: training points are stored per label and
+# the number of voting neighbours K is chosen on a held-out part of the
+# training data.
+# NOTE(review): indentation was lost in this copy of the file, so the nesting
+# described in the comments below is inferred from the statements - confirm
+# against the real source.py.
+class KNN:
+
+ def __init__(self):
+ # label -> list of stored training points with that label
+ self.ldata = {}
+ # number of neighbours that vote; overwritten by fit()
+ self.K = 10
+ # NOTE(review): not read anywhere in the visible code
+ self.cnt = 0
+
+ # Store the training points and choose K.
+ # The first 3/4 of the samples are indexed by label and the remaining 1/4
+ # is used as a validation set. K = 1..14 is tried in turn and the K with
+ # the highest validation accuracy is kept; the comparison is a strict ">",
+ # so the smallest K wins ties. Each accuracy and the chosen K are printed.
+ # Finally the index is rebuilt from all samples.
+ # NOTE(review): self.ldata is only cleared after K has been chosen, so a
+ # second fit() call validates against points left over from the first call.
+ def fit(self, train_data, train_label):
+ totsz = len(train_data)
+ pretrainsz = totsz * 3 // 4
+ for i in range(0, pretrainsz):
+ if train_label[i] in self.ldata:
+ self.ldata[train_label[i]].append(train_data[i])
+ else:
+ self.ldata[train_label[i]] = [train_data[i]]
+ # held-out tail used to score each candidate K
+ pretraindata = train_data[pretrainsz : totsz]
+ pretrainlabel = train_label[pretrainsz : totsz]
+ maxAcc = 0
+ # fallback K, kept only if no candidate scores above 0
+ takeK = 3
+ for preK in range(1, 15):
+ pretrainres = []
+ self.K = preK
+ for d in pretraindata:
+ pretrainres.append(self.predict_one(d))
+ acc = np.mean(np.equal(pretrainres, pretrainlabel))
+ print(acc)
+ if acc > maxAcc:
+ maxAcc = acc
+ takeK = preK
+ self.K = takeK
+ print("take K", takeK)
+ # rebuild the index so that prediction uses every training sample
+ self.ldata.clear()
+ for (d, l) in zip(train_data, train_label):
+ if(l in self.ldata):
+ self.ldata[l].append(d)
+ else:
+ self.ldata[l] = [d]
+
+ # Sum of squared coordinate differences of s1 and s2, i.e. the squared
+ # Euclidean distance (no square root is taken).
+ # NOTE(review): the local name "sum" shadows the builtin inside this method.
+ def dist(self, s1, s2):
+ sum = 0
+ for (k1, k2) in zip(s1, s2):
+ sum += (k1 - k2) ** 2
+ return sum
+ # Sort key used by predict_one(): the first item of a [distance, label] pair.
+ def takeFirst(self, elem):
+ return elem[0]
+ # Predict the label of a single point.
+ # Computes the distance from the point to every stored training point,
+ # sorts by distance, counts the labels of the first self.K entries and
+ # returns the label with the highest count. Because of the strict ">",
+ # ties go to the label that comes first when iterating self.ldata.
+ # Returns None when nothing has been stored.
+ def predict_one(self, data):
+ result = None
+ tmpl = []
+ for l in self.ldata:
+ for s in self.ldata[l]:
+ tmpl.append([self.dist(s, data), l])
+ tmpl.sort(key=self.takeFirst)
+ # votes per label, initialised to 0 for every known label
+ num = {}
+ for i in self.ldata:
+ num[i] = 0
+ cnt = 0
+ # for l in tmpl:
+ # print(l)
+ # print(' ')
+ for l in tmpl:
+ num[l[1]] += 1
+ cnt += 1
+ if(cnt >= self.K):
+ break
+ maxi = -1
+ for i in self.ldata:
+ # print(i)
+ if num[i] > maxi:
+ maxi = num[i]
+ result = i
+ # print(result)
+ return result
+
+ # Predict a label for every point in test_data; returns a list with one
+ # predict_one() result per input point, in input order.
+ def predict(self, test_data):
+ result = []
+ for x in test_data:
+ result.append(self.predict_one(x))
+ return result
+
+# Generate the synthetic data set and write it to "data.npy".
+# Draws 800, 200 and 1000 points from three 2-D normal distributions
+# (labels 0, 1 and 2), shuffles them together, and splits the 2000 samples
+# into 1600 for training and 400 for testing.
+def generate():
+ mean = (1, 2)
+ cov = np.array([[73, 0], [0, 22]])
+ x = np.random.multivariate_normal(mean, cov, (800,))
+
+ mean = (16, -5)
+ cov = np.array([[21.2, 0], [0, 32.1]])
+ y = np.random.multivariate_normal(mean, cov, (200,))
+
+ mean = (10, 22)
+ cov = np.array([[10,5],[5,10]])
+ z = np.random.multivariate_normal(mean, cov, (1000,))
+
+ # one shared permutation keeps points and labels aligned after shuffling
+ idx = np.arange(2000)
+ np.random.shuffle(idx)
+ data = np.concatenate([x,y,z])
+ label = np.concatenate([
+ np.zeros((800,),dtype=int),
+ np.ones((200,),dtype=int),
+ np.ones((1000,),dtype=int)*2
+ ])
+ data = data[idx]
+ label = label[idx]
+
+ train_data, test_data = data[:1600,], data[1600:,]
+ train_label, test_label = label[:1600,], label[1600:,]
+ # NOTE(review): the saved tuple mixes arrays of different shapes; recent
+ # NumPy releases presumably refuse to build such a ragged array unless
+ # dtype=object is given - confirm on the NumPy version in use.
+ np.save("data.npy",(
+ (train_data, train_label), (test_data, test_label)
+ ))
+
+# Scatter-plot the points coloured by label, save the plot as img/<name>
+# and show it.
+# data:  sequence of points; columns 0 and 1 are used as x and y
+# label: one label per point, used directly as an index into three buckets,
+#        so only the labels 0, 1 and 2 are supported
+# name:  file name (inside the img directory) for the saved figure
+# NOTE(review): no figure is created or cleared here; on backends where
+# show() does not close the figure, later calls presumably draw on top of
+# earlier ones - confirm.
+def display(data, label, name):
+ datas =[[],[],[]]
+ for i in range(len(data)):
+ datas[label[i]].append(data[i])
+
+ for each in datas:
+ each = np.array(each)
+ # skip labels without points; an empty array cannot be column-indexed
+ if(each.size > 0):
+ plt.scatter(each[:, 0], each[:, 1])
+ plt.savefig(f'img/{name}')
+ plt.show()
+
+# Load "data.npy" and return ((train_data, train_label), (test_data, test_label)).
+# NOTE(review): allow_pickle=True unpickles the file content on load, so
+# only use this with a data.npy that comes from a trusted source.
+def read():
+ (train_data, train_label), (test_data, test_label) = np.load("data.npy",allow_pickle=True)
+ return (train_data, train_label), (test_data, test_label)
+
+# Command-line entry point.
+#   "g"         -> generate a new data.npy
+#   "d"         -> plot the training and test data
+#   (otherwise) -> load the data
+# The lines after the blank line below fit a KNN model on the training
+# data, plot the predictions for the test data and print the test accuracy.
+# NOTE(review): the "g" test is a separate "if", not part of the "d"/else
+# pair, so the else branch also runs after generating.
+# NOTE(review): indentation was lost in this copy, so it cannot be told from
+# here whether the model fitting below belongs to the else branch or runs
+# for every invocation - confirm against the real source.py.
+if __name__ == "__main__":
+ if len(sys.argv) > 1 and sys.argv[1] == "g":
+ print("generate")
+ generate()
+ if len(sys.argv) > 1 and sys.argv[1] == "d":
+ (train_data, train_label), (test_data, test_label) = read()
+ # for l in test_label:
+ # print(l)
+ display(train_data, train_label, 'train')
+ display(test_data, test_label, 'test')
+ else:
+ (train_data, train_label), (test_data, test_label) = read()
+
+ model = KNN()
+ model.fit(train_data, train_label)
+ res = model.predict(test_data)
+ display(test_data, res, 'test_res')
+ print("acc =",np.mean(np.equal(res, test_label)))
\ No newline at end of file