diff --git a/assignment-1/submission/19210680053/README.md b/assignment-1/submission/19210680053/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6ae1a49f48c030f79bcc25f37bd717d6fe307c48
--- /dev/null
+++ b/assignment-1/submission/19210680053/README.md
@@ -0,0 +1,246 @@
+# 课程报告
+
+## 说明
+
+我使用的包为numpy(数据可视化部分另外使用了matplotlib),在class KNN中:
+
+
+a.使用函数euclidean进行向量间欧式距离的计算
+
+
+b.使用closest函数逐个接收输入向量,分别计算它与全部train data的欧氏距离,并输出距它最近的k个点中出现次数最多的train label。当出现次数最多的label不唯一(如出现次数均等)时,输出这些label中距离输入向量最近的点所对应的label
+
+
+c.使用predict函数将全部test data逐个输入,得到预测结果
+
+
+d.使用choose函数,将预测结果与test label进行比对(结果相同取值为1,不同为0),并据此计算准确率。k值选择范围根据训练与测试集数量决定(最小值为2,最大值为数据量的10%),从中选取使预测准确率最高的k值,并输出该k值及其对应的准确率
+
+
+## 数据生成 实验探究
+
+我使用以下参数生成了如下三个二维高斯分布,label分别为0,1,2
+
+
+ label=0
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 0 \\\\
+0 & 10
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+20 & 25
+\end{array}\right]
+\end{array}
+$$
+
+
+ label=1
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+23 & 0 \\\\
+0 & 22
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+16 & -5
+\end{array}\right]
+\end{array}
+$$
+
+
+ label=2
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 5 \\\\
+5 & 10
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+20 & 25
+\end{array}\right]
+\end{array}
+$$
+
+这是我生成的训练集:
+
+![训练集 1](img/train%201.png)
+
+
+
+
+
+这是我生成的测试集:
+
+![测试集 1](img/test%201.png)
+
+
+
+
+
+可以通过如下表格来报告我的实验结果
+
+Algo |kvalue|Acc |
+-----| ---- |---- |
+KNN | 5 |0.6225 |
+
+
+
+
+由于label=0和label=2对应的高斯分布距离较近(二者均值相同),测试准确率仅为62.25%。
+
+
+为进一步探究高斯分布距离对预测准确性影响,我使用如下参数进行分布生成:
+
+ label=0
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 2.1 \\\\
+2.1 & 12
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+20 & 25
+\end{array}\right]
+\end{array}
+$$
+
+
+ label=1
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+23 & 0 \\\\
+0 & 22
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+20 & 25
+\end{array}\right]
+\end{array}
+$$
+
+
+ label=2
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 5 \\\\
+5 & 10
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+20 & 25
+\end{array}\right]
+\end{array}
+$$
+
+这是我生成的训练集:
+
+![训练集 2](img/train%202.png)
+
+
+
+
+这是我生成的测试集:
+
+![测试集 2](img/test%202.png)
+
+
+
+
+
+可以通过如下表格来报告我的实验结果
+
+Algo |kvalue|Acc |
+-----| ---- |---- |
+KNN | 12 |0.485 |
+
+此时3个高斯分布距离彼此都很近,进行不同k值选取,实验的准确性最高达到48.5%。
+
+|k |Acc |
+----- | ---- |
+| 2 | 0.4525 |
+| 3 | 0.4375 |
+| 4 | 0.4475 |
+| 5 | 0.4300 |
+| 6 | 0.4675 |
+| 7 | 0.4525 |
+| 8 | 0.4775 |
+| 9 | 0.4450 |
+| 10 | 0.4650 |
+| 11 | 0.4700 |
+| 12 | 0.4850 |
+| 13 | 0.4750 |
+| 14 | 0.4650 |
+| 15 | 0.4625 |
+| 16 | 0.4775 |
+| 17 | 0.4650 |
+| 18 | 0.4800 |
+| 19 | 0.4700 |
+| 20 | 0.4725 |
+
+
+改变高斯分布距离,我使用以下参数生成高斯分布。
+
+
+ label=0
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 2.1 \\\\
+2.1 & 12
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+20 & 25
+\end{array}\right]
+\end{array}
+$$
+
+
+ label=1
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+23 & 0 \\\\
+0 & 22
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+16 & -5
+\end{array}\right]
+\end{array}
+$$
+
+
+ label=2
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 5 \\\\
+5 & 10
+\end{array}\right] \\\\
+\mu=\left[\begin{array}{ll}
+3 & 5
+\end{array}\right]
+\end{array}
+$$
+
+这是我生成的训练集:
+
+![训练集 3](img/train%203.png)
+
+
+
+
+
+这是我生成的测试集:
+
+![测试集 3](img/test%203.png)
+
+
+
+
+
+可以通过如下表格来报告我的实验结果
+
+Algo |kvalue|Acc |
+-----| ---- |---- |
+KNN | 2 |0.9975 |
+
+
+此时3个高斯分布距离较远,取较小的k值即可得到较为准确的判断。由此可见,增加高斯分布间的距离可以提升实验的准确性。
+
+## 代码使用方法
+
+修改 `source.py` 中 `__main__` 部分的 `mode` 数值后运行 `python source.py`:
+
+```text
+mode=0 #数据生成
+mode=1 #数据可视化
+mode取0、1以外的值 #训练和测试
+```
diff --git a/assignment-1/submission/19210680053/img/test 1.png b/assignment-1/submission/19210680053/img/test 1.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf515460fd3bf6e81d027117399749a3b10c29fe
Binary files /dev/null and b/assignment-1/submission/19210680053/img/test 1.png differ
diff --git a/assignment-1/submission/19210680053/img/test 2.png b/assignment-1/submission/19210680053/img/test 2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1d962680d1019a7b4946d61b7a66ede507ad0d4c
Binary files /dev/null and b/assignment-1/submission/19210680053/img/test 2.png differ
diff --git a/assignment-1/submission/19210680053/img/test 3.png b/assignment-1/submission/19210680053/img/test 3.png
new file mode 100644
index 0000000000000000000000000000000000000000..3ab9d8b6157ed19597c283688c34daeef54beeeb
Binary files /dev/null and b/assignment-1/submission/19210680053/img/test 3.png differ
diff --git a/assignment-1/submission/19210680053/img/train 1.png b/assignment-1/submission/19210680053/img/train 1.png
new file mode 100644
index 0000000000000000000000000000000000000000..dbe1db24a876a4b564d98b3009aefae717ba433c
Binary files /dev/null and b/assignment-1/submission/19210680053/img/train 1.png differ
diff --git a/assignment-1/submission/19210680053/img/train 2.png b/assignment-1/submission/19210680053/img/train 2.png
new file mode 100644
index 0000000000000000000000000000000000000000..406126994e9ac71f4a43d6d182e72d88e4eaceed
Binary files /dev/null and b/assignment-1/submission/19210680053/img/train 2.png differ
diff --git a/assignment-1/submission/19210680053/img/train 3.png b/assignment-1/submission/19210680053/img/train 3.png
new file mode 100644
index 0000000000000000000000000000000000000000..761f9ee658095183c7c2a3925b6cbb9c51fde989
Binary files /dev/null and b/assignment-1/submission/19210680053/img/train 3.png differ
diff --git a/assignment-1/submission/19210680053/source.py b/assignment-1/submission/19210680053/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f5e2424b154e74548445bef39f513dee6b40c94
--- /dev/null
+++ b/assignment-1/submission/19210680053/source.py
@@ -0,0 +1,103 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+class KNN():
+    """Brute-force k-nearest-neighbours classifier using Euclidean distance.
+
+    Call fit() first: predict(), closest() and choose() all read the
+    training samples and labels that fit() stores on the instance.
+    """
+
+    def euclidean(self, v1, v2):
+        """Return the Euclidean distance between the arrays v1 and v2."""
+        return np.sqrt(np.sum(np.square(v1 - v2)))
+
+    def fit(self, X_train, Y_train):
+        """Store the training samples and their labels.
+
+        Args:
+            X_train: training samples, one sample per row.
+            Y_train: label of each training sample, in the same order.
+        """
+        # Use the arguments themselves; the previous version read the
+        # module-level names train_data/train_label and so only worked
+        # when the calling script happened to define them.
+        self.train_data = np.asarray(X_train)
+        self.train_label = np.asarray(Y_train)
+
+    def predict(self, train_data, k):
+        """Classify every sample in train_data with its k nearest neighbours.
+
+        Args:
+            train_data: iterable of samples to classify (despite its name it
+                is the data to predict on, not the stored training set).
+            k: number of neighbours that vote.
+
+        Returns:
+            A list with one predicted label per input sample.
+        """
+        return [self.closest(item, k) for item in train_data]
+
+    def closest(self, item, k):
+        """Return the majority label among the k training points nearest to item.
+
+        When several labels share the highest vote count, the one whose
+        neighbour appears first in nearest-first order is returned.
+
+        Raises:
+            ValueError: if k is smaller than 1 or the training set is empty.
+        """
+        if k < 1:
+            raise ValueError("k must be at least 1")
+        if len(self.train_data) == 0:
+            raise ValueError("training set is empty")
+
+        # Distances to all training samples in one vectorised pass; each
+        # sample is flattened so scalar and multi-feature samples both work.
+        diffs = self.train_data - item
+        dists = np.sqrt(np.sum(np.square(diffs.reshape(len(diffs), -1)), axis=1))
+
+        # A stable sort keeps equally distant samples in training order.
+        nearest = np.argsort(dists, kind="stable")[:k]
+        votes = [self.train_label[i] for i in nearest]
+        # max() returns the first maximal element, which gives the
+        # nearest-first tie-breaking described above.
+        return max(votes, key=votes.count)
+
+    def choose(self, test_data, test_label, k_min=2, k_max=6):
+        """Pick the k in [k_min, k_max] with the highest accuracy on test_data.
+
+        Accuracy is the mean of the element-wise equality between the
+        predictions and test_label.
+
+        Args:
+            test_data: samples to classify.
+            test_label: expected label of each sample.
+            k_min: smallest k to try (inclusive).
+            k_max: largest k to try (inclusive).
+
+        Returns:
+            (best_k, best_accuracy); on equal accuracy the smallest k wins.
+
+        Raises:
+            ValueError: if k_min is greater than k_max.
+        """
+        if k_min > k_max:
+            raise ValueError("k_min must not exceed k_max")
+
+        max_k, max_acc = None, None
+        for k in range(k_min, k_max + 1):
+            acc = np.mean(np.equal(self.predict(test_data, k), test_label))
+            # Strict comparison keeps the first (smallest) k on ties.
+            if max_acc is None or acc > max_acc:
+                max_k, max_acc = k, acc
+        return max_k, max_acc
+
+
+def generate():
+    """Sample three 2-D Gaussian classes and save a train/test split to data.npy.
+
+    Labels 0, 1 and 2 get 800, 200 and 1000 samples respectively. The 2000
+    samples are shuffled; the first 1600 become the training set and the
+    remaining 400 the test set. The file holds a 2x2 object array laid out
+    as [[train_data, train_label], [test_data, test_label]].
+    """
+    # (mean, covariance, sample count) for labels 0, 1 and 2 in that order.
+    classes = [
+        ((20, 25), [[10, 2.1], [2.1, 12]], 800),
+        ((16, -5), [[23, 0], [0, 22]], 200),
+        ((3, 5), [[10, 5], [5, 10]], 1000),
+    ]
+    n_train = 1600
+
+    samples = [
+        np.random.multivariate_normal(mean, np.array(cov), (count,))
+        for mean, cov, count in classes
+    ]
+    labels = [
+        np.ones((count,), dtype=int) * cls
+        for cls, (_, _, count) in enumerate(classes)
+    ]
+    data = np.concatenate(samples)
+    label = np.concatenate(labels)
+
+    idx = np.arange(len(data))
+    np.random.shuffle(idx)
+    data = data[idx]
+    label = label[idx]
+
+    train_data, test_data = data[:n_train], data[n_train:]
+    train_label, test_label = label[:n_train], label[n_train:]
+
+    # The four arrays have different shapes, so passing them to np.save as a
+    # nested tuple makes NumPy build a ragged array, which recent versions
+    # reject. Fill an explicit object array instead; it still unpacks as
+    # (train_data, train_label), (test_data, test_label) after loading.
+    split = np.empty((2, 2), dtype=object)
+    split[0, 0], split[0, 1] = train_data, train_label
+    split[1, 0], split[1, 1] = test_data, test_label
+    np.save("data.npy", split)
+
+def display(data, label, name):
+    """Scatter-plot 2-D samples grouped by class and show the figure.
+
+    Args:
+        data: samples, one per row; columns 0 and 1 are plotted as x and y.
+        label: class label of each sample, in the same order as data.
+        name: text used as the figure title (e.g. 'train' or 'test').
+    """
+    data = np.asarray(data)
+    label = np.asarray(label)
+
+    # Plot only the classes that are present, so a split that lacks one
+    # class no longer fails when indexing an empty group.
+    for cls in np.unique(label):
+        points = data[label == cls]
+        plt.scatter(points[:, 0], points[:, 1], label='label ' + str(cls))
+
+    plt.legend()
+    plt.title(name)
+    plt.show()
+
+def read():
+    """Load the train/test split stored in data.npy in the working directory.
+
+    Returns:
+        ((train_data, train_label), (test_data, test_label)) as unpacked
+        from the loaded array.
+    """
+    # allow_pickle=True is required to load arrays that contain Python objects.
+    train_part, test_part = np.load("data.npy", allow_pickle=True)
+    train_data, train_label = train_part
+    test_data, test_label = test_part
+    return (train_data, train_label), (test_data, test_label)
+
+
+if __name__ == "__main__":
+    # mode selects exactly one step:
+    #   0          -> generate data.npy
+    #   1          -> plot the stored train and test sets
+    #   otherwise  -> fit KNN, choose k on the test set and print the result
+    mode = 0
+    if mode == 0:
+        generate()
+    # elif keeps the steps mutually exclusive; with a separate `if`, mode 0
+    # fell through to the final else and also ran training and testing.
+    elif mode == 1:
+        (train_data, train_label), (test_data, test_label) = read()
+        display(train_data, train_label, 'train')
+        display(test_data, test_label, 'test')
+    else:
+        (train_data, train_label), (test_data, test_label) = read()
+
+        model = KNN()
+        model.fit(train_data, train_label)
+        k, acc = model.choose(test_data, test_label)
+        print("k=",k,"acc=",acc*100,"%")
\ No newline at end of file