diff --git a/assignment-1/submission/16307130040/README.md b/assignment-1/submission/16307130040/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c10dbc29cf23e901dd691ab9402d13c098c79950 --- /dev/null +++ b/assignment-1/submission/16307130040/README.md @@ -0,0 +1,80 @@ +# 实验报告1 + + + +### 1,KNN的实现 + +```python + def predict1(self,test_data,k): + predict=[] + for instance in test_data: + distances=np.array([self.distance (x,instance) for x in self.X]) + + kneighbors=np.argsort(np.array(distances))[:k] + count = Counter(self.y[kneighbors]) + predict.append(count.most_common()[0][0]) + return predict +``` + +将测试点最近的k个点列出,并找出其中出现最多的标签,作为预测的结果。 + +```python + def fit(self, train_data, train_label): + X_train,X_test,y_train,y_test=self.train_test_split(train_data, train_label) + self.X=np.array(X_train) + self.y=np.array(y_train) + max_accurate=0 + best_k=0 + for k in self.k_n: + accurate=0 + train_predict=self.predict1(X_test,k) + correct = np.count_nonzero((train_predict==y_test)==True) + + accurate=correct/len(X_test) + if (accurate>max_accurate): + max_accurate=accurate + best_k=k + self.k_select=best_k +``` + +k_n为[1,2,3,4,5],knn将输入的数据分为训练集和测试集,并从k_n中选择一个准确率最高的k值。 + +### 2,实验部分 + +```python +def generate(): + X1 = np.random.multivariate_normal([1,50], [[1,0],[0,10]], 100) + X2 = np.random.multivariate_normal([3,50], [[1,0],[0,10]], 100) + X3 = np.random.multivariate_normal([5,50], [[1,0],[0,10]], 100) + X = np.concatenate([X1,X2,X3]) + y = np.array([0]*100 + [1]*100 +[2]*100) + idx = np.arange(300) + np.random.shuffle(idx) + data=X=X[idx] + label=y=y[idx] + + X_train=X[:240] + X_test=X[240:] + y_train=y[:240] + y_test=y[240:] + return np.array(X_train),np.array(X_test),np.array(y_train),np.array(y_test) + +``` + +生成数据,将它们分为训练集和测试集。将训练集输入KNN,之后利用KNN预测测试集的标签。 + +这是训练集: + +![avatar](/img/train.png) + +这是测试集: + + + +​ ![avatar](/img/test.png) + +这是对测试集的预测: + +![avatar](/img/predict.png) + +这个预测的准确度为0.75. 
"""k-nearest-neighbours classifier with a small synthetic-data demo.

The classifier holds out the tail of the training data as a validation
split, picks the best ``k`` from ``KNN.k_n`` on that split, and then
predicts by majority vote among the ``k`` closest training points.
"""

import numpy as np
from collections import Counter


class KNN:
    """Brute-force k-NN classifier using Euclidean distance."""

    # Candidate neighbour counts tried by ``fit``.
    k_n = [1, 2, 3, 4, 5]
    # Selected neighbour count; 0 means "not fitted yet".
    k_select = 0

    def __init__(self):
        # Per-instance state: class-level lists would be shared by every
        # instance of the classifier.
        self.k_select = 0
        self.X = []
        self.y = []

    def train_test_split(self, X, y, train_ratio=0.8):
        """Split ``X``/``y`` into a leading train part and a trailing part.

        No shuffling is performed; the first ``int(len(X) * train_ratio)``
        samples form the training part and the rest the held-out part.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.
        """
        offset = int(len(X) * train_ratio)
        return (
            np.array(X[:offset]),
            np.array(X[offset:]),
            np.array(y[:offset]),
            np.array(y[offset:]),
        )

    def distance(self, instance1, instance2):
        """Return the Euclidean distance between two feature vectors."""
        diff = np.asarray(instance1) - np.asarray(instance2)
        return np.sqrt(np.sum(diff ** 2))

    def predict1(self, test_data, k):
        """Predict a label for every row of ``test_data`` using ``k`` neighbours.

        Args:
            test_data: Sequence of feature vectors.
            k: Number of neighbours that vote; must be at least 1.

        Returns:
            A list with one predicted label per test instance.

        Raises:
            ValueError: If ``k`` is smaller than 1 or no training data is
                stored on the instance.
        """
        if k < 1:
            raise ValueError("k must be a positive integer")
        train_X = np.asarray(self.X)
        train_y = np.asarray(self.y)
        if len(train_X) == 0:
            raise ValueError("no training data stored; call fit() first")

        predictions = []
        for instance in np.asarray(test_data):
            # One vectorised pass over the training set per test point.
            distances = np.sqrt(np.sum((train_X - instance) ** 2, axis=1))
            # A stable sort keeps the neighbour order deterministic on ties.
            nearest = np.argsort(distances, kind="stable")[:k]
            votes = Counter(train_y[nearest])
            # Equal counts resolve to the label encountered first, i.e. the
            # one belonging to the closer neighbour.
            predictions.append(votes.most_common(1)[0][0])
        return predictions

    def fit(self, train_data, train_label):
        """Store training data and choose ``k`` on a held-out validation split.

        The last 20% of the samples are used only to score each candidate
        in ``k_n``; the stored model consists of the first 80%.

        Raises:
            ValueError: If data and labels differ in length, if there are
                too few samples to form both splits, or if ``k_n`` is empty.
        """
        if len(train_data) != len(train_label):
            raise ValueError("train_data and train_label differ in length")
        X_train, X_val, y_train, y_val = self.train_test_split(
            train_data, train_label
        )
        if len(X_train) == 0 or len(X_val) == 0:
            raise ValueError("fit() needs at least two training samples")

        self.X = X_train
        self.y = y_train

        # Start below any reachable accuracy so that a valid k is selected
        # even when every candidate scores 0 on the validation split.
        best_accuracy = -1.0
        best_k = None
        for k in self.k_n:
            predicted = np.asarray(self.predict1(X_val, k))
            accuracy = np.count_nonzero(predicted == y_val) / len(y_val)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_k = k
        if best_k is None:
            raise ValueError("k_n contains no candidate values")
        self.k_select = best_k

    def predict(self, test_data):
        """Predict labels for ``test_data`` with the ``k`` chosen by ``fit``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if self.k_select < 1:
            raise RuntimeError("call fit() before predict()")
        return self.predict1(test_data, self.k_select)


def generate():
    """Create a shuffled three-class 2-D Gaussian data set.

    Each class contributes 100 samples; the shuffled 300 samples are split
    into the first 240 for training and the last 60 for testing.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.
    """
    covariance = [[1, 0], [0, 10]]
    X = np.concatenate([
        np.random.multivariate_normal([1, 50], covariance, 100),
        np.random.multivariate_normal([3, 50], covariance, 100),
        np.random.multivariate_normal([5, 50], covariance, 100),
    ])
    y = np.array([0] * 100 + [1] * 100 + [2] * 100)

    idx = np.arange(300)
    np.random.shuffle(idx)
    X = X[idx]
    y = y[idx]

    return (
        np.array(X[:240]),
        np.array(X[240:]),
        np.array(y[:240]),
        np.array(y[240:]),
    )


def display(data, label, name):
    """Scatter-plot 2-D ``data`` coloured by integer ``label``.

    Args:
        data: Array-like of shape (n, 2).
        label: Integer class label for every row of ``data``.
        name: Text used as the figure title.
    """
    # Imported lazily so the classifier can be used without matplotlib.
    import matplotlib.pyplot as plt

    data = np.asarray(data)
    label = np.asarray(label)
    colors = ['b', 'r', 'y']
    # Only classes that actually occur are drawn, so a class without any
    # points no longer triggers an indexing error on an empty array.
    for cls in np.unique(label):
        points = data[label == cls]
        plt.scatter(points[:, 0], points[:, 1],
                    c=colors[int(cls) % len(colors)])
    plt.title(name)
    plt.show()


def main():
    """Run the demo: generate data, fit, predict, plot and report accuracy."""
    X_train, X_test, y_train, y_test = generate()
    model = KNN()
    model.fit(X_train, y_train)
    predict = model.predict(X_test)
    display(X_train, y_train, 'train')
    display(X_test, y_test, 'test')
    display(X_test, predict, 'predict')
    correct = np.count_nonzero(np.asarray(predict) == y_test)
    accurate = correct / len(X_test)
    print('accu=', accurate)


if __name__ == '__main__':
    main()