# Source: patch 4e040f8 ("add third assignment", 2022-01-20).
"""Two-cluster k-means on 2-D points, clustered in polar coordinates."""

import numpy as np
import pandas as pd


def get_data(file):
    """Read a header-less CSV file and return its contents as a NumPy array.

    Args:
        file: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The values of the file, one row per CSV line.
    """
    df = pd.read_csv(file, header=None)
    return df.values


def get_Polar(data):
    """Convert a Cartesian point to polar coordinates.

    Only ``data[0]`` (x) and ``data[1]`` (y) are read, so extra trailing
    components are ignored.  Scalars and NumPy arrays both work, which lets
    the caller convert a whole dataset by passing its x and y columns.

    Args:
        data: Indexable whose first two items are x and y.

    Returns:
        ``[dist, angle]`` with ``dist >= 0`` and ``angle`` in ``[-pi, pi]``.
    """
    x, y = data[0], data[1]
    # arctan2 keeps the quadrant and is defined for x == 0, where the plain
    # arctan(y / x) form divides by zero.
    return [np.hypot(x, y), np.arctan2(y, x)]


def get_distance(data, origin):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each argument are used.  ``data`` may
    also hold two columns (arrays) to get the distance of many points to
    ``origin`` at once.

    Args:
        data: Point (or pair of coordinate columns) to measure from.
        origin: Reference point.

    Returns:
        The distance, ``0`` when both points coincide.
    """
    return np.hypot(data[0] - origin[0], data[1] - origin[1])


def _update_center(members, previous):
    """Return the mean of ``members``, or ``previous`` if the cluster is empty."""
    if len(members) == 0:
        # The mean of an empty cluster is NaN, which can never compare equal
        # to itself and would keep the convergence test from ever passing.
        return previous
    return members.mean(axis=0)


def _plot_clusters(data, in_second):
    """Scatter-plot the first cluster in blue and the second one in red."""
    # Imported lazily so the clustering itself works without a plotting
    # backend being installed or a display being available.
    import matplotlib.pyplot as plt

    plt.scatter(data[~in_second, 0], data[~in_second, 1], c="b")
    plt.scatter(data[in_second, 0], data[in_second, 1], c="r")
    plt.show()


def kmeans(file="dataset_circles.csv", max_iter=300, show=True):
    """Split the points of a CSV file into two clusters with k-means.

    The first two columns of the file are taken as x and y, converted to
    polar coordinates, and clustered there.  The two initial centres are
    drawn with ``np.random``, so seed it with ``np.random.seed`` for
    reproducible results.

    Args:
        file: CSV file without header row; defaults to the original
            hard-coded ``"dataset_circles.csv"``.
        max_iter: Upper bound on the number of assignment/update rounds.
        show: When true, display the clusters in the original Cartesian
            coordinates with matplotlib.

    Returns:
        Integer array with one label per row: ``0`` for the first cluster
        (plotted blue), ``1`` for the second (plotted red).

    Raises:
        ValueError: If the dataset contains no rows.
    """
    data = get_data(file)
    count = data.shape[0]
    if count == 0:
        raise ValueError("dataset is empty")

    polar = np.column_stack(get_Polar(data[:, :2].astype(float).T))
    columns = polar.T

    # Draw two different rows as seeds whenever possible; identical seeds
    # would leave the second cluster empty from the very first round.
    first, second = np.random.choice(count, size=2, replace=count < 2)
    center1, center2 = polar[first], polar[second]

    in_second = np.zeros(count, dtype=bool)
    for _ in range(max_iter):
        # A point joins the second cluster only when it is strictly closer
        # to that centre; ties stay with the first cluster.
        in_second = get_distance(columns, center1) > get_distance(columns, center2)

        new_center1 = _update_center(polar[~in_second], center1)
        new_center2 = _update_center(polar[in_second], center2)
        converged = np.array_equal(new_center1, center1) and np.array_equal(
            new_center2, center2
        )
        center1, center2 = new_center1, new_center2
        if converged:
            break

    if show:
        _plot_clusters(data, in_second)
    return in_second.astype(int)


def main():
    """Run the clustering on the default dataset and show the result."""
    kmeans()


if __name__ == "__main__":
    main()