diff --git a/assignment-3/handout/source.py b/assignment-3/handout/source.py
index 43aa5bb327e9a34a31b8c553af7cd9104acdd220..5cdb02368d31afb3cdf967cc26eec7b132e4b9f7 100644
--- a/assignment-3/handout/source.py
+++ b/assignment-3/handout/source.py
@@ -1,34 +1,37 @@
 import numpy as np
 
+
 class KMeans:
     def __init__(self, n_clusters):
         pass
-    
+
     def fit(self, train_data):
         pass
-    
+
     def predict(self, test_data):
         pass
 
+
 class GaussianMixture:
     def __init__(self, n_clusters):
         pass
-    
+
     def fit(self, train_data):
         pass
-    
+
     def predict(self, test_data):
         pass
 
+
 class ClusteringAlgorithm:
     def __init__(self):
         pass
-    
+
     def fit(self, train_data):
         pass
-    
+
     def predict(self, test_data):
-        pass
+        pass
\ No newline at end of file
diff --git a/assignment-3/handout/tester_demo.py b/assignment-3/handout/tester_demo.py
index 19ec0e8091691d4aaaa6b53dbb695fde9e826d89..a569dddc62de7f598dcc986179caedd842e44b77 100644
--- a/assignment-3/handout/tester_demo.py
+++ b/assignment-3/handout/tester_demo.py
@@ -114,4 +114,4 @@ if __name__ == "__main__":
     if len(sys.argv) > 1 and sys.argv[1] == "--report":
         test_all(True)
     else:
-        test_all()
+        test_all()
\ No newline at end of file
diff --git a/assignment-3/submission/19307110020/README.md b/assignment-3/submission/19307110020/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f708b655d91352c64baa33d2ba2ddb4fb84d70dd
--- /dev/null
+++ b/assignment-3/submission/19307110020/README.md
@@ -0,0 +1,210 @@
+# HW3 - Clustering Algorithms
+
+## 19307110020 - 贺正夫
+
+[TOC]
+
+### 1. Overview
+
+- This assignment implements K-means++ and a Gaussian mixture model fitted with the EM algorithm for unsupervised learning.
+- Experiments and visualizations are carried out across different dimensionalities and sample sizes of different orders of magnitude, verifying the correctness of the algorithms and their robustness to the input data.
+- An algorithm that automatically searches for the optimal number of clusters is also implemented.
+
+### 2. Model description
+
+Two clustering algorithms are implemented. The K-means algorithm uses K-means++ initialization; the remaining steps are standard and are not elaborated here.
+
+The GMM is also standard, except that the covariance matrices have to be initialized and there is no obvious way to do so accurately. Some classmates argue that an accurate covariance initialization is unnecessary; I have not studied probability theory systematically, but a reasonable initialization can hardly hurt. Each dimension is therefore min-max normalized independently and the identity matrix is used as the initial covariance, which stays as close as possible to the (scaled) data distribution without affecting the clustering result; the scaling is undone for visualization. The EM derivation is as follows:
+
+#### Initialization
+
+The means of the K Gaussian components are initialized with K-means++, and every component receives the same prior.
+
+#### E-step
+
+When computing the probabilities, the posterior is first evaluated per component and then normalized per data point.
+
+#### M-step
+
+First compute the total responsibility assigned to each component:
+
+$$
+n_k = \sum_{x \in C} \gamma_k(x)
+$$
+
+The mean of component $k$ is updated as
+
+$$
+\mu_k = \sum_{x\in C} x\,\gamma_k(x) / n_k
+$$
+
+and its covariance is updated as
+
+$$
+\Sigma_k = \sum_{x\in C} (x - \mu_k)(x - \mu_k)^T\,\gamma_k(x) / n_k
+$$
+
+where $\gamma_k(x)$ denotes the normalized posterior (responsibility) of component $k$ for point $x$, and the subtraction and multiplication are the usual numpy broadcasted operations.
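+
+The following is a minimal numpy sketch of one EM iteration implementing exactly the updates above. It is an illustration only: `gaussian_pdf` and `em_step` are names chosen for this sketch, and the submitted `source.py` uses a more robust eigendecomposition-based density instead.
+
+```python
+import numpy as np
+
+def gaussian_pdf(X, mean, cov):
+    """Density of N(mean, cov) evaluated at every row of X."""
+    d = X.shape[1]
+    diff = X - mean
+    maha = np.einsum('ni,ij,nj->n', diff, np.linalg.inv(cov), diff)
+    norm = np.sqrt((2 * np.pi) ** d * np.linalg.det(cov))
+    return np.exp(-0.5 * maha) / norm
+
+def em_step(X, means, covs, alpha):
+    """One EM iteration; means is (K, D), covs is (K, D, D), alpha is (K,)."""
+    N, K = X.shape[0], means.shape[0]
+    # E-step: posterior per component, then normalize per point
+    gamma = np.stack([alpha[k] * gaussian_pdf(X, means[k], covs[k]) for k in range(K)], axis=1)
+    gamma /= gamma.sum(axis=1, keepdims=True)
+    # M-step: n_k, then mu_k, Sigma_k and the mixing weights
+    nk = gamma.sum(axis=0)
+    means = (gamma.T @ X) / nk[:, None]
+    covs = np.empty_like(covs)
+    for k in range(K):
+        bias = X - means[k]
+        covs[k] = (bias * gamma[:, k:k + 1]).T @ bias / nk[k]
+    return means, covs, nk / N
+```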
+
+### 3. Basic experiments
+
+The following experiments were run for the two algorithms above.
+
+#### Experiment 1: basics
+
+Basic setup:
+
+**Number of clusters: 3**
+
+| Source data | Mean     | Covariance             |
+| ----------- | -------- | ---------------------- |
+| Cluster 1   | (1, 2)   | [[73, 0], [0, 22]]     |
+| Cluster 2   | (16, -5) | [[21.2, 0], [0, 32.1]] |
+| Cluster 3   | (10, 22) | [[10, 5], [5, 10]]     |
+
+| K-means result | K-means center | Ground truth |
+| -------------- | -------------- | ------------ |
+| Cluster 1      | [-2.7, 2.2]    | (1, 2)       |
+| Cluster 2      | [13.4, -2.0]   | (16, -5)     |
+| Cluster 3      | [10.0, 21.9]   | (10, 22)     |
+
+This already meets the requirement reasonably well.
+
+![Figure_1](img/Figure_1.png)
+
+| GMM result | GMM center   | Ground truth |
+| ---------- | ------------ | ------------ |
+| Cluster 1  | [1.0, 1.9]   | (1, 2)       |
+| Cluster 2  | [15.7, -6.0] | (16, -5)     |
+| Cluster 3  | [9.9, 22.0]  | (10, 22)     |
+
+| GMM result | GMM covariance               | Ground truth           |
+| ---------- | ---------------------------- | ---------------------- |
+| Cluster 1  | [[66.0, 3.9], [3.9, 21.9]]   | [[73, 0], [0, 22]]     |
+| Cluster 2  | [[28.9, -4.6], [-4.6, 34.4]] | [[21.2, 0], [0, 32.1]] |
+| Cluster 3  | [[10.1, 4.3], [4.3, 9.3]]    | [[10, 5], [5, 10]]     |
+
+Clearly, GMM recovers more accurate cluster centers. K-means offers no covariance estimate to compare against, but the GMM covariances also match the true covariances closely.
+
+![Figure_2](img/Figure_2.png)
+
+#### Experiment 2: more clusters, more irregular Gaussians
+
+Basic setup:
+
+**Number of clusters: 5**
+
+| Source data | Mean       | Covariance             |
+| ----------- | ---------- | ---------------------- |
+| Cluster 1   | (1, 2)     | [[73, 0], [0, 22]]     |
+| Cluster 2   | (16, -5)   | [[21.2, 0], [0, 32.1]] |
+| Cluster 3   | (10, 22)   | [[10, 5], [5, 10]]     |
+| Cluster 4   | (-2, 18)   | [[10, 5], [5, 10]]     |
+| Cluster 5   | (-15, -10) | [[22, 0], [0, 88]]     |
+
+| K-means result | K-means center | Ground truth | Remarks                                   |
+| -------------- | -------------- | ------------ | ----------------------------------------- |
+| Cluster 1      | [-9.75, 0.0]   | (1, 2)       | Hard to match to any ground-truth component |
+| Cluster 2      | [10.3, -1.3]   | (16, -5)     | Hard to match to any ground-truth component |
+| Cluster 3      | [9.9, 22.0]    | (10, 22)     |                                           |
+| Cluster 4      | [-1.7, 15.7]   | (-2, 18)     |                                           |
+| Cluster 5      | [-15.4, -15.7] | (-15, -10)   |                                           |
+
+Here K-means performs poorly, and the reason is clear: the data distribution is very irregular and the Gaussian components have very different shapes, so a purely nearest-distance assignment struggles to find suitable centers. The data are generated probabilistically, but nearest-distance assignment is not: it implicitly assumes every component's covariance is a multiple of the identity. In regions where a component has low sampling density, that component loses its claim on the points there; a closer cluster center takes them instead, producing many wrong assignments.
+
+As the figure below shows, each cluster's Gaussian contour is close to circular, which is a major weakness of K-means on Gaussians of differing shapes.
+
+![Figure_4](img/Figure_4.png)
+
+| GMM result | GMM center     | Ground truth |
+| ---------- | :------------- | ------------ |
+| Cluster 1  | [-2.1, 18.0]   | (-2, 18)     |
+| Cluster 2  | [-1.0, 1.8]    | (1, 2)       |
+| Cluster 3  | [15.4, -3.4]   | (16, -5)     |
+| Cluster 4  | [10.0, 22.1]   | (10, 22)     |
+| Cluster 5  | [-15.0, -11.5] | (-15, -10)   |
+
+These estimates are quite accurate; matching them to the ground-truth centers poses no difficulty.
+
+| GMM result | GMM covariance               | Ground truth           |
+| ---------- | :--------------------------- | ---------------------- |
+| Cluster 1  | [[8.5, 4.6], [4.6, 10.8]]    | [[10, 5], [5, 10]]     |
+| Cluster 2  | [[68.7, 4.9], [4.9, 27.5]]   | [[73, 0], [0, 22]]     |
+| Cluster 3  | [[21.8, -2.9], [-2.9, 33.8]] | [[21.2, 0], [0, 32.1]] |
+| Cluster 4  | [[11.3, 5.4], [5.4, 9.8]]    | [[10, 5], [5, 10]]     |
+| Cluster 5  | [[21.7, 0.2], [0.2, 75.6]]   | [[22, 0], [0, 88]]     |
+
+These are also very accurate. On two-dimensional data the algorithm clusters with high precision, even when the Gaussians of different latent components overlap considerably.
+
+![Figure_3](img/Figure_3.png)
+
+#### Experiment 3: robustness with very little data
+
+For ease of visualization this experiment uses 2-D data; the same behaviour holds in higher dimensions, e.g. test case 2 in the tester.
+
+**Number of clusters: 2**
+
+Only three points are given, two close together and one far away. No statistics are needed here; the figures below show the result directly:
+
+![Figure_5](img/Figure_5.png)
+
+![Figure_6](img/Figure_6.png)
+
+The Gaussian of the red cluster does not show up in the plot, because its scale has collapsed to the order of 1e-150 (the usual degenerate behaviour when a component is responsible for essentially a single point: its covariance shrinks towards zero). The clustering itself is nevertheless correct.
+
+#### Experiment 4: ignoring outliers
+
+Some data clearly split into K clusters, but real data are not always clean and may contain outliers; this part explores whether the algorithms can ignore them.
+
+The main data are those of Experiment 1, with the outliers (100, 100), (100, -100), (-100, 100), (-100, -100) added by hand. The results are as follows:
+
+![Figure_7](img/Figure_7.png)
+
+![Figure_8](img/Figure_8.png)
+
+Clearly, four points are enough to make both clustering algorithms fail badly. The GMM plot looks entirely blue because the Gaussian of the red cluster has become so large that it covers coordinates of 3000-4000. This is obviously not what we want.
+
+The K-means++ initialization is partly to blame, which motivates the Kmeans++++ variant proposed in this assignment.
+
+### 4. Kmeans++++
+
+While K-means++ picks the remaining K-1 centers one by one, any point whose distance is clearly more than `threshold` times the mean distance is excluded from being chosen as a center: its distance (and hence its sampling probability) is set to 0. The code, as in `source.py`, is as follows:
+
+```python
+tmpmean = distances.mean()
+if self.quadra_plus_threshold is not None:
+    for i in range(distances.shape[0]):
+        if distances[i] > self.quadra_plus_threshold * tmpmean:
+            distances[i] = 0.
+```
+
+Compared with plain K-means++, Kmeans++++ behaves as follows:
+
+![Figure_9](img/Figure_9.png)
+
+It is no longer affected by the outliers and returns to the normal clustering behaviour.
+
+For GMM, however, this change does not help much, because the maximization step is still affected by the outliers. After much thought I did not find a fix; the outliers would apparently have to be removed in a preprocessing step, which I left out due to time constraints.
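+
+As a usage illustration (not one of the experiments above): passing `quadra_plus_threshold=None` switches the filter off, which makes it easy to compare plain K-means++ with Kmeans++++ on the Experiment 4 data. The per-cluster sample size below is an assumption made only for this sketch.
+
+```python
+import numpy as np
+from source import KMeans  # this submission's implementation
+
+# Data in the spirit of Experiment 1, plus the four hand-added outliers.
+rng = np.random.default_rng(0)
+clean = np.vstack([
+    rng.multivariate_normal((1, 2), [[73, 0], [0, 22]], 300),
+    rng.multivariate_normal((16, -5), [[21.2, 0], [0, 32.1]], 300),
+    rng.multivariate_normal((10, 22), [[10, 5], [5, 10]], 300),
+])
+outliers = np.array([[100, 100], [100, -100], [-100, 100], [-100, -100]], dtype=float)
+data = np.vstack([clean, outliers])
+
+plain = KMeans(n_cluster=3, quadra_plus_threshold=None).fit(data)  # plain K-means++ seeding
+robust = KMeans(n_cluster=3).fit(data)                             # Kmeans++++, default threshold 10
+print("K-means++  centers:", plain.centers)
+print("Kmeans++++ centers:", robust.centers)
+```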
+
+### 5. Automatically choosing K
+
+The elbow method is a heuristic, and reading the elbow off a plot is subjective, especially in ambiguous cases. This report therefore uses the Gap Statistics method proposed at Stanford University in 2000. The idea is as follows:
+
+$$
+Gap(K) = E^*\left[\log D_K\right] - \log D_K \\
+\text{where}\quad D_K = \sum_{i=1}^{K}\sum_{x\in C_i}\lVert x - M_i\rVert^2
+$$
+
+Here $M_i$ is the center of the $i$-th cluster, $D_K$ is the same within-cluster dispersion used by the elbow method, and $E^*$ denotes the expectation of the same quantity over reference data sets drawn uniformly from the bounding box of the data.
+
+Intuitively, the elbow method looks for the K at which $D_K$ drops fastest, but "fastest" has to be judged by eye. Gap Statistics instead introduces a reference quantity that decreases smoothly as K grows, against which the decrease of $D_K$ can be compared; the gap therefore measures how strong an elbow each K is, and taking the argmax gives a suitable K.
+
+See the code and its comments for the implementation details.
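+
+A compact sketch of this procedure is shown below; it mirrors `ClusteringAlgorithm.fit` in `source.py`, while `log_dispersion`, `gap_for_k` and `n_ref` are names used only in this sketch.
+
+```python
+import numpy as np
+from source import KMeans  # this submission's K-means++
+
+def log_dispersion(data, k):
+    """log D_K: log within-cluster sum of squared distances of a k-cluster K-means fit."""
+    model = KMeans(k).fit(data)
+    labels = model.predict(data)
+    d_k = sum(np.sum((data[labels == i] - model.centers[i]) ** 2) for i in range(k))
+    return np.log(d_k)
+
+def gap_for_k(data, k, n_ref=10):
+    """Gap(K): mean log-dispersion of uniform reference data minus the actual log-dispersion."""
+    lo, hi = data.min(axis=0), data.max(axis=0)
+    refs = [np.random.uniform(lo, hi, size=data.shape) for _ in range(n_ref)]
+    return np.mean([log_dispersion(r, k) for r in refs]) - log_dispersion(data, k)
+
+# best_k = max(range(2, 10), key=lambda k: gap_for_k(data, k))
+```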
+
+Below is the result on a simple example: ![Figure_10](img/Figure_10.png)
+
+Below is the result on the harder test case 1: ![Figure_11](img/Figure_11.png)
+
+### 6. Reflections
+
+This is the last assignment. I feel I have learned a lot about finding and solving problems, but my coding skills still leave much to be desired: the last experiment took so long to run that one could almost have trained a small model in that time... and my grasp of probability theory is also lacking. I hope to keep practicing.
diff --git a/assignment-3/submission/19307110020/img/Figure_1.png b/assignment-3/submission/19307110020/img/Figure_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..43f21a7ef70ede47b53b4494e1a238902c7bb899
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_1.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_10.png b/assignment-3/submission/19307110020/img/Figure_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..1779c81a8ab7b6cf3f5d9a25fb369dece13052cd
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_10.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_11.png b/assignment-3/submission/19307110020/img/Figure_11.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d8d181b72d6a136562b629e75de7532f816b0bd
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_11.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_2.png b/assignment-3/submission/19307110020/img/Figure_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..2646c4a49a909fabb1abae549a2e770cf983b2a6
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_2.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_3.png b/assignment-3/submission/19307110020/img/Figure_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..38ea0a5f1655358633cb801b09e276651e536bb9
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_3.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_4.png b/assignment-3/submission/19307110020/img/Figure_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..87ec588d33c78305784e7996f13c352ec73de5ec
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_4.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_5.png b/assignment-3/submission/19307110020/img/Figure_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..0a7643a449a8f19f28d367a6ea50b28d9c646eb5
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_5.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_6.png b/assignment-3/submission/19307110020/img/Figure_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..7376955f3755f3f8c414702644ce7eab9f8db269
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_6.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_7.png b/assignment-3/submission/19307110020/img/Figure_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..1a10faaddcb477cbc78ff4bff89da21bbd326116
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_7.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_8.png b/assignment-3/submission/19307110020/img/Figure_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..375c2122e3ac1bca915337b4ec8a51a5866c1bfb
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_8.png differ
diff --git a/assignment-3/submission/19307110020/img/Figure_9.png b/assignment-3/submission/19307110020/img/Figure_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..4bbe40d2ec4322239b17fa51c924797a4a70bd3e
Binary files /dev/null and b/assignment-3/submission/19307110020/img/Figure_9.png differ
diff --git a/assignment-3/submission/19307110020/source.py b/assignment-3/submission/19307110020/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..79f46e7e5e3717ce07837a661b57b3592b4d10fe
--- /dev/null
+++ b/assignment-3/submission/19307110020/source.py
@@ -0,0 +1,201 @@
+import numpy as np
+
+
+def calculate_distance(x1, x2):
+    # mean absolute coordinate difference, used as the point-to-point distance throughout
+    return np.mean(np.sqrt((x1 - x2) ** 2))
+
+
+class KMeans:
+    def __init__(self, n_cluster, epochs=10, quadra_plus_threshold=10):  # class initialization
+        self.n_cluster = n_cluster
+        self.epochs = epochs
+        self.centers = []
+        self.quadra_plus_threshold = quadra_plus_threshold
+
+    def fit(self, train_data):
+        def find_nearest_centroid(point):
+            """
+            Find the nearest existing center and return the distance to it.
+            :param point: query point
+            :return: distance to the nearest current center
+            """
+            res = calculate_distance(point, centroid_list[0])
+            for _ in range(1, len(centroid_list)):
+                res = min(res, calculate_distance(point, centroid_list[_]))
+            return res
+
+        # Randomly initialize the first centroid.
+        points_list = list(train_data)
+        centroid_list = []
+        index = np.random.randint(0, len(points_list))
+        centroid_list.append(points_list[index])
+        del points_list[index]
+
+        # Choose the remaining cluster centers (K-means++ seeding).
+        # The sampling weight of each point is its squared distance to the nearest current center.
+        for _ in range(self.n_cluster - 1):
+            distances = [find_nearest_centroid(__) for __ in points_list]
+            distances = np.array([x ** 2 for x in distances])
+            tmpmean = distances.mean()
+            if self.quadra_plus_threshold is not None:  # filter outliers ("Kmeans++++")
+                for i in range(distances.shape[0]):
+                    if distances[i] > self.quadra_plus_threshold * tmpmean:
+                        distances[i] = 0.
+            weighted_list = distances / np.sum(distances)
+            chosen_index = np.random.choice(np.arange(len(distances)), p=weighted_list)
+            centroid_list.append(points_list[chosen_index])
+            del points_list[chosen_index]
+        self.centers = centroid_list
+
+        # Ordinary K-means iterations:
+        # assign each point to a cluster, then move each center to the mean of its assigned points.
+        for epoch in range(self.epochs):
+            self.predict_cluster = self.predict(train_data)
+            for ct in range(len(self.centers)):
+                idx, = np.where(self.predict_cluster == ct)
+                samples = train_data[idx, :]
+                self.centers[ct] = np.mean(samples, axis=0)
+
+        return self
+
+    def predict(self, test_data):
+        """
+        Predict the cluster of each point according to its nearest centroid.
+        :return: predicted cluster labels
+        """
+        predict_cluster = np.zeros(shape=(len(test_data),))
+        for n, arr in enumerate(test_data):
+            predict_cluster[n] = np.argmin([calculate_distance(arr, _) for _ in self.centers])  # nearest center
+        return predict_cluster
+
+
+def scale_data(data):
+    """
+    Min-max standardization of the input data (performed in place, column by column).
+    :param data: np.array
+    :return: min-max standardized data
+    """
+    for i in range(data.shape[1]):
+        max_ = data[:, i].max()
+        min_ = data[:, i].min()
+        data[:, i] = (data[:, i] - min_) / (max_ - min_)
+    return data
+
+
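+# Note: the density below is computed from the eigendecomposition of the covariance,
+# using a pseudo-inverse (small eigenvalues are dropped and large inverse eigenvalues are
+# capped at 1e8), so near-singular covariances -- e.g. a component that collapses onto
+# very few points -- still yield a finite density instead of a numerical error.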
+def normal(x, mean, cov):
+    """
+    :param x: query data
+    :param mean: mean of the given normal distribution
+    :param cov: covariance matrix of the given normal distribution
+    :return: pdf value at each row of x
+    """
+    s, u = np.linalg.eigh(cov)
+    t = s.dtype.char.lower()
+    eps = 1E6 * np.finfo(t).eps * np.max(abs(s))
+    s_pinv = np.array([0 if abs(x) <= eps else min(1/x, 1e8) for x in s], dtype=float)
+    d = s[s > eps]
+    maha = np.sum(np.square(np.dot((x - mean), np.multiply(u, np.sqrt(s_pinv)))), axis=-1)
+    return np.exp(-0.5 * (len(d) * np.log(2 * np.pi) + np.sum(np.log(d)) + maha))
+
+
+class GaussianMixture:
+    def __init__(self, n_cluster, epochs=200):
+        self.n_cluster = n_cluster
+        self.p = np.random.rand(self.n_cluster)
+        self.p = self.p / self.p.sum()  # all p's sum up to 1
+        self.epochs = epochs
+
+    def fit(self, train_data):
+        self.train_data = scale_data(train_data)
+        self.N, self.D = train_data.shape
+        self.means = np.array(KMeans(self.n_cluster).fit(train_data).centers)  # K-means++ initialization
+        self.covs = np.array([np.eye(self.D)] * self.n_cluster)
+        self.alpha = np.ones(self.n_cluster) / self.n_cluster  # identical priors
+        for epoch in range(self.epochs):  # iterative optimization
+            self.e_step()
+            self.m_step()
+        return self
+
+    def e_step(self):
+        """
+        ordinary expectation step: responsibilities per component, normalized per point
+        """
+        self.gamma = np.zeros((self.N, self.n_cluster))
+        for k in range(self.n_cluster):
+            self.gamma[:, k] = self.alpha[k] * normal(self.train_data, self.means[k], self.covs[k])
+        for i in range(self.N):
+            self.gamma[i, :] /= np.sum(self.gamma[i, :])
+
+    def m_step(self):
+        """
+        ordinary maximization step: update means, covariances and mixing weights
+        """
+        for k in range(self.n_cluster):
+            nk = np.sum(self.gamma[:, k])
+            self.means[k, :] = np.sum(self.train_data * self.gamma[:, k].reshape(-1, 1), axis=0) / nk
+            bias = self.train_data - self.means[k]
+            self.covs[k] = np.matmul(bias.T, (bias * self.gamma[:, k].reshape(-1, 1))) / nk
+            self.alpha[k] = nk / self.N
+
+    def predict(self, test_data):
+        """
+        :return: predicted labels from the learned mixing weights and component densities
+        """
+        test_data = scale_data(test_data)
+        density = np.empty((test_data.shape[0], self.n_cluster))
+        for i in range(self.n_cluster):
+            density[:, i] = normal(test_data, self.means[i], self.covs[i])
+        posterior = density * self.alpha  # use the learned mixing weights as the prior
+        predicted = np.argmax(posterior, axis=1)
+        return predicted
+
+
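+# ClusteringAlgorithm chooses the number of clusters automatically with the gap statistic
+# described in the README: for each candidate K it compares the log-dispersion of the data
+# with the mean log-dispersion of uniformly sampled reference data sets, and the K with
+# the largest gap is used to fit the final KMeans model.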
+class ClusteringAlgorithm:
+    def __init__(self, candidate_max=10, it=10):
+        self.it = it
+        self.candidate_list = range(2, candidate_max)  # candidate_max must be larger than 2
+
+    def fit(self, train_data):
+        def generate_uniform_points(data):
+            """
+            :return: points sampled uniformly from the bounding box (rectangle or hyper-cube) of the data
+            """
+            mins = np.argmin(data, axis=0)
+            maxs = np.argmax(data, axis=0)
+            reference_data_set = np.zeros((data.shape[0], data.shape[1]))
+            for i in range(data.shape[0]):
+                for j in range(data.shape[1]):
+                    reference_data_set[i][j] = np.random.uniform(data[mins[j]][j], data[maxs[j]][j])
+            return reference_data_set
+
+        def dispersion(data, k):
+            """
+            :return: log D_k, the log within-cluster sum of squared distances of a k-cluster K-means fit
+            """
+            model = KMeans(k).fit(data)
+            distances_from_mean = np.zeros(k)  # accumulate within-cluster squared distances
+            for i in range(k):
+                for idx, label in enumerate(model.predict_cluster):
+                    if i == label:
+                        distances_from_mean[i] += np.sum((data[idx] - model.centers[i]) ** 2)
+            return np.log(np.sum(distances_from_mean))
+
+        def gap_statistic(data, nthcluster):
+            actual_dispersion = dispersion(data, nthcluster)
+            # ref_dispersion: mean dispersion over self.it uniformly sampled reference data sets
+            ref_dispersion = np.mean([dispersion(generate_uniform_points(data), nthcluster) for _ in range(self.it)])
+            return actual_dispersion, ref_dispersion
+
+        # the following code block computes the optimal K
+        dispersion_values = np.zeros((len(self.candidate_list), 2))
+        for i, cluster in enumerate(self.candidate_list):
+            dispersion_values[i] = gap_statistic(train_data, cluster)
+        gaps = dispersion_values[:, 1] - dispersion_values[:, 0]  # gap = reference - actual
+        n_clusters = np.argmax(gaps) + 2  # plus 2 because K starts from 2
+
+        # ordinary fit and predict given n_clusters
+        self.model = KMeans(n_clusters)
+        return self.model.fit(train_data)
+
+    def predict(self, test_data):
+        return self.model.predict(test_data)
\ No newline at end of file