-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathplot_kmeans_l1.py
More file actions
123 lines (86 loc) · 2.28 KB
/
plot_kmeans_l1.py
File metadata and controls
123 lines (86 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
.. _l-kmeans-l1-example:

KMeans with norm L1
===================

This demonstrates how results change when using norm L1 for a k-means
algorithm.
"""
import matplotlib.pyplot as plt
import numpy
import numpy.random as rnd
from sklearn.cluster import KMeans
from mlinsights.mlmodel import KMeansL1L2
######################################################################
# Simple datasets
# ---------------
N = 1000
# Two unit squares of N points each, drawn in one go; the second block
# of N rows is then shifted up by one along the y axis.
X = rnd.rand(2 * N, 2).astype(numpy.float64)
X[N:, 1] += 1
# Offset a tenth of the first square to the left by two units.
X[: N // 10, 0] -= 2
X.shape
########################################
#
# Scatter plot of the raw points before any clustering.
fig, axis = plt.subplots(1, 1)
axis.plot(X[:, 0], X[:, 1], ".")
axis.set_title("Two squares")
######################################################################
# Classic KMeans
# --------------
#
# It uses euclidean distance.
km = KMeans(n_clusters=2).fit(X)
km.cluster_centers_
def plot_clusters(km_, X, ax):
    """Draw the points of *X* on *ax*, one color per predicted cluster,
    and overlay the cluster centers.

    :param km_: fitted clustering model exposing ``predict`` and
        ``cluster_centers_`` (e.g. :class:`sklearn.cluster.KMeans`)
    :param X: array of 2D points, shape ``(n_samples, 2)``
    :param ax: matplotlib axes to draw on
    """
    lab = km_.predict(X)
    C = km_.cluster_centers_
    # One series per cluster so the legend lists each cluster separately.
    for i in range(C.shape[0]):
        sub = X[lab == i]
        ax.plot(sub[:, 0], sub[:, 1], ".", label=f"c={i}")
    ax.plot(C[:, 0], C[:, 1], "o", ms=15, label="centers")
    ax.legend()
# Visualize the euclidean clustering.
fig, axis = plt.subplots(1, 1)
plot_clusters(km, X, axis)
axis.set_title("L2 KMeans")
######################################################################
# KMeans with L1 norm
# -------------------
kml1 = KMeansL1L2(2, norm="L1")
kml1.fit(X)
########################################
#
kml1.cluster_centers_
########################################
#
# Visualize the L1 clustering on the same data.
fig, axis = plt.subplots(1, 1)
plot_clusters(kml1, X, axis)
axis.set_title("L1 KMeans")
######################################################################
# When clusters are completely different
# --------------------------------------
N = 1000
# Same construction as the first dataset, but the displaced tenth of
# the first square is pushed four units to the left instead of two.
X = rnd.rand(2 * N, 2).astype(numpy.float64)
X[N:, 1] += 1
X[: N // 10, 0] -= 4
X.shape
########################################
#
# Fit the euclidean variant on the new dataset.
km = KMeans(n_clusters=2)
km.fit(X)
########################################
#
# Fit the L1 variant on the same points.
kml1 = KMeansL1L2(2, norm="L1")
kml1.fit(X)
########################################
#
# Show both clusterings side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
plot_clusters(km, X, axes[0])
plot_clusters(kml1, X, axes[1])
axes[0].set_title("L2 KMeans")
axes[1].set_title("L1 KMeans")