mlinsights/_doc/examples/plot_piecewise_linear_regression.py at 37347dcbfe5eb26abf87a5a42f2b73a78bccefe9 · sdpython/mlinsights · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
Piecewise linear regression with scikit-learn predictors
========================================================

The notebook illustrates an implementation of a piecewise linear
regression based on
`scikit-learn <https://scikit-learn.org/stable/index.html>`_. The
bucketization can be done with a
`DecisionTreeRegressor <https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html>`_
or a
`KBinsDiscretizer <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.KBinsDiscretizer.html>`_.
A linear model is then fitted on each bucket.

Piecewise data
--------------

Let's build a toy problem based on two linear models.
"""

import numpy
import numpy.random as npr
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.dummy import DummyRegressor
from mlinsights.mlmodel import PiecewiseRegressor

# Toy data: four gaussian features and two slopes, one per regime.
alpha = [4, -2]
X = npr.normal(size=(1000, 4))

# The regime is decided by a linear combination of the first and last features.
t = (X[:, 0] + X[:, 3] * 0.5) > 0
# Same indicator stored as floats (1.0 where *t* holds, 0.0 elsewhere).
switch = t.astype(numpy.float64)

# The first feature is scaled by the slope of its regime, the third feature
# is added on top; the second feature does not enter the target.
y = numpy.where(t, alpha[0], alpha[1]) * X[:, 0] + X[:, 2]

########################################
#


# Scatter the target against the first feature, the one carrying
# the two different slopes.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(X[:, 0], y, ".")
ax.set_title("Piecewise examples")


######################################################################
# Piecewise Linear Regression with a decision tree
# ------------------------------------------------
#
# The first example is done with a decision tree.


# Only the first feature is handed to the model (as a single-column matrix);
# the split relies on the default proportions of train_test_split.
X_train, X_test, y_train, y_test = train_test_split(X[:, 0:1], y)

########################################
#


# The tree defines the buckets, one estimator is then fitted on each bucket.
tree_binner = DecisionTreeRegressor(min_samples_leaf=300)
model = PiecewiseRegressor(verbose=True, binner=tree_binner)
model.fit(X_train, y_train)

########################################
#


pred = model.predict(X_test)
pred[0:5]

########################################
#


fig, ax = plt.subplots(nrows=1, ncols=1)
# Draw the expected values first, then the predictions, against the feature.
for values, name in ((y_test, "data"), (pred, "predictions")):
    ax.plot(X_test[:, 0], values, ".", label=name)
ax.set_title("Piecewise Linear Regression\n2 buckets")
ax.legend()


######################################################################
# The method *transform_bins* returns the bucket of each observation,
# the final leaf of the tree.


model.transform_bins(X_test)


######################################################################
# Let's try with more buckets.


# A smaller minimum leaf size allows the tree to produce more leaves,
# hence more buckets.
tree_binner = DecisionTreeRegressor(min_samples_leaf=150)
model = PiecewiseRegressor(verbose=False, binner=tree_binner)
model.fit(X_train, y_train)

########################################
#


fig, ax = plt.subplots(nrows=1, ncols=1)
curves = (
    (y_test, "data"),
    (model.predict(X_test), "predictions"),
)
# Draw the expected values first, then the predictions, against the feature.
for values, name in curves:
    ax.plot(X_test[:, 0], values, ".", label=name)
ax.set_title("Piecewise Linear Regression\n4 buckets")
ax.legend()


######################################################################
# Piecewise Linear Regression with a KBinsDiscretizer
# ---------------------------------------------------


# The buckets now come from a KBinsDiscretizer asked for two bins.
discretizer = KBinsDiscretizer(n_bins=2)
model = PiecewiseRegressor(verbose=True, binner=discretizer)
model.fit(X_train, y_train)

########################################
#


fig, ax = plt.subplots(nrows=1, ncols=1)
curves = (
    (y_test, "data"),
    (model.predict(X_test), "predictions"),
)
# Draw the expected values first, then the predictions, against the feature.
for values, name in curves:
    ax.plot(X_test[:, 0], values, ".", label=name)
ax.set_title("Piecewise Linear Regression\n2 buckets")
ax.legend()

########################################
#


# Same discretizer-based model, this time asked for four bins.
discretizer = KBinsDiscretizer(n_bins=4)
model = PiecewiseRegressor(verbose=True, binner=discretizer)
model.fit(X_train, y_train)

########################################
#


fig, ax = plt.subplots(nrows=1, ncols=1)
curves = (
    (y_test, "data"),
    (model.predict(X_test), "predictions"),
)
# Draw the expected values first, then the predictions, against the feature.
for values, name in curves:
    ax.plot(X_test[:, 0], values, ".", label=name)
ax.set_title("Piecewise Linear Regression\n4 buckets")
ax.legend()


######################################################################
# The model does not enforce continuity despite the fact that it looks like so.
# Let's compare with a constant on each bucket.


# Four bins again, but the estimator fitted on each bucket is a
# DummyRegressor instead of the default one.
discretizer = KBinsDiscretizer(n_bins=4)
bucket_estimator = DummyRegressor()
model = PiecewiseRegressor(
    verbose="tqdm", binner=discretizer, estimator=bucket_estimator
)
model.fit(X_train, y_train)

########################################
#


fig, ax = plt.subplots(nrows=1, ncols=1)
curves = (
    (y_test, "data"),
    (model.predict(X_test), "predictions"),
)
# Draw the expected values first, then the predictions, against the feature.
for values, name in curves:
    ax.plot(X_test[:, 0], values, ".", label=name)
ax.set_title("Piecewise Constants\n4 buckets")
ax.legend()


######################################################################
# Next
# ----
#
# Issue `Model trees (M5P and
# co) <https://github.com/scikit-learn/scikit-learn/issues/13106>`_ and
# PR `Model trees
# (M5P) <https://github.com/scikit-learn/scikit-learn/pull/13732>`_
# propose an implementation of a piecewise regression with any kind of
# regression model. It is based on `Building Model
# Trees <https://github.com/ankonzoid/LearningX/tree/master/advanced_ML/model_tree>`_.
# It fits many models to find the best splits.