Skip to content

Commit 1ef78ea

Browse files
committed
adjusting model hparam description
1 parent dad9a12 commit 1ef78ea

7 files changed

Lines changed: 287 additions & 0 deletions

File tree

mambular/models/fttransformer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ class FTTransformerRegressor(SklearnBaseRegressor):
2020
Learning rate for the optimizer.
2121
lr_patience : int, default=10
2222
Number of epochs with no improvement after which learning rate will be reduced.
23+
family : str, default=None
24+
Distributional family to be used for the model.
2325
weight_decay : float, default=1e-06
2426
Weight decay (L2 penalty) for the optimizer.
2527
lr_factor : float, default=0.1

mambular/models/mambatab.py

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,290 @@
66

77

88
class MambaTabRegressor(SklearnBaseRegressor):
9+
"""
10+
MambaTab regressor. This class extends the SklearnBaseRegressor class and uses the MambaTab model
11+
with the default MambaTab configuration.
12+
13+
The accepted arguments to the MambaTabRegressor class include both the attributes in the DefaultMambaTabConfig dataclass
14+
and the parameters for the Preprocessor class.
15+
16+
Parameters
17+
----------
18+
lr : float, default=1e-04
19+
Learning rate for the optimizer.
20+
lr_patience : int, default=10
21+
Number of epochs with no improvement after which learning rate will be reduced.
22+
weight_decay : float, default=1e-06
23+
Weight decay (L2 penalty) for the optimizer.
24+
lr_factor : float, default=0.1
25+
Factor by which the learning rate will be reduced.
26+
d_model : int, default=64
27+
Dimensionality of the model.
28+
n_layers : int, default=8
29+
Number of layers in the model.
30+
expand_factor : int, default=2
31+
Expansion factor for the feed-forward layers.
32+
bias : bool, default=False
33+
Whether to use bias in the linear layers.
34+
d_conv : int, default=16
35+
Dimensionality of the convolutional layers.
36+
conv_bias : bool, default=True
37+
Whether to use bias in the convolutional layers.
38+
dropout : float, default=0.05
39+
Dropout rate for regularization.
40+
dt_rank : str, default="auto"
41+
Rank of the low-rank projection used to compute the Δ (time-step) parameter of the Mamba SSM.
42+
d_state : int, default=32
43+
Dimensionality of the state in recurrent layers.
44+
dt_scale : float, default=1.0
45+
Scaling factor for the Δ (time-step) parameter.
46+
dt_init : str, default="random"
47+
Initialization method for the Δ (time-step) parameter.
48+
dt_max : float, default=0.1
49+
Maximum value for Δ (time-step) initialization.
50+
dt_min : float, default=1e-04
51+
Minimum value for Δ (time-step) initialization.
52+
dt_init_floor : float, default=1e-04
53+
Floor value for Δ (time-step) initialization.
54+
norm : str, default="RMSNorm"
55+
Normalization method to be used.
56+
activation : callable, default=nn.SELU()
57+
Activation function for the model.
58+
num_embedding_activation : callable, default=nn.Identity()
59+
Activation function for numerical embeddings.
60+
head_layer_sizes : list, default=(128, 64, 32)
61+
Sizes of the layers in the head of the model.
62+
head_dropout : float, default=0.5
63+
Dropout rate for the head layers.
64+
head_skip_layers : bool, default=False
65+
Whether to skip layers in the head.
66+
head_activation : callable, default=nn.SELU()
67+
Activation function for the head layers.
68+
head_use_batch_norm : bool, default=False
69+
Whether to use batch normalization in the head layers.
70+
norm : str, default="LayerNorm"
71+
Normalization method to be used.
72+
axis : int, default=1
73+
Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns.
74+
n_bins : int, default=50
75+
The number of bins to use for numerical feature binning. This parameter is relevant
76+
only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
77+
numerical_preprocessing : str, default="ple"
78+
The preprocessing strategy for numerical features. Valid options are
79+
'binning', 'one_hot', 'standardization', and 'normalization'.
80+
use_decision_tree_bins : bool, default=False
81+
If True, uses decision tree regression/classification to determine
82+
optimal bin edges for numerical feature binning. This parameter is
83+
relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
84+
binning_strategy : str, default="uniform"
85+
Defines the strategy for binning numerical features. Options include 'uniform',
86+
'quantile', or other sklearn-compatible strategies.
87+
cat_cutoff : float or int, default=0.03
88+
Indicates the cutoff after which integer values are treated as categorical.
89+
If float, it's treated as a percentage. If int, it's the maximum number of
90+
unique values for a column to be considered categorical.
91+
treat_all_integers_as_numerical : bool, default=False
92+
If True, all integer columns will be treated as numerical, regardless
93+
of their unique value count or proportion.
94+
degree : int, default=3
95+
The degree of the polynomial features to be used in preprocessing.
96+
knots : int, default=12
97+
The number of knots to be used in spline transformations.
98+
"""
99+
9100
def __init__(self, **kwargs):
10101
super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs)
11102

12103

13104
class MambaTabClassifier(SklearnBaseClassifier):
105+
"""
106+
MambaTab Classifier. This class extends the SklearnBaseClassifier class and uses the MambaTab model
107+
with the default MambaTab configuration.
108+
109+
The accepted arguments to the MambaTabClassifier class include both the attributes in the DefaultMambaTabConfig dataclass
110+
and the parameters for the Preprocessor class.
111+
112+
Parameters
113+
----------
114+
lr : float, default=1e-04
115+
Learning rate for the optimizer.
116+
lr_patience : int, default=10
117+
Number of epochs with no improvement after which learning rate will be reduced.
118+
weight_decay : float, default=1e-06
119+
Weight decay (L2 penalty) for the optimizer.
120+
lr_factor : float, default=0.1
121+
Factor by which the learning rate will be reduced.
122+
d_model : int, default=64
123+
Dimensionality of the model.
124+
n_layers : int, default=8
125+
Number of layers in the model.
126+
expand_factor : int, default=2
127+
Expansion factor for the feed-forward layers.
128+
bias : bool, default=False
129+
Whether to use bias in the linear layers.
130+
d_conv : int, default=16
131+
Dimensionality of the convolutional layers.
132+
conv_bias : bool, default=True
133+
Whether to use bias in the convolutional layers.
134+
dropout : float, default=0.05
135+
Dropout rate for regularization.
136+
dt_rank : str, default="auto"
137+
Rank of the low-rank projection used to compute the Δ (time-step) parameter of the Mamba SSM.
138+
d_state : int, default=32
139+
Dimensionality of the state in recurrent layers.
140+
dt_scale : float, default=1.0
141+
Scaling factor for the Δ (time-step) parameter.
142+
dt_init : str, default="random"
143+
Initialization method for the Δ (time-step) parameter.
144+
dt_max : float, default=0.1
145+
Maximum value for Δ (time-step) initialization.
146+
dt_min : float, default=1e-04
147+
Minimum value for Δ (time-step) initialization.
148+
dt_init_floor : float, default=1e-04
149+
Floor value for Δ (time-step) initialization.
150+
norm : str, default="RMSNorm"
151+
Normalization method to be used.
152+
activation : callable, default=nn.SELU()
153+
Activation function for the model.
154+
num_embedding_activation : callable, default=nn.Identity()
155+
Activation function for numerical embeddings.
156+
head_layer_sizes : list, default=(128, 64, 32)
157+
Sizes of the layers in the head of the model.
158+
head_dropout : float, default=0.5
159+
Dropout rate for the head layers.
160+
head_skip_layers : bool, default=False
161+
Whether to skip layers in the head.
162+
head_activation : callable, default=nn.SELU()
163+
Activation function for the head layers.
164+
head_use_batch_norm : bool, default=False
165+
Whether to use batch normalization in the head layers.
166+
norm : str, default="LayerNorm"
167+
Normalization method to be used.
168+
axis : int, default=1
169+
Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns.
170+
n_bins : int, default=50
171+
The number of bins to use for numerical feature binning. This parameter is relevant
172+
only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
173+
numerical_preprocessing : str, default="ple"
174+
The preprocessing strategy for numerical features. Valid options are
175+
'binning', 'one_hot', 'standardization', and 'normalization'.
176+
use_decision_tree_bins : bool, default=False
177+
If True, uses decision tree regression/classification to determine
178+
optimal bin edges for numerical feature binning. This parameter is
179+
relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
180+
binning_strategy : str, default="uniform"
181+
Defines the strategy for binning numerical features. Options include 'uniform',
182+
'quantile', or other sklearn-compatible strategies.
183+
cat_cutoff : float or int, default=0.03
184+
Indicates the cutoff after which integer values are treated as categorical.
185+
If float, it's treated as a percentage. If int, it's the maximum number of
186+
unique values for a column to be considered categorical.
187+
treat_all_integers_as_numerical : bool, default=False
188+
If True, all integer columns will be treated as numerical, regardless
189+
of their unique value count or proportion.
190+
degree : int, default=3
191+
The degree of the polynomial features to be used in preprocessing.
192+
knots : int, default=12
193+
The number of knots to be used in spline transformations.
194+
"""
195+
14196
def __init__(self, **kwargs):
15197
super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs)
16198

17199

18200
class MambaTabLSS(SklearnBaseLSS):
201+
"""
202+
MambaTab for distributional regression. This class extends the SklearnBaseLSS class and uses the MambaTab model
203+
with the default MambaTab configuration.
204+
205+
The accepted arguments to the MambaTabLSS class include both the attributes in the DefaultMambaTabConfig dataclass
206+
and the parameters for the Preprocessor class.
207+
208+
Parameters
209+
----------
210+
lr : float, default=1e-04
211+
Learning rate for the optimizer.
212+
lr_patience : int, default=10
213+
Number of epochs with no improvement after which learning rate will be reduced.
214+
family : str, default=None
215+
Distributional family to be used for the model.
216+
weight_decay : float, default=1e-06
217+
Weight decay (L2 penalty) for the optimizer.
218+
lr_factor : float, default=0.1
219+
Factor by which the learning rate will be reduced.
220+
d_model : int, default=64
221+
Dimensionality of the model.
222+
n_layers : int, default=8
223+
Number of layers in the model.
224+
expand_factor : int, default=2
225+
Expansion factor for the feed-forward layers.
226+
bias : bool, default=False
227+
Whether to use bias in the linear layers.
228+
d_conv : int, default=16
229+
Dimensionality of the convolutional layers.
230+
conv_bias : bool, default=True
231+
Whether to use bias in the convolutional layers.
232+
dropout : float, default=0.05
233+
Dropout rate for regularization.
234+
dt_rank : str, default="auto"
235+
Rank of the low-rank projection used to compute the Δ (time-step) parameter of the Mamba SSM.
236+
d_state : int, default=32
237+
Dimensionality of the state in recurrent layers.
238+
dt_scale : float, default=1.0
239+
Scaling factor for the Δ (time-step) parameter.
240+
dt_init : str, default="random"
241+
Initialization method for the Δ (time-step) parameter.
242+
dt_max : float, default=0.1
243+
Maximum value for Δ (time-step) initialization.
244+
dt_min : float, default=1e-04
245+
Minimum value for Δ (time-step) initialization.
246+
dt_init_floor : float, default=1e-04
247+
Floor value for Δ (time-step) initialization.
248+
norm : str, default="RMSNorm"
249+
Normalization method to be used.
250+
activation : callable, default=nn.SELU()
251+
Activation function for the model.
252+
num_embedding_activation : callable, default=nn.Identity()
253+
Activation function for numerical embeddings.
254+
head_layer_sizes : list, default=(128, 64, 32)
255+
Sizes of the layers in the head of the model.
256+
head_dropout : float, default=0.5
257+
Dropout rate for the head layers.
258+
head_skip_layers : bool, default=False
259+
Whether to skip layers in the head.
260+
head_activation : callable, default=nn.SELU()
261+
Activation function for the head layers.
262+
head_use_batch_norm : bool, default=False
263+
Whether to use batch normalization in the head layers.
264+
norm : str, default="LayerNorm"
265+
Normalization method to be used.
266+
axis : int, default=1
267+
Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns.
268+
n_bins : int, default=50
269+
The number of bins to use for numerical feature binning. This parameter is relevant
270+
only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
271+
numerical_preprocessing : str, default="ple"
272+
The preprocessing strategy for numerical features. Valid options are
273+
'binning', 'one_hot', 'standardization', and 'normalization'.
274+
use_decision_tree_bins : bool, default=False
275+
If True, uses decision tree regression/classification to determine
276+
optimal bin edges for numerical feature binning. This parameter is
277+
relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
278+
binning_strategy : str, default="uniform"
279+
Defines the strategy for binning numerical features. Options include 'uniform',
280+
'quantile', or other sklearn-compatible strategies.
281+
cat_cutoff : float or int, default=0.03
282+
Indicates the cutoff after which integer values are treated as categorical.
283+
If float, it's treated as a percentage. If int, it's the maximum number of
284+
unique values for a column to be considered categorical.
285+
treat_all_integers_as_numerical : bool, default=False
286+
If True, all integer columns will be treated as numerical, regardless
287+
of their unique value count or proportion.
288+
degree : int, default=3
289+
The degree of the polynomial features to be used in preprocessing.
290+
knots : int, default=12
291+
The number of knots to be used in spline transformations.
292+
"""
293+
19294
def __init__(self, **kwargs):
20295
super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs)

mambular/models/mambular.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ class MambularLSS(SklearnBaseLSS):
275275
Learning rate for the optimizer.
276276
lr_patience : int, default=10
277277
Number of epochs with no improvement after which learning rate will be reduced.
278+
family : str, default=None
279+
Distributional family to be used for the model.
278280
weight_decay : float, default=1e-06
279281
Weight decay (L2 penalty) for the optimizer.
280282
lr_factor : float, default=0.1

mambular/models/mlp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ class MLPLSS(SklearnBaseLSS):
201201
Learning rate for the optimizer.
202202
lr_patience : int, default=10
203203
Number of epochs with no improvement after which learning rate will be reduced.
204+
family : str, default=None
205+
Distributional family to be used for the model.
204206
weight_decay : float, default=1e-06
205207
Weight decay (L2 penalty) for the optimizer.
206208
lr_factor : float, default=0.1

mambular/models/resnet.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ class ResNetLSS(SklearnBaseLSS):
219219
Dropout rate for regularization.
220220
norm : str, default=None
221221
Normalization method to be used, if any.
222+
family : str, default=None
223+
Distributional family to be used for the model.
222224
use_glu : bool, default=False
223225
Whether to use Gated Linear Units (GLU) in the ResNet.
224226
skip_connections : bool, default=True

mambular/models/tabtransformer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ class TabTransformerRegressor(SklearnBaseRegressor):
1919
Learning rate for the optimizer.
2020
lr_patience : int, default=10
2121
Number of epochs with no improvement after which learning rate will be reduced.
22+
family : str, default=None
23+
Distributional family to be used for the model.
2224
weight_decay : float, default=1e-06
2325
Weight decay (L2 penalty) for the optimizer.
2426
lr_factor : float, default=0.1

mambular/models/tabularnn.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ class TabulaRNNLSS(SklearnBaseLSS):
185185
Learning rate for the optimizer.
186186
model_type : str, default="RNN"
187187
type of model, one of "RNN", "LSTM", "GRU"
188+
family : str, default=None
189+
Distributional family to be used for the model.
188190
lr_patience : int, default=10
189191
Number of epochs with no improvement after which learning rate will be reduced.
190192
weight_decay : float, default=1e-06

0 commit comments

Comments
 (0)