Skip to content

Commit 1ef78ea

Browse files
committed
adjusting model hparam description
1 parent dad9a12 commit 1ef78ea

7 files changed

Lines changed: 287 additions & 0 deletions

File tree

mambular/models/fttransformer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ class FTTransformerRegressor(SklearnBaseRegressor):
2020
Learning rate for the optimizer.
2121
lr_patience : int, default=10
2222
Number of epochs with no improvement after which learning rate will be reduced.
23+
family : str, default=None
24+
Distributional family to be used for the model.
2325
weight_decay : float, default=1e-06
2426
Weight decay (L2 penalty) for the optimizer.
2527
lr_factor : float, default=0.1

mambular/models/mambatab.py

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,290 @@
66

77

88
class MambaTabRegressor(SklearnBaseRegressor):
9+
"""
10+
MambaTab regressor. This class extends the SklearnBaseRegressor class and uses the MambaTab model
11+
with the default MambaTab configuration.
12+
13+
The accepted arguments to the MambaTabRegressor class include both the attributes in the DefaultMambaTabConfig dataclass
14+
and the parameters for the Preprocessor class.
15+
16+
Parameters
17+
----------
18+
lr : float, default=1e-04
19+
Learning rate for the optimizer.
20+
lr_patience : int, default=10
21+
Number of epochs with no improvement after which learning rate will be reduced.
22+
weight_decay : float, default=1e-06
23+
Weight decay (L2 penalty) for the optimizer.
24+
lr_factor : float, default=0.1
25+
Factor by which the learning rate will be reduced.
26+
d_model : int, default=64
27+
Dimensionality of the model.
28+
n_layers : int, default=8
29+
Number of layers in the model.
30+
expand_factor : int, default=2
31+
Expansion factor for the feed-forward layers.
32+
bias : bool, default=False
33+
Whether to use bias in the linear layers.
34+
d_conv : int, default=16
35+
Dimensionality of the convolutional layers.
36+
conv_bias : bool, default=True
37+
Whether to use bias in the convolutional layers.
38+
dropout : float, default=0.05
39+
Dropout rate for regularization.
40+
dt_rank : str, default="auto"
41+
Rank of the low-rank projection used to compute the Δ (time-step) parameter of the Mamba SSM.
42+
d_state : int, default=32
43+
Dimensionality of the state in recurrent layers.
44+
dt_scale : float, default=1.0
45+
Scaling factor for the Δ (time-step) parameter.
46+
dt_init : str, default="random"
47+
Initialization method for the Δ (time-step) parameter.
48+
dt_max : float, default=0.1
49+
Maximum value for Δ (time-step) initialization.
50+
dt_min : float, default=1e-04
51+
Minimum value for Δ (time-step) initialization.
52+
dt_init_floor : float, default=1e-04
53+
Floor value for Δ (time-step) initialization.
54+
norm : str, default="RMSNorm"
55+
Normalization method to be used.
56+
activation : callable, default=nn.SELU()
57+
Activation function for the model.
58+
num_embedding_activation : callable, default=nn.Identity()
59+
Activation function for numerical embeddings.
60+
head_layer_sizes : list, default=(128, 64, 32)
61+
Sizes of the layers in the head of the model.
62+
head_dropout : float, default=0.5
63+
Dropout rate for the head layers.
64+
head_skip_layers : bool, default=False
65+
Whether to skip layers in the head.
66+
head_activation : callable, default=nn.SELU()
67+
Activation function for the head layers.
68+
head_use_batch_norm : bool, default=False
69+
Whether to use batch normalization in the head layers.
70+
norm : str, default="LayerNorm"
71+
Normalization method to be used.
72+
axis : int, default=1
73+
Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns.
74+
n_bins : int, default=50
75+
The number of bins to use for numerical feature binning. This parameter is relevant
76+
only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
77+
numerical_preprocessing : str, default="ple"
78+
The preprocessing strategy for numerical features. Valid options are
79+
'binning', 'one_hot', 'standardization', and 'normalization'.
80+
use_decision_tree_bins : bool, default=False
81+
If True, uses decision tree regression/classification to determine
82+
optimal bin edges for numerical feature binning. This parameter is
83+
relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
84+
binning_strategy : str, default="uniform"
85+
Defines the strategy for binning numerical features. Options include 'uniform',
86+
'quantile', or other sklearn-compatible strategies.
87+
cat_cutoff : float or int, default=0.03
88+
Indicates the cutoff after which integer values are treated as categorical.
89+
If float, it's treated as a percentage. If int, it's the maximum number of
90+
unique values for a column to be considered categorical.
91+
treat_all_integers_as_numerical : bool, default=False
92+
If True, all integer columns will be treated as numerical, regardless
93+
of their unique value count or proportion.
94+
degree : int, default=3
95+
The degree of the polynomial features to be used in preprocessing.
96+
knots : int, default=12
97+
The number of knots to be used in spline transformations.
98+
"""
99+
9100
def __init__(self, **kwargs):
10101
super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs)
11102

12103

13104
class MambaTabClassifier(SklearnBaseClassifier):
105+
"""
106+
MambaTab Classifier. This class extends the SklearnBaseClassifier class and uses the MambaTab model
107+
with the default MambaTab configuration.
108+
109+
The accepted arguments to the MambaTabClassifier class include both the attributes in the DefaultMambaTabConfig dataclass
110+
and the parameters for the Preprocessor class.
111+
112+
Parameters
113+
----------
114+
lr : float, default=1e-04
115+
Learning rate for the optimizer.
116+
lr_patience : int, default=10
117+
Number of epochs with no improvement after which learning rate will be reduced.
118+
weight_decay : float, default=1e-06
119+
Weight decay (L2 penalty) for the optimizer.
120+
lr_factor : float, default=0.1
121+
Factor by which the learning rate will be reduced.
122+
d_model : int, default=64
123+
Dimensionality of the model.
124+
n_layers : int, default=8
125+
Number of layers in the model.
126+
expand_factor : int, default=2
127+
Expansion factor for the feed-forward layers.
128+
bias : bool, default=False
129+
Whether to use bias in the linear layers.
130+
d_conv : int, default=16
131+
Dimensionality of the convolutional layers.
132+
conv_bias : bool, default=True
133+
Whether to use bias in the convolutional layers.
134+
dropout : float, default=0.05
135+
Dropout rate for regularization.
136+
dt_rank : str, default="auto"
137+
Rank of the low-rank projection used to compute the Δ (time-step) parameter of the Mamba SSM.
138+
d_state : int, default=32
139+
Dimensionality of the state in recurrent layers.
140+
dt_scale : float, default=1.0
141+
Scaling factor for the Δ (time-step) parameter.
142+
dt_init : str, default="random"
143+
Initialization method for the Δ (time-step) parameter.
144+
dt_max : float, default=0.1
145+
Maximum value for Δ (time-step) initialization.
146+
dt_min : float, default=1e-04
147+
Minimum value for Δ (time-step) initialization.
148+
dt_init_floor : float, default=1e-04
149+
Floor value for Δ (time-step) initialization.
150+
norm : str, default="RMSNorm"
151+
Normalization method to be used.
152+
activation : callable, default=nn.SELU()
153+
Activation function for the model.
154+
num_embedding_activation : callable, default=nn.Identity()
155+
Activation function for numerical embeddings.
156+
head_layer_sizes : list, default=(128, 64, 32)
157+
Sizes of the layers in the head of the model.
158+
head_dropout : float, default=0.5
159+
Dropout rate for the head layers.
160+
head_skip_layers : bool, default=False
161+
Whether to skip layers in the head.
162+
head_activation : callable, default=nn.SELU()
163+
Activation function for the head layers.
164+
head_use_batch_norm : bool, default=False
165+
Whether to use batch normalization in the head layers.
166+
norm : str, default="LayerNorm"
167+
Normalization method to be used.
168+
axis : int, default=1
169+
Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns.
170+
n_bins : int, default=50
171+
The number of bins to use for numerical feature binning. This parameter is relevant
172+
only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
173+
numerical_preprocessing : str, default="ple"
174+
The preprocessing strategy for numerical features. Valid options are
175+
'binning', 'one_hot', 'standardization', and 'normalization'.
176+
use_decision_tree_bins : bool, default=False
177+
If True, uses decision tree regression/classification to determine
178+
optimal bin edges for numerical feature binning. This parameter is
179+
relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
180+
binning_strategy : str, default="uniform"
181+
Defines the strategy for binning numerical features. Options include 'uniform',
182+
'quantile', or other sklearn-compatible strategies.
183+
cat_cutoff : float or int, default=0.03
184+
Indicates the cutoff after which integer values are treated as categorical.
185+
If float, it's treated as a percentage. If int, it's the maximum number of
186+
unique values for a column to be considered categorical.
187+
treat_all_integers_as_numerical : bool, default=False
188+
If True, all integer columns will be treated as numerical, regardless
189+
of their unique value count or proportion.
190+
degree : int, default=3
191+
The degree of the polynomial features to be used in preprocessing.
192+
knots : int, default=12
193+
The number of knots to be used in spline transformations.
194+
"""
195+
14196
def __init__(self, **kwargs):
15197
super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs)
16198

17199

18200
class MambaTabLSS(SklearnBaseLSS):
201+
"""
202+
MambaTab for distributional regression. This class extends the SklearnBaseLSS class and uses the MambaTab model
203+
with the default MambaTab configuration.
204+
205+
The accepted arguments to the MambaTabLSS class include both the attributes in the DefaultMambaTabConfig dataclass
206+
and the parameters for the Preprocessor class.
207+
208+
Parameters
209+
----------
210+
lr : float, default=1e-04
211+
Learning rate for the optimizer.
212+
lr_patience : int, default=10
213+
Number of epochs with no improvement after which learning rate will be reduced.
214+
family : str, default=None
215+
Distributional family to be used for the model.
216+
weight_decay : float, default=1e-06
217+
Weight decay (L2 penalty) for the optimizer.
218+
lr_factor : float, default=0.1
219+
Factor by which the learning rate will be reduced.
220+
d_model : int, default=64
221+
Dimensionality of the model.
222+
n_layers : int, default=8
223+
Number of layers in the model.
224+
expand_factor : int, default=2
225+
Expansion factor for the feed-forward layers.
226+
bias : bool, default=False
227+
Whether to use bias in the linear layers.
228+
d_conv : int, default=16
229+
Dimensionality of the convolutional layers.
230+
conv_bias : bool, default=True
231+
Whether to use bias in the convolutional layers.
232+
dropout : float, default=0.05
233+
Dropout rate for regularization.
234+
dt_rank : str, default="auto"
235+
Rank of the low-rank projection used to compute the Δ (time-step) parameter of the Mamba SSM.
236+
d_state : int, default=32
237+
Dimensionality of the state in recurrent layers.
238+
dt_scale : float, default=1.0
239+
Scaling factor for the Δ (time-step) parameter.
240+
dt_init : str, default="random"
241+
Initialization method for the Δ (time-step) parameter.
242+
dt_max : float, default=0.1
243+
Maximum value for Δ (time-step) initialization.
244+
dt_min : float, default=1e-04
245+
Minimum value for Δ (time-step) initialization.
246+
dt_init_floor : float, default=1e-04
247+
Floor value for Δ (time-step) initialization.
248+
norm : str, default="RMSNorm"
249+
Normalization method to be used.
250+
activation : callable, default=nn.SELU()
251+
Activation function for the model.
252+
num_embedding_activation : callable, default=nn.Identity()
253+
Activation function for numerical embeddings.
254+
head_layer_sizes : list, default=(128, 64, 32)
255+
Sizes of the layers in the head of the model.
256+
head_dropout : float, default=0.5
257+
Dropout rate for the head layers.
258+
head_skip_layers : bool, default=False
259+
Whether to skip layers in the head.
260+
head_activation : callable, default=nn.SELU()
261+
Activation function for the head layers.
262+
head_use_batch_norm : bool, default=False
263+
Whether to use batch normalization in the head layers.
264+
norm : str, default="LayerNorm"
265+
Normalization method to be used.
266+
axis : int, default=1
267+
Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns.
268+
n_bins : int, default=50
269+
The number of bins to use for numerical feature binning. This parameter is relevant
270+
only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
271+
numerical_preprocessing : str, default="ple"
272+
The preprocessing strategy for numerical features. Valid options are
273+
'binning', 'one_hot', 'standardization', and 'normalization'.
274+
use_decision_tree_bins : bool, default=False
275+
If True, uses decision tree regression/classification to determine
276+
optimal bin edges for numerical feature binning. This parameter is
277+
relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
278+
binning_strategy : str, default="uniform"
279+
Defines the strategy for binning numerical features. Options include 'uniform',
280+
'quantile', or other sklearn-compatible strategies.
281+
cat_cutoff : float or int, default=0.03
282+
Indicates the cutoff after which integer values are treated as categorical.
283+
If float, it's treated as a percentage. If int, it's the maximum number of
284+
unique values for a column to be considered categorical.
285+
treat_all_integers_as_numerical : bool, default=False
286+
If True, all integer columns will be treated as numerical, regardless
287+
of their unique value count or proportion.
288+
degree : int, default=3
289+
The degree of the polynomial features to be used in preprocessing.
290+
knots : int, default=12
291+
The number of knots to be used in spline transformations.
292+
"""
293+
19294
def __init__(self, **kwargs):
20295
super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs)

mambular/models/mambular.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ class MambularLSS(SklearnBaseLSS):
275275
Learning rate for the optimizer.
276276
lr_patience : int, default=10
277277
Number of epochs with no improvement after which learning rate will be reduced.
278+
family : str, default=None
279+
Distributional family to be used for the model.
278280
weight_decay : float, default=1e-06
279281
Weight decay (L2 penalty) for the optimizer.
280282
lr_factor : float, default=0.1

mambular/models/mlp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ class MLPLSS(SklearnBaseLSS):
201201
Learning rate for the optimizer.
202202
lr_patience : int, default=10
203203
Number of epochs with no improvement after which learning rate will be reduced.
204+
family : str, default=None
205+
Distributional family to be used for the model.
204206
weight_decay : float, default=1e-06
205207
Weight decay (L2 penalty) for the optimizer.
206208
lr_factor : float, default=0.1

mambular/models/resnet.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ class ResNetLSS(SklearnBaseLSS):
219219
Dropout rate for regularization.
220220
norm : str, default=None
221221
Normalization method to be used, if any.
222+
family : str, default=None
223+
Distributional family to be used for the model.
222224
use_glu : bool, default=False
223225
Whether to use Gated Linear Units (GLU) in the ResNet.
224226
skip_connections : bool, default=True

mambular/models/tabtransformer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ class TabTransformerRegressor(SklearnBaseRegressor):
1919
Learning rate for the optimizer.
2020
lr_patience : int, default=10
2121
Number of epochs with no improvement after which learning rate will be reduced.
22+
family : str, default=None
23+
Distributional family to be used for the model.
2224
weight_decay : float, default=1e-06
2325
Weight decay (L2 penalty) for the optimizer.
2426
lr_factor : float, default=0.1

mambular/models/tabularnn.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ class TabulaRNNLSS(SklearnBaseLSS):
185185
Learning rate for the optimizer.
186186
model_type : str, default="RNN"
187187
type of model, one of "RNN", "LSTM", "GRU"
188+
family : str, default=None
189+
Distributional family to be used for the model.
188190
lr_patience : int, default=10
189191
Number of epochs with no improvement after which learning rate will be reduced.
190192
weight_decay : float, default=1e-06

0 commit comments

Comments
 (0)