OpenTabular
diff --git a/‎mambular/configs/fttransformer_config.py‎
Lines changed: 55 additions & 0 deletions b/‎mambular/configs/fttransformer_config.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎mambular/configs/mambular_config.py‎
Lines changed: 67 additions & 0 deletions b/‎mambular/configs/mambular_config.py‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎mambular/configs/mlp_config.py‎
Lines changed: 33 additions & 0 deletions b/‎mambular/configs/mlp_config.py‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎mambular/configs/resnet_config.py‎
Lines changed: 35 additions & 0 deletions b/‎mambular/configs/resnet_config.py‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎mambular/configs/tabtransformer_config.py‎
Lines changed: 55 additions & 0 deletions b/‎mambular/configs/tabtransformer_config.py‎
Lines changed: 55 additions & 0 deletions
@@ -4,6 +4,61 @@
 
 @dataclass
 class DefaultFTTransformerConfig:
+    """
+    Configuration class for the default FT Transformer model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=64
+        Dimensionality of the model.
+    n_layers : int, default=8
+        Number of layers in the transformer.
+    n_heads : int, default=4
+        Number of attention heads in the transformer.
+    attn_dropout : float, default=0.3
+        Dropout rate for the attention mechanism.
+    ff_dropout : float, default=0.3
+        Dropout rate for the feed-forward layers.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the transformer.
+    num_embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=(128, 64, 32)
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to skip layers in the head.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="cls"
+        Pooling method to be used ('cls', 'avg', etc.).
+    norm_first : bool, default=False
+        Whether to apply normalization before other operations in each transformer block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    transformer_activation : callable, default=nn.SELU()
+        Activation function for the transformer layers.
+    layer_norm_eps : float, default=1e-05
+        Epsilon value for layer normalization.
+    transformer_dim_feedforward : int, default=512
+        Dimensionality of the feed-forward layers in the transformer.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
 
@@ -4,6 +4,73 @@
 
 @dataclass
 class DefaultMambularConfig:
+    """
+    Configuration class for the Default Mambular model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=64
+        Dimensionality of the model.
+    n_layers : int, default=8
+        Number of layers in the model.
+    expand_factor : int, default=2
+        Expansion factor applied to d_model to obtain the inner dimension of each Mamba block.
+    bias : bool, default=False
+        Whether to use bias in the linear layers.
+    d_conv : int, default=16
+        Kernel size of the 1D convolution in each Mamba block.
+    conv_bias : bool, default=True
+        Whether to use bias in the convolutional layers.
+    dropout : float, default=0.05
+        Dropout rate for regularization.
+    dt_rank : str, default="auto"
+        Rank of the projection for the state-space time-step parameter dt (delta).
+    d_state : int, default=32
+        Dimensionality of the latent state in the state-space (SSM) layers.
+    dt_scale : float, default=1.0
+        Scaling factor used when initializing the time-step (dt) projection.
+    dt_init : str, default="random"
+        Initialization method for the time-step (dt) projection weights.
+    dt_max : float, default=0.1
+        Maximum value for the time-step (dt) initialization.
+    dt_min : float, default=1e-04
+        Minimum value for the time-step (dt) initialization.
+    dt_init_floor : float, default=1e-04
+        Floor value applied to the initial time-step (dt) values.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the model.
+    num_embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=(128, 64, 32)
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to skip layers in the head.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method to be used ('avg', 'max', etc.).
+    bidirectional : bool, default=False
+        Whether to use bidirectional processing of the input sequences.
+    use_learnable_interaction : bool, default=False
+        Whether to use learnable feature interactions before passing through mamba blocks.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
 
@@ -4,6 +4,39 @@
 
 @dataclass
 class DefaultMLPConfig:
+    """
+    Configuration class for the default Multi-Layer Perceptron (MLP) model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    layer_sizes : list, default=(128, 128, 32)
+        Sizes of the layers in the MLP.
+    activation : callable, default=nn.SELU()
+        Activation function for the MLP layers.
+    skip_layers : bool, default=False
+        Whether to skip layers in the MLP.
+    dropout : float, default=0.5
+        Dropout rate for regularization.
+    norm : str or None, default=None
+        Normalization method to be used, if any.
+    use_glu : bool, default=False
+        Whether to use Gated Linear Units (GLU) in the MLP.
+    skip_connections : bool, default=False
+        Whether to use skip connections in the MLP.
+    batch_norm : bool, default=False
+        Whether to use batch normalization in the MLP layers.
+    layer_norm : bool, default=False
+        Whether to use layer normalization in the MLP layers.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
 
@@ -4,6 +4,41 @@
 
 @dataclass
 class DefaultResNetConfig:
+    """
+    Configuration class for the default ResNet model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    layer_sizes : list, default=(128, 128, 32)
+        Sizes of the layers in the ResNet.
+    activation : callable, default=nn.SELU()
+        Activation function for the ResNet layers.
+    skip_layers : bool, default=False
+        Whether to skip layers in the ResNet.
+    dropout : float, default=0.5
+        Dropout rate for regularization.
+    norm : str or None, default=None
+        Normalization method to be used, if any.
+    use_glu : bool, default=False
+        Whether to use Gated Linear Units (GLU) in the ResNet.
+    skip_connections : bool, default=True
+        Whether to use skip connections in the ResNet.
+    batch_norm : bool, default=True
+        Whether to use batch normalization in the ResNet layers.
+    layer_norm : bool, default=False
+        Whether to use layer normalization in the ResNet layers.
+    num_blocks : int, default=3
+        Number of residual blocks in the ResNet.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
 
@@ -4,6 +4,61 @@
 
 @dataclass
 class DefaultTabTransformerConfig:
+    """
+    Configuration class for the default Tab Transformer model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=64
+        Dimensionality of the model.
+    n_layers : int, default=8
+        Number of layers in the transformer.
+    n_heads : int, default=4
+        Number of attention heads in the transformer.
+    attn_dropout : float, default=0.3
+        Dropout rate for the attention mechanism.
+    ff_dropout : float, default=0.3
+        Dropout rate for the feed-forward layers.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the transformer.
+    num_embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=(128, 64, 32)
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to skip layers in the head.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method to be used ('cls', 'avg', etc.).
+    norm_first : bool, default=True
+        Whether to apply normalization before other operations in each transformer block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    transformer_activation : callable, default=nn.SELU()
+        Activation function for the transformer layers.
+    layer_norm_eps : float, default=1e-05
+        Epsilon value for layer normalization.
+    transformer_dim_feedforward : int, default=512
+        Dimensionality of the feed-forward layers in the transformer.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06