@@ -1,23 +1,15 @@
 from collections.abc import Callable
 from dataclasses import dataclass, field
-
 import torch.nn as nn
+from .base_config import BaseConfig


 @dataclass
-class DefaultMambAttentionConfig:
+class DefaultMambAttentionConfig(BaseConfig):
     """Configuration class for the Default Mambular Attention model with predefined hyperparameters.

     Parameters
     ----------
-    lr : float, default=1e-04
-        Learning rate for the optimizer.
-    lr_patience : int, default=10
-        Number of epochs with no improvement after which learning rate will be reduced.
-    weight_decay : float, default=1e-06
-        Weight decay (L2 penalty) for the optimizer.
-    lr_factor : float, default=0.1
-        Factor by which the learning rate will be reduced.
     d_model : int, default=64
         Dimensionality of the model.
     n_layers : int, default=4
@@ -58,22 +50,6 @@ class DefaultMambAttentionConfig:
         Type of normalization used in the model.
     activation : callable, default=nn.SiLU()
         Activation function for the model.
-    layer_norm_eps : float, default=1e-05
-        Epsilon value for layer normalization.
-    num_embedding_activation : callable, default=nn.ReLU()
-        Activation function for numerical embeddings.
-    embedding_type : str, default="linear"
-        Type of embedding to use ('linear', etc.).
-    embedding_bias : bool, default=False
-        Whether to use bias in the embedding layers.
-    plr_lite : bool, default=False
-        Whether to use a lightweight version of Piecewise Linear Regression (PLR).
-    n_frequencies : int, default=48
-        Number of frequencies for PLR embeddings.
-    frequencies_init_scale : float, default=0.01
-        Initial scale for frequency parameters in embeddings.
-    layer_norm_after_embedding : bool, default=False
-        Whether to apply layer normalization after embedding layers.
     head_layer_sizes : list, default=()
         Sizes of the fully connected layers in the model's head.
     head_dropout : float, default=0.5
@@ -106,12 +82,6 @@ class DefaultMambAttentionConfig:
         Number of attention layers in the model.
     """

-    # Optimizer Parameters
-    lr: float = 1e-04
-    lr_patience: int = 10
-    weight_decay: float = 1e-06
-    lr_factor: float = 0.1
-
     # Architecture Parameters
     d_model: int = 64
     n_layers: int = 4
@@ -133,16 +103,6 @@ class DefaultMambAttentionConfig:
     dt_init_floor: float = 1e-04
     norm: str = "LayerNorm"
     activation: Callable = nn.SiLU()  # noqa: RUF009
-    layer_norm_eps: float = 1e-05
-
-    # Embedding Parameters
-    num_embedding_activation: Callable = nn.ReLU()  # noqa: RUF009
-    embedding_type: str = "linear"
-    embedding_bias: bool = False
-    plr_lite: bool = False
-    n_frequencies: int = 48
-    frequencies_init_scale: float = 0.01
-    layer_norm_after_embedding: bool = False

     # Head Parameters
     head_layer_sizes: list = field(default_factory=list)
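
The diff removes the optimizer and embedding fields from this config and makes it inherit from BaseConfig, so those defaults are presumably defined once on the shared base. As a rough sketch of what .base_config might contain, assuming the refactor simply hoists the deleted fields: the names and defaults below are copied verbatim from the removed lines, while the class body itself is an assumption, not the repository's actual base_config.py.

# Hedged sketch of .base_config.BaseConfig -- field names and defaults are
# taken from the lines removed above; the grouping and the class itself are
# assumptions about the refactor, not the actual source.
from collections.abc import Callable
from dataclasses import dataclass

import torch.nn as nn


@dataclass
class BaseConfig:
    # Optimizer parameters (previously duplicated in each model config)
    lr: float = 1e-04
    lr_patience: int = 10
    weight_decay: float = 1e-06
    lr_factor: float = 0.1

    # Normalization and embedding parameters (previously duplicated as well)
    layer_norm_eps: float = 1e-05
    num_embedding_activation: Callable = nn.ReLU()  # noqa: RUF009
    embedding_type: str = "linear"
    embedding_bias: bool = False
    plr_lite: bool = False
    n_frequencies: int = 48
    frequencies_init_scale: float = 0.01
    layer_norm_after_embedding: bool = False

Under that assumption, the inherited fields are overridden like any other dataclass field, e.g. DefaultMambAttentionConfig(lr=5e-04, d_model=128), and the model-specific config stays focused on architecture and head parameters.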