
Commit 628182f

Merge pull request #237 from basf/refactorization
Refactorization and introduction of AutoInt and Trompt
2 parents b6408bf + d4fbeb5 commit 628182f

55 files changed

Lines changed: 2051 additions & 894 deletions

Note: some content is hidden in this view; large commits have some file contents collapsed by default. Two of the new files below therefore appear without their file names.

README.md

Lines changed: 8 additions & 5 deletions
@@ -76,7 +76,10 @@ Mambular is a Python package that brings the power of advanced deep learning arc
 | `TabulaRNN` | A Recurrent Neural Network for Tabular data, introduced [here](https://arxiv.org/pdf/2411.17207). |
 | `MambAttention` | A combination between Mamba and Transformers, also introduced [here](https://arxiv.org/pdf/2411.17207). |
 | `NDTF` | A neural decision forest using soft decision trees. See [Kontschieder et al.](https://openaccess.thecvf.com/content_iccv_2015/html/Kontschieder_Deep_Neural_Decision_ICCV_2015_paper.html) for inspiration. |
-| `SAINT` | Improves neural networks via Row Attention and Contrastive Pre-Training, introduced [here](https://arxiv.org/pdf/2106.01342). |
+| `SAINT` | Improves neural networks via Row Attention and Contrastive Pre-Training, introduced [here](https://arxiv.org/pdf/2106.01342). |
+| `AutoInt` | Automatic Feature Interaction Learning via Self-Attentive Neural Networks, introduced [here](https://arxiv.org/abs/1810.11921). |
+| `Trompt` | Trompt: Towards a Better Deep Neural Network for Tabular Data, introduced [here](https://arxiv.org/abs/2305.18446). |
+

@@ -211,13 +214,13 @@ random_search.fit(X, y, **fit_params)
 print("Best Parameters:", random_search.best_params_)
 print("Best Score:", random_search.best_score_)
 ```
-Note, that using this, you can also optimize the preprocessing. Just use the prefix ``prepro__`` when specifying the preprocessor arguments you want to optimize:
+Note that using this you can also optimize the preprocessing. Just specify the preprocessor arguments you want to optimize directly, without any prefix:
 ```python
 param_dist = {
     'd_model': randint(32, 128),
     'n_layers': randint(2, 10),
     'lr': uniform(1e-5, 1e-3),
-    "prepro__numerical_preprocessing": ["ple", "standardization", "box-cox"]
+    "numerical_preprocessing": ["ple", "standardization", "box-cox"]
 }

 ```
@@ -321,7 +324,7 @@ Here's how you can implement a custom model with Mambular:
 Define your custom model just as you would for an `nn.Module`. The main difference is that you will inherit from `BaseModel` and use the provided feature information to construct your layers. To integrate your model into the existing API, you only need to define the architecture and the forward pass.

 ```python
-from mambular.base_models import BaseModel
+from mambular.base_models.utils import BaseModel
 from mambular.utils.get_feature_dimensions import get_feature_dimensions
 import torch
 import torch.nn
@@ -365,7 +368,7 @@ Here's how you can implement a custom model with Mambular:
 You can build a regression, classification, or distributional regression model that can leverage all of Mambular's built-in methods by using the following:

 ```python
-from mambular.models import SklearnBaseRegressor
+from mambular.models.utils import SklearnBaseRegressor

 class MyRegressor(SklearnBaseRegressor):
     def __init__(self, **kwargs):
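Beyond the README excerpts above, the two newly added architectures are meant to be used through the same scikit-learn-style wrappers as the existing models. A minimal usage sketch, assuming the wrappers follow the package's usual `<Model>Regressor` / `<Model>Classifier` naming (the wrapper class names themselves do not appear in this commit excerpt):

```python
# Hedged sketch: AutoIntRegressor / TromptClassifier are assumed names following the
# package's naming convention; only the base-model classes are visible in this diff.
import numpy as np
from mambular.models import AutoIntRegressor, TromptClassifier  # assumed names

X = np.random.rand(200, 6)
y_reg = np.random.rand(200)
y_cls = np.random.randint(0, 2, size=200)

reg = AutoIntRegressor(d_model=64)   # config options follow the other models
reg.fit(X, y_reg)

clf = TromptClassifier(d_model=64)
clf.fit(X, y_cls)
preds = clf.predict(X)
```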

mambular/arch_utils/layer_utils/embedding_layer.py

Lines changed: 5 additions & 1 deletion
@@ -156,7 +156,11 @@ def forward(self, num_features, cat_features, emb_features):
         # Process categorical embeddings
         if self.cat_embeddings and cat_features is not None:
             cat_embeddings = [
-                emb(cat_features[i]) if emb(cat_features[i]).ndim == 3 else emb(cat_features[i]).unsqueeze(1)
+                (
+                    emb(cat_features[i])
+                    if emb(cat_features[i]).ndim == 3
+                    else emb(cat_features[i]).unsqueeze(1)
+                )
                 for i, emb in enumerate(self.cat_embeddings)
             ]
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+import torch.nn as nn
+import torch
+
+
+class ImportanceGetter(nn.Module):  # Figure 3 part 1
+    def __init__(self, P, C, d):
+        super().__init__()
+        self.colemb = nn.Parameter(torch.empty(C, d))
+        self.pemb = nn.Parameter(torch.empty(P, d))
+        torch.nn.init.normal_(self.colemb, std=0.01)
+        torch.nn.init.normal_(self.pemb, std=0.01)
+        self.C = C
+        self.P = P
+        self.d = d
+        self.dense = nn.Linear(2 * self.d, self.d)
+        self.laynorm1 = nn.LayerNorm(self.d)
+        self.laynorm2 = nn.LayerNorm(self.d)
+
+    def forward(self, O):
+        eprompt = self.pemb.unsqueeze(0).repeat(O.shape[0], 1, 1)
+
+        dense_out = self.dense(torch.cat((self.laynorm1(eprompt), O), dim=-1))
+
+        dense_out = dense_out + eprompt + O
+
+        ecolumn = self.laynorm2(self.colemb.unsqueeze(0).repeat(O.shape[0], 1, 1))
+
+        return torch.softmax(dense_out @ ecolumn.transpose(1, 2), dim=-1)
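A quick shape check of the new module, as a hedged sketch: the file's path is hidden in this view, so the import below is an assumption based on the relative import `.layer_utils.importance` used by the Trompt utilities further down.

```python
# Hedged usage sketch; the import path is assumed, not shown in this commit view.
import torch
from mambular.arch_utils.layer_utils.importance import ImportanceGetter  # assumed path

P, C, d, batch = 128, 10, 32, 4        # prompts, feature columns, embedding dim, batch
getter = ImportanceGetter(P, C, d)
O = torch.zeros(batch, P, d)           # prompt output of the previous cycle (zeros at start)
M = getter(O)                          # per-prompt importance weights over the columns
print(M.shape)                         # torch.Size([4, 128, 10])
assert torch.allclose(M.sum(dim=-1), torch.ones(batch, P), atol=1e-5)
```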

mambular/arch_utils/mamba_utils/mamba_arch.py

Lines changed: 18 additions & 6 deletions
@@ -43,11 +43,14 @@ def __init__(
                 norm=get_normalization_layer(config),  # type: ignore
                 activation=getattr(config, "activation", nn.SiLU()),
                 bidirectional=getattr(config, "bidirectional", False),
-                use_learnable_interaction=getattr(config, "use_learnable_interaction", False),
+                use_learnable_interaction=getattr(
+                    config, "use_learnable_interaction", False
+                ),
                 layer_norm_eps=getattr(config, "layer_norm_eps", 1e-5),
                 AD_weight_decay=getattr(config, "AD_weight_decay", True),
                 BC_layer_norm=getattr(config, "BC_layer_norm", False),
                 use_pscan=getattr(config, "use_pscan", False),
+                dilation=getattr(config, "dilation", 1),
             )
             for _ in range(getattr(config, "n_layers", 6))
         ]
@@ -149,6 +152,7 @@ def __init__(
         AD_weight_decay=False,
         BC_layer_norm=False,
         use_pscan=False,
+        dilation=1,
     ):
         super().__init__()

@@ -194,6 +198,7 @@ def __init__(
             AD_weight_decay=AD_weight_decay,
             BC_layer_norm=BC_layer_norm,
             use_pscan=use_pscan,
+            dilation=dilation,
         )
         self.norm = norm

@@ -307,6 +312,7 @@ def __init__(
         AD_weight_decay=False,
         BC_layer_norm=False,
         use_pscan=False,
+        dilation=1,
     ):
         super().__init__()

@@ -319,7 +325,10 @@ def __init__(
             self.pscan = pscan  # Store the imported pscan function
         except ImportError:
             self.pscan = None  # Set to None if pscan is not available
-            print("The 'mambapy' package is not installed. Please install it by running:\n" "pip install mambapy")
+            print(
+                "The 'mambapy' package is not installed. Please install it by running:\n"
+                "pip install mambapy"
+            )
         else:
             self.pscan = None

@@ -347,6 +356,7 @@ def __init__(
             bias=conv_bias,
             groups=self.d_inner,
             padding=d_conv - 1,
+            dilation=dilation,
         )

         self.dropout = nn.Dropout(dropout)
@@ -375,16 +385,18 @@ def __init__(
         else:
             raise NotImplementedError

-        dt_fwd = torch.exp(torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min)) + math.log(dt_min)).clamp(
-            min=dt_init_floor
-        )
+        dt_fwd = torch.exp(
+            torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min))
+            + math.log(dt_min)
+        ).clamp(min=dt_init_floor)
         inv_dt_fwd = dt_fwd + torch.log(-torch.expm1(-dt_fwd))
         with torch.no_grad():
             self.dt_proj_fwd.bias.copy_(inv_dt_fwd)

         if self.bidirectional:
             dt_bwd = torch.exp(
-                torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min)) + math.log(dt_min)
+                torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min))
+                + math.log(dt_min)
             ).clamp(min=dt_init_floor)
             inv_dt_bwd = dt_bwd + torch.log(-torch.expm1(-dt_bwd))
             with torch.no_grad():
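The new `dilation` option threaded through these hunks (and through `rnn_utils.py` below) widens the depthwise Conv1d without adding parameters: the effective kernel spans dilation * (d_conv - 1) + 1 input positions. A small self-contained illustration in plain PyTorch, with arbitrary example values rather than Mambular defaults:

```python
# Illustration only: how the dilation kwarg changes a depthwise Conv1d like the one
# configured above. d_inner, d_conv, and seq_len are arbitrary example values.
import torch
import torch.nn as nn

d_inner, d_conv, seq_len = 8, 4, 16
x = torch.randn(2, d_inner, seq_len)

for dilation in (1, 2):
    conv = nn.Conv1d(
        d_inner,
        d_inner,
        kernel_size=d_conv,
        groups=d_inner,          # depthwise: one filter per channel
        padding=d_conv - 1,      # same padding scheme as in the diff
        dilation=dilation,
    )
    # effective receptive field: dilation * (d_conv - 1) + 1 input positions
    print(dilation, conv(x).shape)   # output length changes with dilation
```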

mambular/arch_utils/rnn_utils.py

Lines changed: 11 additions & 2 deletions
@@ -21,6 +21,7 @@ def __init__(self, config):
         self.rnn_activation = getattr(config, "rnn_activation", "relu")
         self.d_conv = getattr(config, "d_conv", 4)
         self.residuals = getattr(config, "residuals", False)
+        self.dilation = getattr(config, "dilation", 1)

         # Choose RNN layer based on model_type
         rnn_layer = {
@@ -37,7 +38,10 @@ def __init__(self, config):

         if self.residuals:
             self.residual_matrix = nn.ParameterList(
-                [nn.Parameter(torch.randn(self.hidden_size, self.hidden_size)) for _ in range(self.num_layers)]
+                [
+                    nn.Parameter(torch.randn(self.hidden_size, self.hidden_size))
+                    for _ in range(self.num_layers)
+                ]
             )

         # First Conv1d layer uses input_size
@@ -49,6 +53,7 @@ def __init__(self, config):
                 padding=self.d_conv - 1,
                 bias=self.conv_bias,
                 groups=self.input_size,
+                dilation=self.dilation,
             )
         )
         self.layernorms_conv.append(nn.LayerNorm(self.input_size))
@@ -63,6 +68,7 @@ def __init__(self, config):
                 padding=self.d_conv - 1,
                 bias=self.conv_bias,
                 groups=self.hidden_size,
+                dilation=self.dilation,
             )
         )
         self.layernorms_conv.append(nn.LayerNorm(self.hidden_size))
@@ -159,7 +165,10 @@ def __init__(

         if self.residuals:
             self.residual_matrix = nn.ParameterList(
-                [nn.Parameter(torch.randn(self.hidden_size, self.hidden_size)) for _ in range(self.num_layers)]
+                [
+                    nn.Parameter(torch.randn(self.hidden_size, self.hidden_size))
+                    for _ in range(self.num_layers)
+                ]
             )

         # First Conv1d layer uses input_size
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+import torch.nn as nn
+import torch
+from .layer_utils.embedding_layer import EmbeddingLayer
+from .layer_utils.importance import ImportanceGetter
+import numpy as np
+
+
+class Expander(nn.Module):  # Figure 3 part 3
+    def __init__(self, P):
+        super().__init__()
+        self.lin = nn.Linear(1, P)
+        self.relu = nn.ReLU()
+        self.gn = nn.GroupNorm(2, P)
+
+    def forward(self, x):
+        res = self.relu(self.lin(x.unsqueeze(-1)))
+
+        return x.unsqueeze(1) + self.gn(torch.permute(res, (0, 3, 1, 2)))
+
+
+class TromptCell(nn.Module):
+    def __init__(self, feature_information, config):
+        super().__init__()
+        C = np.sum([len(info) for info in feature_information])
+        self.enc = EmbeddingLayer(
+            *feature_information,
+            config=config,
+        )
+        self.fe = ImportanceGetter(config.P, C, config.d_model)
+        self.ex = Expander(config.P)
+
+    def forward(self, *data, O=None):
+        x_res = self.ex(self.enc(*data))
+
+        M = self.fe(O)
+
+        return (M.unsqueeze(-1) * x_res).sum(dim=2)
+
+
+class TromptDecoder(nn.Module):
+    def __init__(self, d, d_out):
+        super().__init__()
+        self.l1 = nn.Linear(d, 1)
+        self.l2 = nn.Linear(d, d)
+        self.relu = nn.ReLU()
+        self.laynorm1 = nn.LayerNorm(d)
+        self.lf = nn.Linear(d, d_out)
+
+    def forward(self, x):
+        pw = torch.softmax(self.l1(x).squeeze(-1), dim=-1)
+
+        xnew = (pw.unsqueeze(-1) * x).sum(dim=-2)
+
+        return self.lf(self.laynorm1(self.relu(self.l2(xnew))))
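To make the data flow of the new blocks concrete, here is a shape-level sketch of one Trompt cycle using plain tensors in place of the learned layers, so it runs standalone. The random placeholders only mimic the shapes produced by `EmbeddingLayer`, `ImportanceGetter`, and the decoder's attention weights; they are not the actual computations.

```python
# Shape-level walkthrough of TromptCell / TromptDecoder above, with random tensors
# standing in for the learned layers. Dimensions: P prompts, C feature columns, d embedding size.
import torch

batch, P, C, d = 4, 128, 10, 32
E = torch.randn(batch, C, d)      # EmbeddingLayer output: one embedding per column
O = torch.zeros(batch, P, d)      # prompt output of the previous cycle (zeros at start)

# Expander: one copy of the column embeddings per prompt -> (batch, P, C, d)
x_res = E.unsqueeze(1).expand(batch, P, C, d)

# ImportanceGetter: per-prompt weights over the columns -> (batch, P, C)
M = torch.softmax(torch.randn(batch, P, C), dim=-1)

# TromptCell output: importance-weighted sum over columns -> (batch, P, d);
# this becomes O for the next cycle
O_next = (M.unsqueeze(-1) * x_res).sum(dim=2)

# TromptDecoder: softmax-pool the P prompts, then project to the output dimension
pw = torch.softmax(torch.randn(batch, P), dim=-1)     # stands in for the learned l1 scores
pooled = (pw.unsqueeze(-1) * O_next).sum(dim=-2)      # (batch, d)
print(O_next.shape, pooled.shape)
```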

mambular/base_models/__init__.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,4 @@
-from .basemodel import BaseModel
 from .ft_transformer import FTTransformer
-from .lightning_wrapper import TaskModel
 from .mambatab import MambaTab
 from .mambattn import MambAttention
 from .mambular import Mambular
@@ -12,13 +10,16 @@
 from .tabm import TabM
 from .tabtransformer import TabTransformer
 from .tabularnn import TabulaRNN
+from .autoint import AutoInt
+from .trompt import Trompt

 __all__ = [
+    "Trompt",
+    "AutoInt",
     "MLP",
     "NDTF",
     "NODE",
     "SAINT",
-    "BaseModel",
     "FTTransformer",
     "MambAttention",
     "MambaTab",
@@ -27,5 +28,4 @@
     "TabM",
     "TabTransformer",
     "TabulaRNN",
-    "TaskModel",
 ]
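For downstream code, the refactorization moves the base classes out of the package roots: the updated README imports `BaseModel` from `mambular.base_models.utils` and `SklearnBaseRegressor` from `mambular.models.utils`, and this `__init__.py` no longer re-exports `BaseModel` or `TaskModel`. A migration sketch based only on the README hunks above (`TaskModel`'s new location is not shown in this commit):

```python
# Before this commit:
# from mambular.base_models import BaseModel
# from mambular.models import SklearnBaseRegressor

# After this commit (paths taken from the updated README):
from mambular.base_models.utils import BaseModel
from mambular.models.utils import SklearnBaseRegressor
```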
