Commit f954a31

include dilation to 1DConv layers

1 parent a5beaed · commit f954a31

4 files changed, 35 additions & 8 deletions

mambular/arch_utils/mamba_utils/mamba_arch.py

Lines changed: 18 additions & 6 deletions
@@ -43,11 +43,14 @@ def __init__(
                 norm=get_normalization_layer(config),  # type: ignore
                 activation=getattr(config, "activation", nn.SiLU()),
                 bidirectional=getattr(config, "bidirectional", False),
-                use_learnable_interaction=getattr(config, "use_learnable_interaction", False),
+                use_learnable_interaction=getattr(
+                    config, "use_learnable_interaction", False
+                ),
                 layer_norm_eps=getattr(config, "layer_norm_eps", 1e-5),
                 AD_weight_decay=getattr(config, "AD_weight_decay", True),
                 BC_layer_norm=getattr(config, "BC_layer_norm", False),
                 use_pscan=getattr(config, "use_pscan", False),
+                dilation=getattr(config, "dilation", 1),
             )
             for _ in range(getattr(config, "n_layers", 6))
         ]

@@ -149,6 +152,7 @@ def __init__(
         AD_weight_decay=False,
         BC_layer_norm=False,
         use_pscan=False,
+        dilation=1,
     ):
         super().__init__()

@@ -194,6 +198,7 @@ def __init__(
             AD_weight_decay=AD_weight_decay,
             BC_layer_norm=BC_layer_norm,
             use_pscan=use_pscan,
+            dilation=dilation,
         )
         self.norm = norm

@@ -307,6 +312,7 @@ def __init__(
         AD_weight_decay=False,
         BC_layer_norm=False,
         use_pscan=False,
+        dilation=1,
     ):
         super().__init__()

@@ -319,7 +325,10 @@ def __init__(
                 self.pscan = pscan  # Store the imported pscan function
             except ImportError:
                 self.pscan = None  # Set to None if pscan is not available
-                print("The 'mambapy' package is not installed. Please install it by running:\n" "pip install mambapy")
+                print(
+                    "The 'mambapy' package is not installed. Please install it by running:\n"
+                    "pip install mambapy"
+                )
         else:
             self.pscan = None

@@ -347,6 +356,7 @@ def __init__(
             bias=conv_bias,
             groups=self.d_inner,
             padding=d_conv - 1,
+            dilation=dilation,
         )

         self.dropout = nn.Dropout(dropout)

@@ -375,16 +385,18 @@ def __init__(
         else:
             raise NotImplementedError

-        dt_fwd = torch.exp(torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min)) + math.log(dt_min)).clamp(
-            min=dt_init_floor
-        )
+        dt_fwd = torch.exp(
+            torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min))
+            + math.log(dt_min)
+        ).clamp(min=dt_init_floor)
         inv_dt_fwd = dt_fwd + torch.log(-torch.expm1(-dt_fwd))
         with torch.no_grad():
             self.dt_proj_fwd.bias.copy_(inv_dt_fwd)

         if self.bidirectional:
             dt_bwd = torch.exp(
-                torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min)) + math.log(dt_min)
+                torch.rand(self.d_inner) * (math.log(dt_max) - math.log(dt_min))
+                + math.log(dt_min)
             ).clamp(min=dt_init_floor)
             inv_dt_bwd = dt_bwd + torch.log(-torch.expm1(-dt_bwd))
             with torch.no_grad():

mambular/arch_utils/rnn_utils.py

Lines changed: 11 additions & 2 deletions
@@ -21,6 +21,7 @@ def __init__(self, config):
         self.rnn_activation = getattr(config, "rnn_activation", "relu")
         self.d_conv = getattr(config, "d_conv", 4)
         self.residuals = getattr(config, "residuals", False)
+        self.dilation = getattr(config, "dilation", 1)

         # Choose RNN layer based on model_type
         rnn_layer = {

@@ -37,7 +38,10 @@ def __init__(self, config):

         if self.residuals:
             self.residual_matrix = nn.ParameterList(
-                [nn.Parameter(torch.randn(self.hidden_size, self.hidden_size)) for _ in range(self.num_layers)]
+                [
+                    nn.Parameter(torch.randn(self.hidden_size, self.hidden_size))
+                    for _ in range(self.num_layers)
+                ]
             )

         # First Conv1d layer uses input_size

@@ -49,6 +53,7 @@ def __init__(self, config):
                 padding=self.d_conv - 1,
                 bias=self.conv_bias,
                 groups=self.input_size,
+                dilation=self.dilation,
             )
         )
         self.layernorms_conv.append(nn.LayerNorm(self.input_size))

@@ -63,6 +68,7 @@ def __init__(self, config):
                 padding=self.d_conv - 1,
                 bias=self.conv_bias,
                 groups=self.hidden_size,
+                dilation=self.dilation,
             )
         )
         self.layernorms_conv.append(nn.LayerNorm(self.hidden_size))

@@ -159,7 +165,10 @@ def __init__(

         if self.residuals:
             self.residual_matrix = nn.ParameterList(
-                [nn.Parameter(torch.randn(self.hidden_size, self.hidden_size)) for _ in range(self.num_layers)]
+                [
+                    nn.Parameter(torch.randn(self.hidden_size, self.hidden_size))
+                    for _ in range(self.num_layers)
+                ]
             )

         # First Conv1d layer uses input_size
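Both Conv1d layers here read the same config value, so a single dilation setting widens how many neighboring positions each depthwise filter spans without adding any weights: the effective kernel extent grows to (d_conv - 1) * dilation + 1. A standalone sketch (not repo code) verifying that the parameter count is unchanged:

import torch.nn as nn

# Standalone sketch: dilation stretches the effective kernel extent while
# the parameter count of the depthwise conv stays constant.
def effective_kernel(d_conv: int, dilation: int) -> int:
    return (d_conv - 1) * dilation + 1

for dilation in (1, 2, 4):
    conv = nn.Conv1d(32, 32, kernel_size=4, groups=32, dilation=dilation)
    n_params = sum(p.numel() for p in conv.parameters())
    print(dilation, effective_kernel(4, dilation), n_params)
# spans 4, 7, 13; n_params is 160 in every case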

mambular/configs/mambular_config.py

Lines changed: 3 additions & 0 deletions
@@ -22,6 +22,8 @@ class DefaultMambularConfig(BaseConfig):
         Dropout rate for regularization.
     d_conv : int, default=4
         Size of convolution over columns.
+    dilation : int, default=1
+        Dilation factor for the convolution.
     dt_rank : str, default="auto"
         Rank of the decision tree used in the model.
     d_state : int, default=128

@@ -76,6 +78,7 @@ class DefaultMambularConfig(BaseConfig):
     d_model: int = 64
     n_layers: int = 4
     d_conv: int = 4
+    dilation: int = 1
     expand_factor: int = 2
     bias: bool = False
     dropout: float = 0.0
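Downstream code reads the new field with getattr(config, "dilation", 1), so configs created before this commit keep working unchanged. A usage sketch, assuming the config class accepts keyword overrides as its annotated defaults suggest:

from mambular.configs.mambular_config import DefaultMambularConfig

# Assumed dataclass-style construction; dilation=1 reproduces the old behavior.
cfg = DefaultMambularConfig(dilation=2)
print(cfg.d_conv, cfg.dilation)  # expected: 4 2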

mambular/configs/tabularnn_config.py

Lines changed: 3 additions & 0 deletions
@@ -48,6 +48,8 @@ class DefaultTabulaRNNConfig(BaseConfig):
         Size of the feedforward network.
     d_conv : int, default=4
         Size of the convolutional layer for embedding features.
+    dilation : int, default=1
+        Dilation factor for the convolution.
     conv_bias : bool, default=True
         Whether to use bias in the convolutional layers.
     """

@@ -78,4 +80,5 @@ class DefaultTabulaRNNConfig(BaseConfig):
     rnn_activation: str = "relu"
     dim_feedforward: int = 256
     d_conv: int = 4
+    dilation: int = 1
     conv_bias: bool = True
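The RNN config change follows the same pattern, again with a backward-compatible default of 1 (same keyword-override assumption as above):

from mambular.configs.tabularnn_config import DefaultTabulaRNNConfig

# Omitting dilation leaves the conv layers exactly as before this commit.
cfg = DefaultTabulaRNNConfig(dilation=3)
print(cfg.d_conv, cfg.dilation)  # expected: 4 3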
