
Commit 3eab247

Merge pull request #48 from basf/restructure
improved documentation
2 parents 7813141 + c9f8c16 commit 3eab247

18 files changed

Lines changed: 1708 additions & 30 deletions

mambular/configs/fttransformer_config.py

Lines changed: 55 additions & 0 deletions
@@ -4,6 +4,61 @@
 
 @dataclass
 class DefaultFTTransformerConfig:
+    """
+    Configuration class for the default FT Transformer model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=64
+        Dimensionality of the model.
+    n_layers : int, default=8
+        Number of layers in the transformer.
+    n_heads : int, default=4
+        Number of attention heads in the transformer.
+    attn_dropout : float, default=0.3
+        Dropout rate for the attention mechanism.
+    ff_dropout : float, default=0.3
+        Dropout rate for the feed-forward layers.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the transformer.
+    num_embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=(128, 64, 32)
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to skip layers in the head.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="cls"
+        Pooling method to be used ('cls', 'avg', etc.).
+    norm_first : bool, default=False
+        Whether to apply normalization before other operations in each transformer block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    transformer_activation : callable, default=nn.SELU()
+        Activation function for the transformer layers.
+    layer_norm_eps : float, default=1e-05
+        Epsilon value for layer normalization.
+    transformer_dim_feedforward : int, default=512
+        Dimensionality of the feed-forward layers in the transformer.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
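
As a quick illustration of how these defaults can be overridden, here is a minimal sketch; the import path simply mirrors the file location above, and treating the class as a standard dataclass (e.g. with dataclasses.replace) is an assumption rather than something shown in this diff:

    from dataclasses import replace

    from mambular.configs.fttransformer_config import DefaultFTTransformerConfig

    # Override a few of the documented defaults at construction time.
    config = DefaultFTTransformerConfig(lr=5e-4, n_layers=4, attn_dropout=0.1)

    # Derive a wider variant without mutating the original config.
    wide = replace(config, d_model=128, transformer_dim_feedforward=1024)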

mambular/configs/mambular_config.py

Lines changed: 67 additions & 0 deletions
@@ -4,6 +4,73 @@
 
 @dataclass
 class DefaultMambularConfig:
+    """
+    Configuration class for the default Mambular model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=64
+        Dimensionality of the model.
+    n_layers : int, default=8
+        Number of layers in the model.
+    expand_factor : int, default=2
+        Expansion factor for the feed-forward layers.
+    bias : bool, default=False
+        Whether to use bias in the linear layers.
+    d_conv : int, default=16
+        Dimensionality of the convolutional layers.
+    conv_bias : bool, default=True
+        Whether to use bias in the convolutional layers.
+    dropout : float, default=0.05
+        Dropout rate for regularization.
+    dt_rank : str, default="auto"
+        Rank of the Δ (delta) time-step projection in the Mamba blocks.
+    d_state : int, default=32
+        Dimensionality of the state in recurrent layers.
+    dt_scale : float, default=1.0
+        Scaling factor for Δ initialization.
+    dt_init : str, default="random"
+        Initialization method for the Δ projection.
+    dt_max : float, default=0.1
+        Maximum value for Δ initialization.
+    dt_min : float, default=1e-04
+        Minimum value for Δ initialization.
+    dt_init_floor : float, default=1e-04
+        Floor value for Δ initialization.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the model.
+    num_embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=(128, 64, 32)
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to skip layers in the head.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method to be used ('avg', 'max', etc.).
+    bidirectional : bool, default=False
+        Whether to use bidirectional processing of the input sequences.
+    use_learnable_interaction : bool, default=False
+        Whether to use learnable feature interactions before passing through the Mamba blocks.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
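
A hedged sketch of how the Mamba-specific fields might be tuned; the field names are taken from the docstring above, the values are purely illustrative, and the import path is assumed from the file location:

    from mambular.configs.mambular_config import DefaultMambularConfig

    # Larger SSM state, tighter Δ (dt) initialization range, bidirectional scan.
    config = DefaultMambularConfig(
        d_state=64,
        dt_min=1e-3,
        dt_max=0.1,
        bidirectional=True,
        use_learnable_interaction=True,
    )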

mambular/configs/mlp_config.py

Lines changed: 33 additions & 0 deletions
@@ -4,6 +4,39 @@
 
 @dataclass
 class DefaultMLPConfig:
+    """
+    Configuration class for the default Multi-Layer Perceptron (MLP) model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    layer_sizes : list, default=(128, 128, 32)
+        Sizes of the layers in the MLP.
+    activation : callable, default=nn.SELU()
+        Activation function for the MLP layers.
+    skip_layers : bool, default=False
+        Whether to skip layers in the MLP.
+    dropout : float, default=0.5
+        Dropout rate for regularization.
+    norm : str, default=None
+        Normalization method to be used, if any.
+    use_glu : bool, default=False
+        Whether to use Gated Linear Units (GLU) in the MLP.
+    skip_connections : bool, default=False
+        Whether to use skip connections in the MLP.
+    batch_norm : bool, default=False
+        Whether to use batch normalization in the MLP layers.
+    layer_norm : bool, default=False
+        Whether to use layer normalization in the MLP layers.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
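
For example, a deeper MLP variant could be configured like this (a sketch only; the import path follows the file location and the keyword names come from the docstring above):

    from mambular.configs.mlp_config import DefaultMLPConfig

    # Wider hidden layers with GLU blocks and layer normalization enabled.
    config = DefaultMLPConfig(
        layer_sizes=(256, 128, 64),
        use_glu=True,
        layer_norm=True,
        dropout=0.3,
    )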

mambular/configs/resnet_config.py

Lines changed: 35 additions & 0 deletions
@@ -4,6 +4,41 @@
 
 @dataclass
 class DefaultResNetConfig:
+    """
+    Configuration class for the default ResNet model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    layer_sizes : list, default=(128, 128, 32)
+        Sizes of the layers in the ResNet.
+    activation : callable, default=nn.SELU()
+        Activation function for the ResNet layers.
+    skip_layers : bool, default=False
+        Whether to skip layers in the ResNet.
+    dropout : float, default=0.5
+        Dropout rate for regularization.
+    norm : str, default=None
+        Normalization method to be used, if any.
+    use_glu : bool, default=False
+        Whether to use Gated Linear Units (GLU) in the ResNet.
+    skip_connections : bool, default=True
+        Whether to use skip connections in the ResNet.
+    batch_norm : bool, default=True
+        Whether to use batch normalization in the ResNet layers.
+    layer_norm : bool, default=False
+        Whether to use layer normalization in the ResNet layers.
+    num_blocks : int, default=3
+        Number of residual blocks in the ResNet.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
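
A minimal sketch of overriding the ResNet defaults and inspecting the effective hyperparameters; using dataclasses.asdict here is an assumption about how the config might typically be logged, not part of this diff:

    from dataclasses import asdict

    from mambular.configs.resnet_config import DefaultResNetConfig

    # More residual blocks and less aggressive dropout.
    config = DefaultResNetConfig(num_blocks=5, dropout=0.2)

    # Dump the full hyperparameter set, e.g. for experiment logging.
    print(asdict(config))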

mambular/configs/tabtransformer_config.py

Lines changed: 55 additions & 0 deletions
@@ -4,6 +4,61 @@
 
 @dataclass
 class DefaultTabTransformerConfig:
+    """
+    Configuration class for the default TabTransformer model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=64
+        Dimensionality of the model.
+    n_layers : int, default=8
+        Number of layers in the transformer.
+    n_heads : int, default=4
+        Number of attention heads in the transformer.
+    attn_dropout : float, default=0.3
+        Dropout rate for the attention mechanism.
+    ff_dropout : float, default=0.3
+        Dropout rate for the feed-forward layers.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the transformer.
+    num_embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=(128, 64, 32)
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to skip layers in the head.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method to be used ('cls', 'avg', etc.).
+    norm_first : bool, default=True
+        Whether to apply normalization before other operations in each transformer block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    transformer_activation : callable, default=nn.SELU()
+        Activation function for the transformer layers.
+    layer_norm_eps : float, default=1e-05
+        Epsilon value for layer normalization.
+    transformer_dim_feedforward : int, default=512
+        Dimensionality of the feed-forward layers in the transformer.
+    """
+
     lr: float = 1e-04
     lr_patience: int = 10
     weight_decay: float = 1e-06
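
Finally, a sketch showing that the callable-typed fields accept torch.nn activation modules and that pooling can be switched to the CLS token; again, the import path and the chosen values are illustrative assumptions:

    import torch.nn as nn

    from mambular.configs.tabtransformer_config import DefaultTabTransformerConfig

    # Swap the activation callables and pool over the CLS token instead of averaging.
    config = DefaultTabTransformerConfig(
        transformer_activation=nn.GELU(),
        head_activation=nn.ReLU(),
        pooling_method="cls",
    )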
