Skip to content

Commit 44d05ac

Browse files
committed
TabR integration + compatibility checking of pre-processing method with plr embedding
1 parent af2a565 commit 44d05ac

2 files changed

Lines changed: 21 additions & 1 deletion

File tree

mambular/arch_utils/layer_utils/embedding_layer.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ def __init__(self, num_feature_info, cat_feature_info, emb_feature_info, config)
125125
if self.layer_norm_after_embedding:
126126
self.embedding_norm = nn.LayerNorm(self.d_model)
127127

128+
self.feature_info = (num_feature_info, cat_feature_info, emb_feature_info)
129+
128130
def forward(self, num_features, cat_features, emb_features):
129131
"""Defines the forward pass of the model.
130132
@@ -171,6 +173,8 @@ def forward(self, num_features, cat_features, emb_features):
171173

172174
# Process numerical embeddings based on embedding_type
173175
if self.embedding_type == "plr":
176+
# check pre-processing type compatibility with plr
177+
self.check_plr_embedding_compatibility(self.feature_info)
174178
# For PLR, pass all numerical features together
175179
if num_features is not None:
176180
num_features = torch.stack(num_features, dim=1).squeeze(
@@ -226,6 +230,21 @@ def forward(self, num_features, cat_features, emb_features):
226230
x = self.embedding_dropout(x)
227231

228232
return x
233+
234+
def check_plr_embedding_compatibility(self, feature_info: tuple):
    """Validate that no feature's preprocessing conflicts with PLR embeddings.

    Scans every feature-info dictionary in `feature_info` and raises if any
    feature was preprocessed with a method known to be incompatible with the
    PLR (periodic-linear-ReLU) numerical embedding type.

    Parameters
    ----------
    feature_info : tuple
        Tuple of dicts (e.g. numerical / categorical / embedding feature
        info), each mapping a feature name to a properties dict that may
        carry a 'preprocessing' string.

    Raises
    ------
    ValueError
        If any feature's 'preprocessing' string contains one of the
        incompatible method names.
    """
    # Preprocessing method names that cannot be combined with PLR embeddings.
    blocked = ('ple', 'one-hot', 'polynomial', 'splines', 'sigmoid', 'rbf')

    for group in feature_info:
        for props in group.values():
            # Missing 'preprocessing' keys are treated as compatible.
            method = props.get('preprocessing', '')
            # Substring match mirrors how preprocessing names are recorded
            # (e.g. variants like 'one-hot-dense' should also be rejected).
            hit = next((term for term in blocked if term in method), None)
            if hit is not None:
                raise ValueError(f"PLR embedding type doesn't work with the '{hit}' pre-processing method.\n")
229248

230249

231250
class OneHotEncoding(nn.Module):

mambular/base_models/utils/lightning_wrapper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ def validation_step(self, batch, batch_idx): # type: ignore
277277
data, labels = batch
278278
if hasattr(self.estimator, "validate_with_candidates") and self.train_features is not None:
279279
preds = self.estimator.validate_with_candidates(
280-
*data, candidate_x=self.train_features, candidate_y=self.train_targets
280+
*data,
281+
candidate_x=self.train_features, candidate_y=self.train_targets
281282
)
282283
else:
283284
preds = self(*data)

0 commit comments

Comments
 (0)