@@ -57,10 +57,7 @@ def fit(self, X, y=None):
5757 self: Returns the instance itself.
5858 """
5959 # Fit determines the mapping from original categories to sequential integers starting from 1 (0 is reserved for unknown values)
60- self .mapping_ = [
61- {category : i + 1 for i , category in enumerate (np .unique (col ))}
62- for col in X .T
63- ]
60+ self .mapping_ = [{category : i + 1 for i , category in enumerate (np .unique (col ))} for col in X .T ]
6461 for mapping in self .mapping_ :
6562 mapping [None ] = 0 # Assign 0 to unknown values
6663 return self
@@ -75,12 +72,7 @@ def transform(self, X):
7572 X_transformed (ndarray of shape (n_samples, n_features)): The transformed data with integer values.
7673 """
7774 # Transform the categories to their mapped integer values
78- X_transformed = np .array (
79- [
80- [self .mapping_ [col ].get (value , 0 ) for col , value in enumerate (row )]
81- for row in X
82- ]
83- )
75+ X_transformed = np .array ([[self .mapping_ [col ].get (value , 0 ) for col , value in enumerate (row )] for row in X ])
8476 return X_transformed
8577
8678 def get_feature_names_out (self , input_features = None ):
@@ -122,9 +114,7 @@ def fit(self, X, y=None):
122114 Returns:
123115 self: Returns the instance itself.
124116 """
125- self .max_bins_ = (
126- np .max (X , axis = 0 ).astype (int ) + 1
127- ) # Find the maximum bin index for each feature
117+ self .max_bins_ = np .max (X , axis = 0 ).astype (int ) + 1 # Find the maximum bin index for each feature
128118 return self
129119
130120 def transform (self , X ):
@@ -207,9 +197,7 @@ def get_feature_names_out(self, input_features=None):
207197 feature_names (array of shape (n_features,)): The original feature names.
208198 """
209199 if input_features is None :
210- raise ValueError (
211- "input_features must be provided to generate feature names."
212- )
200+ raise ValueError ("input_features must be provided to generate feature names." )
213201 return np .array (input_features )
214202
215203
@@ -243,10 +231,10 @@ def __init__(self, model_name="paraphrase-MiniLM-L3-v2", model=None):
243231 from sentence_transformers import SentenceTransformer
244232
245233 self .model = SentenceTransformer (model_name )
246- except ImportError :
234+ except ImportError as e :
247235 raise ImportError (
248236 "sentence-transformers is not installed. Install it via `pip install sentence-transformers` or provide a preloaded model."
249- )
237+ ) from e
250238
251239 def fit (self , X , y = None ):
252240 """Fit method (not required for a transformer but included for compatibility)."""
@@ -264,13 +252,11 @@ def transform(self, X):
264252 - A 2D numpy array with embeddings for each text input.
265253 """
266254 if isinstance (X , np .ndarray ):
267- X = (
268- X .flatten ().astype (str ).tolist ()
269- ) # Convert to a list of strings if passed as an array
255+ X = X .flatten ().astype (str ).tolist () # Convert to a list of strings if passed as an array
270256 elif isinstance (X , list ):
271257 X = [str (x ) for x in X ] # Ensure everything is a string
272258
273- embeddings = self .model . encode (
274- X , convert_to_numpy = True
275- ) # Get sentence embeddings
259+ if self .model is None :
260+ raise ValueError ( "Model is not initialized. Ensure that the model is properly loaded." )
261+ embeddings = self . model . encode ( X , convert_to_numpy = True ) # Get sentence embeddings
276262 return embeddings
0 commit comments