def _score(self, X, y):
    """Translate each configured column of ``X`` through its ``"woe"`` lookup.

    Every entry of ``self.mapping`` supplies a ``"col"`` name and a ``"woe"``
    lookup from category value to score. The column is translated with one
    vectorised ``Series.map`` call; values without an entry in the lookup
    come out as NaN and are then handled according to
    ``self.impute_missing`` and ``self.handle_unknown``.

    NOTE(review): this excerpt ends before the randomisation step and before
    the scored column is stored or returned. That remainder is not shown
    here and must be kept when this change is applied.

    Args:
        X: pandas DataFrame holding the columns named in ``self.mapping``.
        y: not read by the visible part of the method (presumably gates the
            randomisation step -- confirm against the full method).

    Raises:
        ValueError: if ``self.impute_missing`` is truthy,
            ``self.handle_unknown`` is ``"error"`` and a column contains a
            value that has no entry in its lookup.
    """
    for switch in self.mapping:
        # Get column name (can be anything: str, number,...)
        column = switch.get("col")

        # Score the column in a single pass. The result keeps X's own index,
        # so frames with a non-default index are scored correctly instead of
        # failing on a misaligned boolean mask.
        transformed_column = X[column].map(switch.get("woe"))

        # Replace missing values only in the computed columns
        if self.impute_missing:
            if self.handle_unknown == "impute":
                transformed_column = transformed_column.fillna(0)
            elif self.handle_unknown == "error":
                if transformed_column.isnull().any():
                    raise ValueError("Unexpected categories found in column %s" % column)

        # Randomization is meaningful only for training data -> we do it only if y is present
After Change
def _score(self, X, y):
for col in self.cols:
// Score the column
X[col] = X[col].map(self.mapping[col])
// Replace missing values only in the computed columns
if self.impute_missing:
if self.handle_unknown == "impute":