if self.refit:
    self.regr_ = [clone(clf) for clf in self.regressors]
    self.meta_regr_ = clone(self.meta_regressor)
else:
    self.regr_ = self.regressors
    self.meta_regr_ = self.meta_regressor
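
# check_cv turns an integer cv into a scikit-learn cross-validator
# (KFold here, since this is regression); an existing CV splitter
# object is passed through unchanged.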
kfold = check_cv(self.cv, y)
if isinstance(self.cv, int):
    # Override the shuffle parameter in case of a self-generated
    # cross-validation strategy
    kfold.shuffle = self.shuffle
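
# One row per training sample, one column of out-of-fold predictions
# per base regressor.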
meta_features = np.zeros((X.shape[0], len(self.regressors)))

#
# The outer loop iterates over the base regressors. Each regressor
# is trained cv times and makes predictions, after which we train
# the meta-regressor on their combined results.
#
for i, regr in enumerate(self.regressors):
    #
    # In the inner loop, the model is trained on the training part of
    # the current fold and makes predictions on the holdout part. This
    # is repeated cv times, so in the end we have a prediction for
    # every data point.
    #
    # The advantage of this approach is that the data points we're
    # predicting have not been seen by the model during training, so
    # the meta-features are less susceptible to overfitting.
    #
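    # groups is only used by group-aware splitters (e.g. GroupKFold);
    # other cross-validators simply ignore it.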
    for train_idx, holdout_idx in kfold.split(X, y, groups):
        instance = clone(regr)
        instance.fit(X[train_idx], y[train_idx])
        y_pred = instance.predict(X[holdout_idx])
        meta_features[holdout_idx, i] = y_pred
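
# At this point meta_features holds a cross-validated (out-of-fold)
# prediction from every base regressor for every training sample.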
# Save meta-features for the training data
if self.store_train_meta_features:
    self.train_meta_features_ = meta_features

# Train the meta-model on the out-of-fold predictions
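# If use_features_in_secondary is set, the original features are stacked
# next to the meta-features before fitting (keeping the sparse code path
# when X is a scipy sparse matrix).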
if not self.use_features_in_secondary:
    self.meta_regr_.fit(meta_features, y)
elif sparse.issparse(X):
    self.meta_regr_.fit(sparse.hstack((X, meta_features)), y)
else:
    self.meta_regr_.fit(np.hstack((X, meta_features)), y)
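
# A minimal usage sketch (hypothetical names; assumes this fit method belongs
# to a scikit-learn-compatible stacking regressor class exposing the
# constructor parameters referenced above):
#
#     from sklearn.linear_model import Lasso, Ridge
#     from sklearn.ensemble import RandomForestRegressor
#
#     stack = StackingRegressorClass(regressors=[Lasso(), RandomForestRegressor()],
#                                    meta_regressor=Ridge(), cv=5, shuffle=True)
#     stack.fit(X_train, y_train)
#     y_pred = stack.predict(X_test)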