# cross-validation strategy
kfold.shuffle = self.shuffle
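# out-of-fold predictions for the training data: one column per base regressor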
meta_features = np.zeros((X.shape[0], len(self.regressors)))
#
# The outer loop iterates over the base regressors. Each regressor
# is trained cv times and produces out-of-fold predictions, after
# which the meta-regressor is trained on their combined results.
#
for i, regr in enumerate(self.regressors):
#
# In the inner loop, the regressor is cloned and fitted on the
# training part of each fold, and the holdout part is used for
# predictions. After cv splits we have an out-of-fold prediction
# for every data point.
#
# The advantage of this approach is that the data points being
# predicted have not been seen by the regressor during training,
# so the meta-features are less susceptible to overfitting.
#
for train_idx, holdout_idx in kfold.split(X, y, groups):
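# fit a fresh clone of the regressor on the training part of this fold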
instance = clone(regr)
if sample_weight is None:
instance.fit(X[train_idx], y[train_idx])
else:
instance.fit(X[train_idx], y[train_idx],
sample_weight=sample_weight[train_idx])
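# predict on the holdout part; these out-of-fold predictions become
# this regressor's meta-feature column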
y_pred = instance.predict(X[holdout_idx])
meta_features[holdout_idx, i] = y_pred
# save meta-features for training data
if self.store_train_meta_features: