# Reject an inverted feature-count range before doing any work.
if self.min_features > self.max_features:
    raise AttributeError("min_features must be <= max_features")

# Enumerate every feature-index tuple whose length lies in
# [min_features, max_features]; the whole list is materialised up front
# so that its length can be reported as the progress total.
subset_sizes = range(self.min_features, self.max_features + 1)
feature_indices = range(X.shape[1])
per_size = [combinations(feature_indices, r=size) for size in subset_sizes]
candidates = list(chain(*per_size))

self.subsets_ = {}
all_comb = len(candidates)

# Never request more workers than there are candidates to score.
n_jobs = min(self.n_jobs, all_comb)
parallel = Parallel(n_jobs=n_jobs, pre_dispatch=self.pre_dispatch)
scored = parallel(
    delayed(_calc_score)(self, X, y, subset) for subset in candidates
)
work = enumerate(scored)

# _calc_score hands back (feature tuple, scores); store one record per
# candidate, keyed by its position in the result sequence.
for position, (subset, subset_scores) in work:
    self.subsets_[position] = {
        "feature_idx": subset,
        "cv_scores": subset_scores,
        "avg_score": np.mean(subset_scores),
    }
    if self.print_progress:
        sys.stderr.write("\rFeatures: %d/%d" % (position + 1, all_comb))
        sys.stderr.flush()

# Linear scan for the highest average score; the strict comparison
# means the earliest record wins on ties.
max_score = float("-inf")
for key, record in self.subsets_.items():
    if record["avg_score"] > max_score:
        max_score = record["avg_score"]
        best_subset = key
score = max_score
idx = self.subsets_[best_subset]["feature_idx"]
After Change
# Reject an inverted feature-count range before doing any work.
if self.min_features > self.max_features:
    raise AttributeError("min_features must be <= max_features")

# One combinations iterator per subset size, chained into a single lazy
# stream so the full set of candidates is never held in memory at once.
subset_sizes = range(self.min_features, self.max_features + 1)
feature_indices = range(X.shape[1])
per_size = [combinations(feature_indices, r=size) for size in subset_sizes]
candidates = chain(*per_size)

self.subsets_ = {}
def ncr(n, r):
    """Return the number of combinations of length r from n items.

    Parameters
    ----------
    n : {integer}
        Total number of items
    r : {integer}
        Number of items to select from n

    Returns
    -------
    Number of combinations, integer. Zero when r is negative or
    larger than n, since no such selection exists.

    """
    # Without this guard, min(r, n - r) goes negative and reduce() is
    # called on an empty range with no initial value (TypeError).
    if r < 0 or r > n:
        return 0
    # C(n, r) == C(n, n - r); work with the smaller side to keep the
    # products short.
    r = min(r, n - r)
    if r == 0:
        return 1
    numer = reduce(op.mul, range(n, n - r, -1))
    denom = reduce(op.mul, range(1, r + 1))
    # The quotient is always exact, so integer division loses nothing.
    return numer // denom
# Total number of candidate subsets, computed arithmetically because
# `candidates` is a lazy iterator with no len(). The built-in sum keeps
# exact Python integers; np.sum would coerce the counts to a fixed-width
# or floating NumPy dtype, which can overflow or lose precision for
# large counts and would hand a NumPy scalar to Parallel below.
all_comb = sum(ncr(n=X.shape[1], r=size)
               for size in range(self.min_features,
                                 self.max_features + 1))

# Never request more workers than there are candidates to score.
n_jobs = min(self.n_jobs, all_comb)
parallel = Parallel(n_jobs=n_jobs, pre_dispatch=self.pre_dispatch)
work = enumerate(parallel(delayed(_calc_score)(self, X, y, c)
                          for c in candidates))

# _calc_score hands back (feature tuple, scores); store one record per
# candidate, keyed by its position in the result sequence.
for iteration, (c, cv_scores) in work:
    self.subsets_[iteration] = {"feature_idx": c,
                                "cv_scores": cv_scores,
                                "avg_score": np.mean(cv_scores)}
    if self.print_progress:
        sys.stderr.write("\rFeatures: %d/%d" % (
            iteration + 1, all_comb))
        sys.stderr.flush()

# Linear scan for the highest average score; the strict comparison
# means the earliest record wins on ties.
# NOTE(review): if no record's avg_score exceeds -inf (for example when
# there are no records at all), best_subset is never assigned and the
# lookup at the end fails.
max_score = float("-inf")
for c in self.subsets_:
    if self.subsets_[c]["avg_score"] > max_score:
        max_score = self.subsets_[c]["avg_score"]
        best_subset = c
score = max_score
idx = self.subsets_[best_subset]["feature_idx"]