// get some random "other" candidates and score them along with the right one
// -- always use current DA but change trees when computing features
other_trees = [self.vectorizer.transform(self.features.get_features(ttrees[num], {"da": da}))
for num in np.random.choice(len(ttrees), self.train_cands)]
// -- add in some candidates generated using the random planner
// (use the current DA)
if self.random_candgen:
After Change
for ttree_no, da in enumerate(das):
    # Feature vector of the right tree for this DA. Its dense form is needed
    # for every duplicate check below, so it is computed only once.
    # (assumes X rows are sparse vectors supporting .toarray() -- TODO confirm)
    right_cand = X[ttree_no]
    right_dense = right_cand.toarray()

    # get some random "other" candidates and score them along with the right one
    # -- always use current DA but change trees when computing features
    # (np.random.choice samples with replacement by default, so indexes may
    # repeat and may include ttree_no itself; exact copies of the right
    # candidate's features are filtered out when building `cands`)
    other_idxs = np.random.choice(len(ttrees), self.train_cands)
    other_trees = [self.vectorizer.transform(self.features.get_features(ttrees[num], {"da": da}))
                   for num in other_idxs]

    # -- add in some candidates generated using the random planner
    # (use the current DA)
    # Stays empty when no random generator is configured, so that the debug
    # dump below never touches an undefined `random_doc`.
    random_ttrees = []
    if self.random_candgen:
        random_doc = self.random_candgen.generate_tree(da)
        # the first call created the document; the remaining calls are passed
        # the same document so all generated trees end up in it
        for _ in xrange(self.train_cands - 1):
            self.random_candgen.generate_tree(da, random_doc)
        # materialize once; reused for both the features and the debug output
        random_ttrees = list(ttrees_from_doc(random_doc, self.language, self.selector))
        other_trees.extend([self.vectorizer.transform(self.features.get_features(rand_ttree, {"da": da}))
                            for rand_ttree in random_ttrees])

    # The right candidate always sits at index 0; any other candidate whose
    # features are identical to it is dropped.
    cands = [right_cand] + [cand for cand in other_trees
                            if not np.array_equal(cand.toarray(), right_dense)]
    scores = [self._score(cand) for cand in cands]
    # index of the first candidate with the highest score
    top_cand_idx = scores.index(max(scores))

    if self.debug_out:
        print >> self.debug_out, ("TTREE-NO: %04d, SEL_CAND: %04d, LEN: %02d" % (ttree_no, top_cand_idx, len(cands)))
        print >> self.debug_out, "CAND TTREES:"
        for num in other_idxs:
            print >> self.debug_out, ttrees[num]
        print >> self.debug_out, "---RND---"
        # empty when the random generator is off (previously a NameError)
        for ttree in random_ttrees:
            print >> self.debug_out, ttree
        print >> self.debug_out, "SCORES:", ", ".join(["%.3f" % s for s in scores])