labeled = np.copy(classifier.X_training) if n_training_records > 0 else select_cold_start_instance(unlabeled)
// Add uncertainty scores to our unlabeled data, and keep a copy of our unlabeled data.
unlabeled_uncertainty = np.concatenate((unlabeled, np.expand_dims(uncertainty_scores, axis=1)), axis=1)
unlabeled_uncertainty_copy = np.copy(unlabeled_uncertainty)
// Define our record container and the maximum number of records to sample.
instance_index_ranking = []
ceiling = np.minimum(unlabeled.shape[0], n_instances)
// TODO (dataframing) is there a better way to do this? Inherently sequential.
for _ in range(ceiling):
// Select the instance from our unlabeled copy that scores highest.
raw_instance = select_instance(X_training=labeled, X_uncertainty=unlabeled_uncertainty_copy)
instance = np.expand_dims(raw_instance, axis=1)
// Find our record's index in both the original unlabeled and our uncertainty copy.
instance_index_original = np.where(np.all(unlabeled == raw_instance, axis=1))[0][0]
instance_index_copy = np.where(np.all(unlabeled_uncertainty_copy[:, :-1] == instance.T, axis=1))[0][0]
// Add our instance we've considered for labeling to our labeled set. Although we don't
// know its label, we want further iterations to consider the newly-added instance so