random_state=random_state)
data = pca.fit_transform(data)
if verbose:
print("PCA complete in {:.2f} seconds".format(
time.time() - start))
if verbose:
start = time.time()
print("Calculating KNN...")
// the kernel connects each point to itself, but that self-connection is not counted in k,
// so search for k + 1 neighbors, self included
k = k + 1
if alpha_decay and a is not None:
try:
if knn_dist == "precomputed":
pdx = data
else:
pdx = squareform(pdist(data, metric=knn_dist))
knn_dist = np.partition(pdx, k, axis=1)[:, :k]
// bandwidth(x) = distance to k-th neighbor of x
epsilon = np.max(knn_dist, axis=1)
pdx = (pdx / epsilon).T // autotuning d(x,:) using epsilon(x).
except RuntimeWarning:
raise ValueError(
"It looks like you have at least k identical data points. "
"Try removing duplicates.")
kernel = np.exp(-1 * (pdx ** a)) // not really Gaussian kernel
else:
if knn_dist == "precomputed":
// we already have pairwise distances
pdx = knn_dist
knn_idx = np.argpartition(pdx, k, axis=1)[:, :k]
ind_ptr = np.arange(knn_idx.shape[0] + 1) * knn_idx.shape[1]
col_ind = knn_idx.reshape(-1)
ones = np.repeat(1., len(col_ind))
kernel = sparse.csr_matrix((ones, col_ind, ind_ptr),
shape=[data.shape[0], data.shape[0]])
else:
knn = NearestNeighbors(n_neighbors=k,
n_jobs=n_jobs).fit(data)
kernel = knn.kneighbors_graph(data, mode="connectivity")
if verbose:
print("KNN complete in {:.2f} seconds".format(time.time() - start))
kernel = kernel + kernel.T // symmetrization
return kernel
After Change
kernel matrix built from the input data
if knn_dist != "precomputed" and ndim < data.shape[1]:
log_start("PCA")
if sparse.issparse(data):
_, _, VT = randomized_svd(data, ndim,
random_state=random_state)
data = data.dot(VT.T)