docs_score = np.round(sorted_docs_np[:, 1].astype(float), decimals=4)
# pack the scores into a one-column data frame indexed by the document names
score_name_data_frame = pd.DataFrame(docs_score.reshape(
docs_score.size, 1), index=docs_name, columns=["Cosine similarity"])
return score_name_data_frame
After Change
# Distance matrix: 1 minus the cosine similarity computed from final_matrix.
dist = 1 - cosine_similarity(final_matrix)

# Row positions of every file in dtm_data_frame except the one at
# comp_file_index. dtype=int keeps the result usable as an index array even
# when no other file exists (an empty list would otherwise be converted to a
# float64 array).
num_row = len(dtm_data_frame.index)
other_file_indexes = np.asarray(
    [file_index for file_index in range(num_row)
     if file_index != comp_file_index],
    dtype=int)

# Distance between each remaining file and the file at comp_file_index.
docs_score_array = np.asarray(
    [dist[file_index, comp_file_index] for file_index in other_file_indexes])

# Label of each remaining file, in the same order as docs_score_array.
docs_name_array = np.asarray(
    [temp_labels[file_index] for file_index in other_file_indexes])

# Compute the ascending sort permutation once and apply it to both arrays, so
# the sorted scores and the sorted names are aligned by construction.
sort_order = docs_score_array.argsort()
sorted_score_array = docs_score_array[sort_order]

# Round the sorted scores to 4 decimals.
final_score_array = np.round(sorted_score_array, decimals=4)

# Reorder the names so that each one lines up with its sorted score.
final_name_array = docs_name_array[sort_order]