import numpy as np
from scipy.sparse import csgraph
from numpy import linalg as LA
from scipy.spatial.distance import pdist, squareform
from matplotlib import pyplot as plt
[docs]def getAffinityMatrix(coordinates, n_cluster = 10):
"""
Calculate affinity matrix based on input coordinates matrix and the numeber
of nearest neighbours.
Apply local scaling based on the k nearest neighbour
References:
https://papers.nips.cc/paper/2619-self-tuning-spectral-clustering.pdf
"""
# calculate euclidian distance matrix
dists = squareform(pdist(coordinates))
# for each row, sort the distances ascendingly and take the index of the
#k-th position (nearest neighbour)
knn_distances = np.sort(dists, axis=0)[n_cluster]
knn_distances = knn_distances[np.newaxis].T
# calculate sigma_i * sigma_j
local_scale = knn_distances.dot(knn_distances.T)
affinity_matrix = dists * dists
affinity_matrix = -affinity_matrix / local_scale
# divide square distance matrix by local scale
affinity_matrix[np.where(np.isnan(affinity_matrix))] = 0.0
# apply exponential
affinity_matrix = np.exp(affinity_matrix)
np.fill_diagonal(affinity_matrix, 0)
return affinity_matrix
[docs]def eigenDecomposition(A, plot = True, topK = 5):
"""
This method performs the eigen decomposition on a given affinity matrix
Parameters:
-----------
A:
Affinity matrix
plot:
plots the sorted eigen values for visual inspection
Returns:
--------
A tuple containing:
- the optimal number of clusters by eigengap heuristic
- all eigen values
- all eigen vectors
References:
https://papers.nips.cc/paper/2619-self-tuning-spectral-clustering.pdf
http://www.kyb.mpg.de/fileadmin/user_upload/files/publications/attachments/Luxburg07_tutorial_4488%5b0%5d.pdf
"""
L = csgraph.laplacian(A, normed=True)
# LM parameter : Eigenvalues with largest magnitude (eigs, eigsh), that is, largest eigenvalues in
# the euclidean norm of complex numbers.
eigenvalues, eigenvectors = LA.eig(L)
if plot:
plt.title('Largest eigen values of input matrix')
plt.scatter(np.arange(len(eigenvalues)), eigenvalues)
plt.grid()
plt.show()
plt.close()
# Identify the optimal number of clusters as the index corresponding
# to the larger gap between eigen values
index_largest_gap = np.argsort(np.diff(eigenvalues))[::-1][:topK]
nb_clusters = index_largest_gap + 1
return nb_clusters, eigenvalues, eigenvectors