Source code for pycrostates.metrics.dunn
"""Dunn score."""
import numpy as np
from ..cluster._base import _BaseCluster
from ..utils import _distance_matrix
from ..utils._checks import _check_type
from ..utils._docs import fill_doc
[docs]
@fill_doc
def dunn_score(cluster):  # higher the better
    r"""Compute the Dunn index score.

    The Dunn index score\ :footcite:p:`Dunn` is evaluated on the data and
    labels stored in a fitted :ref:`Clustering` instance.

    Parameters
    ----------
    %(cluster)s

    Returns
    -------
    score : float
        The resulting Dunn score.

    Notes
    -----
    This function uses the absolute spatial correlation for distance.
    Columns of the fitted data whose norm is zero are discarded, together
    with their labels, before the score is computed.

    References
    ----------
    .. footbibliography::
    """
    _check_type(cluster, (_BaseCluster,), item_name="cluster")
    cluster._check_fit()
    fitted, assigned = cluster._fitted_data, cluster._labels_
    # A column with zero norm is dropped along with its label.
    nonzero = np.linalg.norm(fitted.T, axis=1) != 0
    fitted = fitted[:, nonzero]
    assigned = assigned[nonzero]
    return _dunn_score(fitted.T, assigned)
def _dunn_score(X, labels):  # higher the better
    """Compute the Dunn index.

    The index is the smallest inter-cluster separation divided by the
    largest intra-cluster diameter.

    Parameters
    ----------
    X : np.array
        np.array([N, p]) of all points
    labels: np.array
        np.array([N]) labels of all points

    Returns
    -------
    di : float
        The Dunn index. When ``labels`` holds fewer than two distinct values
        there is no inter-cluster separation and the numerator is ``inf``.

    Notes
    -----
    Based on https://github.com/jqmviegas/jqm_cvi
    """
    distances = _distance_matrix(X)
    # np.unique already returns its values sorted, no extra sort is needed.
    ks = np.unique(labels)
    # Build each membership mask once instead of once per cluster pair.
    masks = [labels == k for k in ks]
    # Entries that are never filled (the diagonal) must not win the minimum
    # below, so they start at +inf rather than at an arbitrary large number.
    deltas = np.full((len(ks), len(ks)), np.inf)
    big_deltas = np.zeros(len(ks))
    for i, mask_i in enumerate(masks):
        for j, mask_j in enumerate(masks):
            if i == j:
                continue  # skip diagonal
            deltas[i, j] = _delta_fast(mask_i, mask_j, distances)
        big_deltas[i] = _big_delta_fast(mask_i, distances)
    return np.min(deltas) / np.max(big_deltas)
def _delta_fast(ck, cl, distances):
    """Return the smallest non-zero distance between two clusters.

    Parameters
    ----------
    ck : np.array
        1-D mask; its non-zero entries select the points of the first cluster.
    cl : np.array
        1-D mask; its non-zero entries select the points of the second cluster.
    distances : np.array
        2-D pairwise distance matrix indexed by point on both axes.

    Returns
    -------
    delta : float
        Minimum of the non-zero distances between the two clusters, or
        ``0.0`` when both clusters are non-empty and every distance between
        them is zero.

    Raises
    ------
    ValueError
        Raised by ``np.min`` when either mask selects no point.
    """
    rows = np.flatnonzero(ck)
    cols = np.flatnonzero(cl)
    # np.ix_ extracts the (first cluster x second cluster) sub-matrix at once.
    block = distances[np.ix_(rows, cols)]
    positive = block[block != 0]
    if block.size > 0 and positive.size == 0:
        # Every pair of points coincides: the separation is zero. Without
        # this guard np.min would fail on the empty selection.
        return 0.0
    return np.min(positive)
def _big_delta_fast(ci, distances):
    """Return the largest pairwise distance inside one cluster.

    ``ci`` is a 1-D mask whose non-zero entries select the points of the
    cluster; ``distances`` is the pairwise distance matrix indexed by point
    on both axes.
    """
    members = np.where(ci)[0]
    # Keep the cluster's rows first, then its columns.
    within = distances[members][:, members]
    return within.max()