Source code for pycrostates.metrics.dunn

"""Dunn score."""

import numpy as np

from ..cluster._base import _BaseCluster
from ..utils import _distance_matrix
from ..utils._checks import _check_type
from ..utils._docs import fill_doc


@fill_doc
def dunn_score(cluster):  # higher the better
    r"""Compute the Dunn index score.

    This function computes the Dunn index score\ :footcite:p:`Dunn` from a
    fitted :ref:`Clustering` instance.

    Parameters
    ----------
    %(cluster)s

    Returns
    -------
    score : float
        The resulting Dunn score.

    Notes
    -----
    This function uses the absolute spatial correlation for distance.
    Samples of the fitted data whose norm is zero are discarded, together
    with their labels, before the score is computed.

    References
    ----------
    .. footbibliography::
    """
    _check_type(cluster, (_BaseCluster,), item_name="cluster")
    cluster._check_fit()
    fitted, assigned = cluster._fitted_data, cluster._labels_
    # Columns of the fitted data are the individual samples; keep only the
    # ones with a non-zero norm and drop the matching labels.
    nonzero = np.linalg.norm(fitted.T, axis=1) != 0
    # _dunn_score expects one point per row, hence the transpose.
    return _dunn_score(fitted[:, nonzero].T, assigned[nonzero])
def _dunn_score(X, labels):  # higher the better
    """Compute the Dunn index.

    The index is the smallest (non-zero) distance between points of two
    different clusters divided by the largest distance between points of a
    same cluster.

    Parameters
    ----------
    X : np.array
        np.array([N, p]) of all points
    labels : np.array
        np.array([N]) labels of all points

    Returns
    -------
    di : float
        The Dunn index. If ``labels`` holds a single cluster there is no
        inter-cluster distance and the numerator is ``inf``.

    Notes
    -----
    Based on https://github.com/jqmviegas/jqm_cvi
    """
    # NOTE(review): assumed to be the (N, N) pairwise distance matrix of the
    # rows of X; confirm against _distance_matrix.
    distances = _distance_matrix(X)
    # np.unique already returns sorted values; build each membership mask
    # once instead of once per pair of clusters.
    masks = [labels == k for k in np.unique(labels)]
    n_clusters = len(masks)
    # The diagonal (a cluster against itself) must never win the minimum, so
    # it is initialised to +inf rather than to an arbitrary large constant.
    deltas = np.full((n_clusters, n_clusters), np.inf)
    big_deltas = np.zeros(n_clusters)
    for i, mask_i in enumerate(masks):
        big_deltas[i] = _big_delta_fast(mask_i, distances)
        for j, mask_j in enumerate(masks):
            if i != j:
                deltas[i, j] = _delta_fast(mask_i, mask_j, distances)
    return np.min(deltas) / np.max(big_deltas)


def _delta_fast(ck, cl, distances):
    """Return the smallest non-zero distance between two clusters.

    Parameters
    ----------
    ck, cl : np.array
        Masks selecting the points of each cluster (non-zero entries are
        members).
    distances : np.array
        Square matrix of pairwise distances.

    Returns
    -------
    delta : float
        Smallest non-zero entry of the ``ck`` x ``cl`` sub-matrix, or ``0.0``
        when every entry of that sub-matrix is zero.
    """
    values = distances[np.ix_(np.flatnonzero(ck), np.flatnonzero(cl))]
    values = values[np.nonzero(values)]
    if values.size == 0:
        # Every pairwise distance is zero: np.min would raise on the empty
        # selection, and the clusters are at distance zero from each other.
        return 0.0
    return np.min(values)


def _big_delta_fast(ci, distances):
    """Return the largest pairwise distance inside one cluster.

    Parameters
    ----------
    ci : np.array
        Mask selecting the points of the cluster (non-zero entries are
        members).
    distances : np.array
        Square matrix of pairwise distances.

    Returns
    -------
    big_delta : float
        Largest entry of the ``ci`` x ``ci`` sub-matrix.
    """
    members = np.flatnonzero(ci)
    return np.max(distances[np.ix_(members, members)])