Source code for pycrostates.metrics.davies_bouldin
"""Davies Bouldin score."""
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import _safe_indexing, check_X_y
from ..cluster._base import _BaseCluster
from ..utils import _distance_matrix
from ..utils._checks import _check_type
from ..utils._docs import fill_doc
[docs]
@fill_doc
def davies_bouldin_score(cluster):  # lower the better
    r"""Compute the Davies-Bouldin score.

    This function computes the Davies-Bouldin score\ :footcite:p:`Davies-Bouldin` with
    :func:`sklearn.metrics.davies_bouldin_score` from a fitted :ref:`Clustering`
    instance.

    Parameters
    ----------
    %(cluster)s

    Returns
    -------
    score : float
        The resulting Davies-Bouldin score.

    Notes
    -----
    For more details regarding the implementation, please refer to
    :func:`sklearn.metrics.davies_bouldin_score`. This function was modified in order to
    use the absolute spatial correlation for distance computations instead of the
    euclidean distance.

    Samples of the fitted data whose norm is zero are discarded, together with
    their labels, before the score is computed.

    References
    ----------
    .. footbibliography::
    """
    _check_type(cluster, (_BaseCluster,), item_name="cluster")
    cluster._check_fit()

    fitted = cluster._fitted_data
    assignments = cluster._labels_

    # Drop the columns (samples) that have a zero norm, and their labels.
    nonzero = np.linalg.norm(fitted.T, axis=1) != 0
    samples = fitted[:, nonzero]
    assignments = assignments[nonzero]

    # Align polarities just in case: every sample is multiplied by the sign
    # of its dot product with the center of the cluster it is assigned to.
    assigned_centers = cluster.cluster_centers_[assignments]
    polarity = np.sign((assigned_centers * samples.T).sum(axis=1))
    samples = samples * polarity

    # The private helper expects one sample per row.
    return _davies_bouldin_score(samples.T, assignments)
def _davies_bouldin_score(X, labels):
    """Compute the Davies-Bouldin score.

    The score is the average, over clusters, of the largest ratio between the
    summed intra-cluster distances of two distinct clusters and the distance
    between their centroids. All distances are obtained from
    ``_distance_matrix`` rather than from an euclidean metric.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        A list of ``n_features``-dimensional data points. Each row corresponds
        to a single data point.
    labels : array of shape (n_samples,)
        Predicted labels for each sample.

    Returns
    -------
    score : float
        The resulting Davies-Bouldin score. ``0.0`` is returned when all the
        intra-cluster distances, or all the centroid distances, are close to
        zero.
    """
    X, labels = check_X_y(X, labels)
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    n_labels = len(le.classes_)

    intra_dists = np.zeros(n_labels)
    centroids = np.zeros((n_labels, X.shape[1]), dtype=float)
    for k in range(n_labels):
        cluster_k = _safe_indexing(X, labels == k)
        centroid = cluster_k.mean(axis=0)
        centroids[k] = centroid
        # Mean distance between the members of cluster k and its centroid.
        intra_dists[k] = np.average(_distance_matrix(cluster_k, [centroid]))

    centroid_distances = _distance_matrix(centroids)

    # Degenerate configurations: must be tested before any entry is masked
    # with inf below.
    if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
        return 0.0

    # The index only compares *distinct* clusters. Exclude the self-pairs
    # explicitly instead of relying on the self-distance being exactly 0.0:
    # NOTE(review): _distance_matrix is not euclidean, so its diagonal may
    # carry floating-point residue - confirm against the helper.
    np.fill_diagonal(centroid_distances, np.inf)
    # Coincident centroids would otherwise trigger a division by zero.
    centroid_distances[centroid_distances == 0] = np.inf

    combined_intra_dists = intra_dists[:, None] + intra_dists
    scores = np.max(combined_intra_dists / centroid_distances, axis=1)
    return np.mean(scores)