Source code for gradec.segmentation

"""Segmentation module."""

from abc import ABCMeta

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances

SIGMA_0 = 1


class Segmentation(metaclass=ABCMeta):
    """Base class for segmentation methods.

    Parameters
    ----------
    n_segments : int
        Total number of segments.

    Attributes
    ----------
    multidimensional : bool
        Whether the input is multidimensional.
    """

    def __init__(self, n_segments=5):
        self.n_segments = n_segments
        self.multidimensional = False

    def fit(self, gradients):
        """Fit Segmentation to gradients."""

    def transform(self):
        """Transform gradients to maps."""
        grad_maps = []
        for segment_i, (segment, peak) in enumerate(zip(self.segments, self.peaks)):
            mask = self.labels == segment_i
            segment_val = segment[mask]

            distances = pairwise_distances(
                segment_val, peak.reshape(1, -1), metric="euclidean"
            ).flatten()

            mean_dist = np.mean(distances)
            sigma = mean_dist * SIGMA_0
            affinity = np.exp(-(distances**2) / (2 * sigma**2))

            pseudo_act_map = np.zeros(segment.shape[0], dtype=segment.dtype)
            pseudo_act_map[mask] = np.array(affinity)

            grad_maps.append(pseudo_act_map)

        return grad_maps


[docs] class PCTSegmentation(Segmentation): """Percentile-based segmentation. This method implements the original method described in (Margulies et al., 2016) in which the whole-brain gradients is segmented into equidistant gradients segments. """
[docs] def fit(self, gradients): """Fit Segmentation to gradient. Parameters ---------- gradients : numpy.ndarray Gradients vector. Attributes ---------- segments : list of numpy.ndarray List with thresholded gradients maps. labels : L boundaries : peaks : """ if gradients.ndim > 1: raise ValueError("PCTSegmentation only supports 1D gradients.") else: # Reshape gradients to 2D to avoid issues with pairwise_distances in transform gradients = gradients.reshape(-1, 1) step = 100 / self.n_segments bins = [(i, i + step) for i in range(0, int(100 - step + 1), int(step))] segments = [] labels = [] labels = np.zeros(gradients.shape[0], dtype=int) boundaries = [gradients.min()] peaks = [] for i, bin in enumerate(bins): # Get boundary points min_, max_ = np.percentile(gradients[np.where(gradients)], bin) # Threshold gradients map based on bin, but don’t binarize. mask = (gradients >= min_) & (gradients <= max_) mask = mask.flatten() thresh_arr = gradients.copy() thresh_arr[~mask] = 0 segments.append(thresh_arr) labels[mask] = i boundaries.append(max_) # Peak activation = median 50th percentile of the segment peaks.append(np.median(thresh_arr[mask])) # Replace the first and last peaks with the min and max of the gradients peaks[0], peaks[-1] = gradients.min(), gradients.max() self.segments = segments self.labels = labels self.boundaries = boundaries self.peaks = peaks return self
[docs] class KMeansSegmentation(Segmentation): """KMeans-based segmentation. This method relies on 1D k-means clustering, which has previously been used to define clusters of functional connectivity matrices to establish a brain-wide parcellation. """
[docs] def fit(self, gradients): """Fit Segmentation to gradients. Parameters ---------- gradients : (V x D) numpy.ndarray Gradients vector, where V is the number of vertices and D is the number of dimensions. Attributes ---------- segments : list of numpy.ndarray List with thresholded gradients maps. labels : boundaries : peaks : """ if gradients.ndim > 1 and gradients.shape[1] > 1: self.multidimensional = True else: gradients = gradients.reshape(-1, 1) kmeans_model = KMeans( n_clusters=self.n_segments, init="k-means++", n_init=10, random_state=0, algorithm="elkan", ).fit(gradients) # Get order mapper from map_peaks peaks_unsrt = kmeans_model.cluster_centers_ map_peaks = peaks_unsrt[:, 0].flatten() # Order base on principal gradient order_idx = np.argsort(map_peaks) order_mapper = np.zeros_like(order_idx) order_mapper[order_idx] = np.arange(self.n_segments) # Reorder labels based on map_peaks order labels = order_mapper[kmeans_model.labels_] peaks = peaks_unsrt[order_idx, :] # Save the boundaries of the segments only for 1D case boundaries = [] if self.multidimensional else [gradients.min()] segments = [] for i in range(self.n_segments): map_arr = np.zeros_like(gradients) map_arr[labels == i] = gradients[labels == i] segments.append(map_arr) if not self.multidimensional: boundaries.append(gradients[labels == i].max()) if not self.multidimensional: # Replace the first and last peaks with the min and max of the gradients for 1D peaks[0], peaks[-1] = gradients.min(), gradients.max() self.segments = segments self.labels = labels self.boundaries = boundaries self.peaks = peaks return self