Source code for lipyds.leafletfinder.grouping

"""
Grouping methods
================

Classes
-------

.. autoclass:: GraphMethod
    :members:

.. autoclass:: SpectralClusteringMethod
    :members:
"""

import abc
import warnings
from typing import Optional, List

from MDAnalysis.core.groups import AtomGroup
from MDAnalysis.analysis.distances import contact_matrix
import numpy as np
from numpy.typing import ArrayLike

from ..lib.mdautils import (get_centers_by_residue, get_orientations,
                            get_distances_with_projection)


class GroupingMethod(abc.ABC):
    def __init__(self, headgroups: AtomGroup,
                 tailgroups: Optional[AtomGroup]=None,
                 cutoff: float=15.0, pbc: bool=False):
        self.headgroups = headgroups
        if tailgroups is None:
            tailgroups = self.headgroups.residues.atoms - headgroups
        self.tailgroups = tailgroups
        self.n_residues = len(self.headgroups.residues)
        self.cutoff = cutoff
        self.pbc = pbc
        if pbc:
            self.get_box = lambda: self.headgroups.universe.dimensions
        else:
            self.get_box = lambda: None

    @abc.abstractmethod
    def run(self, **kwargs) -> List[List[int]]:
        raise NotImplementedError()


[docs]
class GraphMethod(GroupingMethod):

    def __init__(self, headgroups: AtomGroup,
                 tailgroups: Optional[AtomGroup]=None,
                 cutoff: float=15.0, pbc: bool=False,
                 sparse: Optional[bool]=None, **kwargs):
        try:
            import networkx as nx
        except ImportError:
            raise ImportError("networkx is required to use this method "
                            "but is not installed. Install it with "
                            "`conda install networkx` or "
                            "`pip install networkx`.") from None
        super().__init__(headgroups, tailgroups=tailgroups,
                         cutoff=cutoff, pbc=pbc)
        self.sparse = sparse
        self.returntype = "numpy" if not sparse else "sparse"

    def run(self, **kwargs) -> List[List[int]]:
        import networkx as nx
        box = self.get_box()
        coordinates = get_centers_by_residue(self.headgroups, box=box)
        try:
            adj = contact_matrix(coordinates, cutoff=self.cutoff, box=box,
                                returntype=self.returntype)
        except ValueError as exc:
            if sparse is None:
                warnings.warn("NxN matrix is too big. Switching to sparse "
                            "matrix method")
                adj = contact_matrix(coordinates, cutoff=cutoff, box=box,
                                    returntype="sparse")
            elif sparse is False:
                raise ValueError("NxN matrix is too big. "
                                "Use `sparse=True`") from None
            else:
                raise exc 
        graph = nx.Graph(adj)
        groups = [list(c) for c in nx.connected_components(graph)]
        return groups




[docs]
class SpectralClusteringMethod(GroupingMethod):
    def __init__(self, headgroups: AtomGroup,
                 tailgroups: Optional[AtomGroup]=None,
                 cutoff: float=30.0, pbc: bool=False,
                 n_leaflets: int=2, delta: Optional[float]=20,
                 cosine_threshold: float=0.8,
                 angle_factor: float=1, **kwargs):
        
        try:
            import sklearn.cluster as skc
        except ImportError:
            raise ImportError('scikit-learn is required to use this method '
                            'but is not installed. Install it with `conda '
                            'install scikit-learn` or `pip install '
                            'scikit-learn`.') from None

        super().__init__(headgroups, tailgroups=tailgroups,
                         cutoff=cutoff, pbc=pbc)
        self.delta = delta
        self.cosine_threshold = cosine_threshold
        self.angle_factor = angle_factor
        self.predictor = skc.SpectralClustering(n_clusters=n_leaflets,
                                                affinity="precomputed",
                                                **kwargs)
    
    def _get_kernel(self) -> ArrayLike:
        box = self.get_box()
        
        coordinates = get_centers_by_residue(self.headgroups, box=box)
        orientations = get_orientations(self.headgroups,
                                        tailgroups=self.tailgroups,
                                        box=box, normalize=True,
                                        headgroup_centers=coordinates)
        dist_mat = get_distances_with_projection(coordinates, orientations,
                                                 self.cutoff, box=box,
                                                 angle_factor=self.angle_factor)
        delta = self.delta or np.max(dist_mat[dist_mat < self.cutoff*2]) / 3

        gau = np.exp(- dist_mat ** 2 / (2. * delta ** 2))
        # reasonably acute/obtuse angles are acute/obtuse anough
        angles = np.dot(orientations, orientations.T)
        cos = np.clip(angles, -self.cosine_threshold, self.cosine_threshold)
        cos += self.cosine_threshold
        cos /= (2*self.cosine_threshold)
        mask = ~np.isnan(cos)
        gau[mask] *= cos[mask]
        return gau

    def run(self, **kwargs) -> List[List[int]]:
        kernel = self._get_kernel()
        data_labels = self.predictor.fit_predict(kernel)
        ix = np.argsort(data_labels)
        indices = np.arange(self.n_residues)
        splix = np.where(np.ediff1d(data_labels[ix]))[0] + 1
        cluster_indices = np.split(indices[ix], splix)
        return cluster_indices