Source code for peccary.core

"""
PECCARY method.

The class ``peccary`` is the core of the package and does
all of the Permutation Entropy and Statistical Complexity calculations.
"""

import numpy as np
from math import factorial

from .timeseries import Timeseries
from . import utils

__all__ = ["peccary"]


[docs]
class peccary:
    """
    The ``peccary`` class is the core of the PECCARY method, which is based
    on work by Bandt & Pompe (2002), Rosso et al. (2007), and Weck et al. (2015).
    The class extracts ordinal patterns of a specified sampling size :math:`n` 
    (default :math:`n=5`) and calculates a probability distribution of all possible
    ordinal pattern permutations. After the pattern distribution has been created,
    the values for Permutation Entropy and Jensen-Shanon Statistical Complexity can 
    be calculated. 

    The ``peccary`` class can be initialized with either a 1-D array or a
    ``Timeseries`` instance. In the case of a timeseries object, the desired attribute
    to use for analysis should be indicated with the ``attr`` parameter. If there
    are multiple particles for the data, the desired particle should be indexed with
    the ``ptcl`` parameter. Although the ``dt`` parameter is optional, it is recommended
    to specifiy it if ``data`` is a standard 1-D array, since it is used in the convenience
    methods of ``tPat`` and ``ell_from_tPat``.
    
    See :ref:`Getting started <start>` for a demostration of how to initialize ``peccary`` with a 
    ``Timeseries`` instance. 

    References
    ----------
    [1] `Bandt, C., & Pompe, B. 2002, Phys Rev Lett, 88 (American Physical Society), 174102 <https://link.aps.org/doi/10.1103/PhysRevLett.88.174102>`__

    [2] `Rosso, O. A., Larrondo, H. A., Martin, M. T., Plastino, A., & Fuentes, M. A. 2007, Phys Rev Lett, 99 (American Physical Society), 154102 <https://link.aps.org/doi/10.1103/PhysRevLett.99.154102>`__
    
    [3] `Weck, P. J., Schaffner, D. A., Brown, M. R., & Wicks, R. T. 2015, Phys Rev E, 91 (American Physical Society), 023101 <https://link.aps.org/doi/10.1103/PhysRevE.91.023101>`__
    """
    def __init__(self, data, n=5, attr=None, dt=None, ptcl=None):
        """
        Initialize PECCARY class

        Parameters
        ----------
        data : 1-D array
            Timeseries data for PECCARY 
        n : int, optional
            Sampling size, by default 5, by default 5
        attr : str, optional
            Timeseries attribute; needed if extracting a coordinate
            or data attribute from a Timeseries object, by default None
        dt : float, optional
            Timestep resolution; needed if using built-in, 
            by default None
        ptcl: int, optional
            Particle index, by default None

        Attributes
        ----------
        tser : 1-D array
            Data timeseries
        tlen : int
            Length (number of timesteps of timeseries)
        n : int
            Sampling size
        N : int
            Total number of possible ordinal pattern permutations (:math:`n!`)
        invN : float
            Probability of all ordinal patterns for unifor distribution (:math:`1/n!`)
        log2_N : float
            Calculated value of :math:`\log_2 (n!)`
        log2_Np1 : float
            Calculated value of :math:`\log_2 (n! + 1)`

        Raises
        ------
        TypeError
            Data must be a 1-D array
        """
        # Check if data is Timeseries object
        if isinstance(data, Timeseries):
            # # Check if there's more than one data attribute stored
            # attrCheck = np.array([x is not None for x in [data.x, data.y, data.z, data.data]])
            # if np.sum(attrCheck) > 1:
            #     self.tser = np.where(attrCheck)
            self.tser = getattr(data, attr) # data timeseries
            self.dt = data.dt
        else:
            # Setting up data
            self.tser=np.array(data) # data timeseries

        if ptcl is not None:
            if type(ptcl) is int:
                self.tser = self.tser[ptcl]
            else:
                raise TypeError('Particle index (ptcl) must be integer')
        
        if len(self.tser.shape)>1:
            raise TypeError('Data must be a 1-D array')

        # Precalculate constant values for later use
        self.tlen = len(self.tser) # length of timeseries
        self.n = n # sampling size
        self.N = factorial(n) # n!
        self.invN = 1./self.N # 1/n!
        self.log2_N = np.log2(self.N) # log_2(n!)
        self.log2_Np1 = np.log2(self.N+1.) # log_2(n! + 1)


[docs]
    def tPat(self, sampInt=1):
        """
        Calculates pattern time of in PECCARY routine, based on 
        sampling size and sampling interval

        Parameters
        ----------
        dt : float
            Timestep interval
        sampInt : int or ndarray, optional
            Sampling interval, by default 1

        Returns
        -------
        float or ndarray
            Pattern time(s) corresponding to inputted sampling interval(s)
        """
        return utils.ell2tpat(ell=sampInt, n=self.n, dt=self.dt)

    

[docs]
    def ell_from_tPat(self, tPat):
        """
        Calculates the sampling interval in PECCARY routine based 
        on the pattern time and sampling size

        Parameters
        ----------
        dt : float
            Timestep interval
        tPat : float or ndarray
            Pattern time

        Returns
        -------
        float or ndarray
            Sampling interval(s) corresponding to inputted pattern time(s)
        """
        return utils.tpat2ell(tpat=tPat, n=self.n, dt=self.dt)

    

[docs]
    def getPtot(self,sampInt=1):
        """
        Get total number of permutations based on sampling interval

        Parameters
        ----------
        sampInt : int, optional
            Sampling interval, by default 1

        Returns
        -------
        int
            Total number of permutations
        """
        return self.tlen - sampInt*(self.n - 1) # Total number of order n permutations in T; Weck+2015 Eq.(1) denominator



[docs]
    def constructPatternCount(self,sampInt=1):
        """
        Count occurance of patterns and total number of permutations

        Parameters
        ----------
        sampInt : int, optional
            Sampling interval, by default 1

        Returns
        -------
        count : ndarray
            A Count occurance of patterns
        Ptot : int 
            Total number of permutations
        """
        Ptot = self.tlen - sampInt*(self.n - 1) # Total number of order n permutations in T; Weck+2015 Eq.(1) denominator
        
        patterns = np.array([self.tser[i:i+(self.n-1)*sampInt+1:sampInt].argsort(kind='stable') for i in range(Ptot)]) # Get patterns based on sampling size and interval
        count = np.unique(patterns, axis=0, return_counts=True) # Count the number of unique ordinal patterns
        
        return count, Ptot



[docs]
    def getPatternsCounts(self,sampInt=1):
        """
        Return patterns, respective counts, and total number of permutations

        Parameters
        ----------
        sampInt : int, optional
            Sampling interval, by default 1

        Returns
        -------
        patterns : ndarray
            Patterns
        count : ndarray
            Count occurances of patterns
        Ptot : int 
            Total number of permutations
        """
        Ptot = self.tlen - sampInt*(self.n - 1) # Total number of order n permutations in T; Weck+2015 Eq.(1) denominator
        
        patterns = np.array([self.tser[i:i+(self.n-1)*sampInt+1:sampInt].argsort(kind='stable') for i in range(Ptot)]) # Get patterns based on sampling size and interval
        count = np.unique(patterns, axis=0, return_counts=True) # Count the number of unique ordinal patterns
        
        return count[0], count[1], Ptot

    

[docs]
    def calcS(self, sampInt=1):		
        """
        Calculate Shannon permutation entropy (S) and disqeulibrium (S_e)

        Parameters
        ----------
        sampInt : int, optional
            Sampling interval, by default 1

        Returns
        -------
        Sp : float
            Shannon permutation entropy
        Se : float
            Disequilibrium
        """
        count,Ptot = self.constructPatternCount(sampInt) # Get pattern counts and total number of permutations
        invPtot=1./Ptot # Inverse of total number of permutations

        # Calculate S from the count
        probabilities = count[1]*invPtot # Probability of each pattern occurence; Weck+2015 Eq.(1)
        S = -1.*np.sum([p*np.log2(p) for p in probabilities]) # Shannon entropy formula for pattern probabilities
        Se = -1.*np.sum([P_Pe_sum_2*np.log2(P_Pe_sum_2) for P_Pe_sum_2 in 0.5*(probabilities+self.invN)]) + 0.5*(self.N-len(probabilities))*self.invN*(1+self.log2_N) # Disequilibrium between distribution and uniform distribution; Schaffner & Daniel (in prep), Eq.(8)
        return S, Se



[docs]
    def calcH(self,sampInt=1):		
        """
        Calculate normalized Permutation Entropy

        Parameters
        ----------
        sampInt : int, optional
            Sampling interval, by default 1

        Returns
        -------
        Sp : float
            Normalized Shannon permutation entropy
        Se : float
            Normalized Shannon + Uniform permutation entropy
        """
        S, Se = self.calcS(sampInt) # Shannon entropy and disequilibrium
        return S/self.log2_N,Se/self.log2_N 



[docs]
    def calcHC(self,sampInt=1):
        """
        Calculate normalized Jensen-Shannon statistical complexity 
        and normalized permutation entropy

        Parameters
        ----------
        sampInt : int, optional
            Sampling interval, by default 1

        Returns
        -------
        H : float
            Normalized Shannon permutation entropy
        C : float
            Normalized Jensen-Shannon statistical complexity
        """
        S, Se  = self.calcS(sampInt) # Shannon entropy and disequilibrium
        H = S/self.log2_N # Normalized permutation entropy; Schaffner & Daniel (in prep) Eq.(3)
        C = -2.*((Se - 0.5*S - 0.5*self.log2_N)
                /((1. + self.invN)*self.log2_Np1 - 2.*np.log2(2.*self.N) 
                + self.log2_N)*(H)) # Jensen-Shannon statistical complexity; Weck+15 Eq.(4)

        return H, C



[docs]
    def calcC_fromSSe(self, S, Se):
        """
        Calculate normalized Jensen-Shannon statistical complexity
        from pre-calculated Shannon permutation entropy and 
        disequilibrium values

        Parameters
        ----------
        S : float
            Shannon permutation entropy
        Se : float
            Disequilibrium

        Returns
        -------
        float
            Normalized Jensen-Shannon statistical complexity
        """
        return -2.*((Se - 0.5*S - 0.5*self.log2_N)/((1. + self.invN)*self.log2_Np1 - 2.*np.log2(2.*self.N) + self.log2_N)*(S/self.log2_N)) # Jensen-Shannon statistical complexity; Weck+15 Eq.(4)



[docs]
    def calcS_fromPatternCount(self, count, Ptot):
        """
        Calculate Shannon permutation entropy and disequilibrium 
        from pattern count and total number of permutations

        Parameters
        ----------
        count : ndarray
            Count occurance result (e.g., from constructPatternCount)
        Ptot : int
            Total number of permutations (e.g., from constructPatternCount)

        Returns
        -------
        Sp : float
            Normalized Shannon permutation entropy
        Se : float
            Normalized Shannon + Uniform permutation entropy
        """
        invPtot=1./Ptot # Inverse of total number of permutations

        probabilities = count[1]*invPtot # Probability of each pattern occurence; Weck+2015 Eq.(1)
        S = -1.*np.sum([p*np.log2(p) for p in probabilities]) # Shannon entropy formula for pattern probabilities
        Se = -1.*np.sum([P_Pe_sum_2*np.log2(P_Pe_sum_2) for P_Pe_sum_2 in 0.5*(probabilities+self.invN)]) + 0.5*(self.N-len(probabilities))*self.invN*(1+self.log2_N) # Disequilibrium between distribution and uniform distribution; Schaffner & Daniel (in prep), Eq.(8)
        return S,Se



[docs]
    def calcHCcurves(self, min_sampInt=1, max_sampInt=100, step_sampInt=1, sampIntArray=None):
        """
        Returns Permutation Entropy and Statistical Complexity values for multiple
        sampling interval values, i.e., :math:`H(\ell)` and :math:`C(\ell)`

        Parameters
        ----------
        data : array
            Timeseries data for PECCARY
        n : int, optional
            Sampling size, by default 5
        min_sampInt : int, optional
            Smallest sampling interval to loop through, by default 1
        max_sampInt : int, optional
            Largest sampling interval to loop through, by default 100
        step_sampInt : int, optional
            How many sampling interval values to skip over
        sampIntArray: ndarray or list
            Custom array of sampling intervals, supersedes min_sampInt,
            max_sampInt, and step_sampInt, by default None

        Returns
        -------
        Hvals : ndarray
            Normalized Shannon Perumation Entropy as function of sampling interval, :math:`H(\ell)`
        Cvals : ndarray
            Normalized Jensen-Shannon complexity as function of sampling interval, :math:`C(\ell)`
        sampInts : ndarray
            Sampling interval (:math:`\ell`) values
        """
        if type(sampIntArray) != type(None):
            sampInts = sampIntArray # Array of sampling intervals from sampIntArray
        else:
            sampInts = np.arange(min_sampInt,max_sampInt,step_sampInt) # Make array of sampling intervals from min_sampInt to max_sampInt, increasing by 1
        Hvals, Cvals = np.array([self.calcHC(sampInt=int(sampInts[i])) for i in range(len(sampInts))]).T # Get normalized permutation entropy and statisical complexity for each of the sampling intervals
        return Hvals, Cvals, sampInts