Source code for peccary.core

"""
PECCARY method.

The class ``peccary`` is the core of the package and does
all of the Permutation Entropy and Statistical Complexity calculations.
"""

import numpy as np
from math import factorial

from .timeseries import Timeseries
from . import utils

__all__ = ["peccary"]

[docs] class peccary: """ The ``peccary`` class is the core of the PECCARY method, which is based on work by Bandt & Pompe (2002), Rosso et al. (2007), and Weck et al. (2015). The class extracts ordinal patterns of a specified sampling size :math:`n` (default :math:`n=5`) and calculates a probability distribution of all possible ordinal pattern permutations. After the pattern distribution has been created, the values for Permutation Entropy and Jensen-Shanon Statistical Complexity can be calculated. The ``peccary`` class can be initialized with either a 1-D array or a ``Timeseries`` instance. In the case of a timeseries object, the desired attribute to use for analysis should be indicated with the ``attr`` parameter. If there are multiple particles for the data, the desired particle should be indexed with the ``ptcl`` parameter. Although the ``dt`` parameter is optional, it is recommended to specifiy it if ``data`` is a standard 1-D array, since it is used in the convenience methods of ``tPat`` and ``ell_from_tPat``. See :ref:`Getting started <start>` for a demostration of how to initialize ``peccary`` with a ``Timeseries`` instance. References ---------- [1] `Bandt, C., & Pompe, B. 2002, Phys Rev Lett, 88 (American Physical Society), 174102 <https://link.aps.org/doi/10.1103/PhysRevLett.88.174102>`__ [2] `Rosso, O. A., Larrondo, H. A., Martin, M. T., Plastino, A., & Fuentes, M. A. 2007, Phys Rev Lett, 99 (American Physical Society), 154102 <https://link.aps.org/doi/10.1103/PhysRevLett.99.154102>`__ [3] `Weck, P. J., Schaffner, D. A., Brown, M. R., & Wicks, R. T. 2015, Phys Rev E, 91 (American Physical Society), 023101 <https://link.aps.org/doi/10.1103/PhysRevE.91.023101>`__ """ def __init__(self, data, n=5, attr=None, dt=None, ptcl=None): """ Initialize PECCARY class Parameters ---------- data : 1-D array Timeseries data for PECCARY n : int, optional Sampling size, by default 5, by default 5 attr : str, optional Timeseries attribute; needed if extracting a coordinate or data attribute from a Timeseries object, by default None dt : float, optional Timestep resolution; needed if using built-in, by default None ptcl: int, optional Particle index, by default None Attributes ---------- tser : 1-D array Data timeseries tlen : int Length (number of timesteps of timeseries) n : int Sampling size N : int Total number of possible ordinal pattern permutations (:math:`n!`) invN : float Probability of all ordinal patterns for unifor distribution (:math:`1/n!`) log2_N : float Calculated value of :math:`\log_2 (n!)` log2_Np1 : float Calculated value of :math:`\log_2 (n! + 1)` Raises ------ TypeError Data must be a 1-D array """ # Check if data is Timeseries object if isinstance(data, Timeseries): # # Check if there's more than one data attribute stored # attrCheck = np.array([x is not None for x in [data.x, data.y, data.z, data.data]]) # if np.sum(attrCheck) > 1: # self.tser = np.where(attrCheck) self.tser = getattr(data, attr) # data timeseries self.dt = data.dt else: # Setting up data self.tser=np.array(data) # data timeseries if ptcl is not None: if type(ptcl) is int: self.tser = self.tser[ptcl] else: raise TypeError('Particle index (ptcl) must be integer') if len(self.tser.shape)>1: raise TypeError('Data must be a 1-D array') # Precalculate constant values for later use self.tlen = len(self.tser) # length of timeseries self.n = n # sampling size self.N = factorial(n) # n! self.invN = 1./self.N # 1/n! self.log2_N = np.log2(self.N) # log_2(n!) self.log2_Np1 = np.log2(self.N+1.) # log_2(n! + 1)
[docs] def tPat(self, sampInt=1): """ Calculates pattern time of in PECCARY routine, based on sampling size and sampling interval Parameters ---------- dt : float Timestep interval sampInt : int or ndarray, optional Sampling interval, by default 1 Returns ------- float or ndarray Pattern time(s) corresponding to inputted sampling interval(s) """ return utils.ell2tpat(ell=sampInt, n=self.n, dt=self.dt)
[docs] def ell_from_tPat(self, tPat): """ Calculates the sampling interval in PECCARY routine based on the pattern time and sampling size Parameters ---------- dt : float Timestep interval tPat : float or ndarray Pattern time Returns ------- float or ndarray Sampling interval(s) corresponding to inputted pattern time(s) """ return utils.tpat2ell(tpat=tPat, n=self.n, dt=self.dt)
[docs] def getPtot(self,sampInt=1): """ Get total number of permutations based on sampling interval Parameters ---------- sampInt : int, optional Sampling interval, by default 1 Returns ------- int Total number of permutations """ return self.tlen - sampInt*(self.n - 1) # Total number of order n permutations in T; Weck+2015 Eq.(1) denominator
[docs] def constructPatternCount(self,sampInt=1): """ Count occurance of patterns and total number of permutations Parameters ---------- sampInt : int, optional Sampling interval, by default 1 Returns ------- count : ndarray A Count occurance of patterns Ptot : int Total number of permutations """ Ptot = self.tlen - sampInt*(self.n - 1) # Total number of order n permutations in T; Weck+2015 Eq.(1) denominator patterns = np.array([self.tser[i:i+(self.n-1)*sampInt+1:sampInt].argsort(kind='stable') for i in range(Ptot)]) # Get patterns based on sampling size and interval count = np.unique(patterns, axis=0, return_counts=True) # Count the number of unique ordinal patterns return count, Ptot
[docs] def getPatternsCounts(self,sampInt=1): """ Return patterns, respective counts, and total number of permutations Parameters ---------- sampInt : int, optional Sampling interval, by default 1 Returns ------- patterns : ndarray Patterns count : ndarray Count occurances of patterns Ptot : int Total number of permutations """ Ptot = self.tlen - sampInt*(self.n - 1) # Total number of order n permutations in T; Weck+2015 Eq.(1) denominator patterns = np.array([self.tser[i:i+(self.n-1)*sampInt+1:sampInt].argsort(kind='stable') for i in range(Ptot)]) # Get patterns based on sampling size and interval count = np.unique(patterns, axis=0, return_counts=True) # Count the number of unique ordinal patterns return count[0], count[1], Ptot
[docs] def calcS(self, sampInt=1): """ Calculate Shannon permutation entropy (S) and disqeulibrium (S_e) Parameters ---------- sampInt : int, optional Sampling interval, by default 1 Returns ------- Sp : float Shannon permutation entropy Se : float Disequilibrium """ count,Ptot = self.constructPatternCount(sampInt) # Get pattern counts and total number of permutations invPtot=1./Ptot # Inverse of total number of permutations # Calculate S from the count probabilities = count[1]*invPtot # Probability of each pattern occurence; Weck+2015 Eq.(1) S = -1.*np.sum([p*np.log2(p) for p in probabilities]) # Shannon entropy formula for pattern probabilities Se = -1.*np.sum([P_Pe_sum_2*np.log2(P_Pe_sum_2) for P_Pe_sum_2 in 0.5*(probabilities+self.invN)]) + 0.5*(self.N-len(probabilities))*self.invN*(1+self.log2_N) # Disequilibrium between distribution and uniform distribution; Schaffner & Daniel (in prep), Eq.(8) return S, Se
[docs] def calcH(self,sampInt=1): """ Calculate normalized Permutation Entropy Parameters ---------- sampInt : int, optional Sampling interval, by default 1 Returns ------- Sp : float Normalized Shannon permutation entropy Se : float Normalized Shannon + Uniform permutation entropy """ S, Se = self.calcS(sampInt) # Shannon entropy and disequilibrium return S/self.log2_N,Se/self.log2_N
[docs] def calcHC(self,sampInt=1): """ Calculate normalized Jensen-Shannon statistical complexity and normalized permutation entropy Parameters ---------- sampInt : int, optional Sampling interval, by default 1 Returns ------- H : float Normalized Shannon permutation entropy C : float Normalized Jensen-Shannon statistical complexity """ S, Se = self.calcS(sampInt) # Shannon entropy and disequilibrium H = S/self.log2_N # Normalized permutation entropy; Schaffner & Daniel (in prep) Eq.(3) C = -2.*((Se - 0.5*S - 0.5*self.log2_N) /((1. + self.invN)*self.log2_Np1 - 2.*np.log2(2.*self.N) + self.log2_N)*(H)) # Jensen-Shannon statistical complexity; Weck+15 Eq.(4) return H, C
[docs] def calcC_fromSSe(self, S, Se): """ Calculate normalized Jensen-Shannon statistical complexity from pre-calculated Shannon permutation entropy and disequilibrium values Parameters ---------- S : float Shannon permutation entropy Se : float Disequilibrium Returns ------- float Normalized Jensen-Shannon statistical complexity """ return -2.*((Se - 0.5*S - 0.5*self.log2_N)/((1. + self.invN)*self.log2_Np1 - 2.*np.log2(2.*self.N) + self.log2_N)*(S/self.log2_N)) # Jensen-Shannon statistical complexity; Weck+15 Eq.(4)
[docs] def calcS_fromPatternCount(self, count, Ptot): """ Calculate Shannon permutation entropy and disequilibrium from pattern count and total number of permutations Parameters ---------- count : ndarray Count occurance result (e.g., from constructPatternCount) Ptot : int Total number of permutations (e.g., from constructPatternCount) Returns ------- Sp : float Normalized Shannon permutation entropy Se : float Normalized Shannon + Uniform permutation entropy """ invPtot=1./Ptot # Inverse of total number of permutations probabilities = count[1]*invPtot # Probability of each pattern occurence; Weck+2015 Eq.(1) S = -1.*np.sum([p*np.log2(p) for p in probabilities]) # Shannon entropy formula for pattern probabilities Se = -1.*np.sum([P_Pe_sum_2*np.log2(P_Pe_sum_2) for P_Pe_sum_2 in 0.5*(probabilities+self.invN)]) + 0.5*(self.N-len(probabilities))*self.invN*(1+self.log2_N) # Disequilibrium between distribution and uniform distribution; Schaffner & Daniel (in prep), Eq.(8) return S,Se
[docs] def calcHCcurves(self, min_sampInt=1, max_sampInt=100, step_sampInt=1, sampIntArray=None): """ Returns Permutation Entropy and Statistical Complexity values for multiple sampling interval values, i.e., :math:`H(\ell)` and :math:`C(\ell)` Parameters ---------- data : array Timeseries data for PECCARY n : int, optional Sampling size, by default 5 min_sampInt : int, optional Smallest sampling interval to loop through, by default 1 max_sampInt : int, optional Largest sampling interval to loop through, by default 100 step_sampInt : int, optional How many sampling interval values to skip over sampIntArray: ndarray or list Custom array of sampling intervals, supersedes min_sampInt, max_sampInt, and step_sampInt, by default None Returns ------- Hvals : ndarray Normalized Shannon Perumation Entropy as function of sampling interval, :math:`H(\ell)` Cvals : ndarray Normalized Jensen-Shannon complexity as function of sampling interval, :math:`C(\ell)` sampInts : ndarray Sampling interval (:math:`\ell`) values """ if type(sampIntArray) != type(None): sampInts = sampIntArray # Array of sampling intervals from sampIntArray else: sampInts = np.arange(min_sampInt,max_sampInt,step_sampInt) # Make array of sampling intervals from min_sampInt to max_sampInt, increasing by 1 Hvals, Cvals = np.array([self.calcHC(sampInt=int(sampInts[i])) for i in range(len(sampInts))]).T # Get normalized permutation entropy and statisical complexity for each of the sampling intervals return Hvals, Cvals, sampInts