Source code for pyunfold.teststat


from __future__ import division, print_function
import numpy as np
from scipy.special import gammaln as lgamma

from .utils import none_to_empty_list


class TestStat(object):
    """Common base class for test statistic methods
    """
    def __init__(self, tol=None, num_causes=None, test_range=None, **kwargs):
        test_range = none_to_empty_list(test_range)
        self.tol = tol
        if num_causes is None:
            raise ValueError('Number of causes (num_causes) must be provided.')
        cause_bin_edges = np.arange(num_causes + 1, dtype=float)
        # Get bin midpoints
        cause_axis = (cause_bin_edges[1:] + cause_bin_edges[:-1]) / 2
        self.cause_axis = cause_axis
        self.ts_range = test_range
        self.has_ts_range = False
        self.ts_bins = self.set_test_range_bins()
        # Initialize Unnatural TS data
        self.stat = np.inf
        self.dof = -1
        self.dofSet = False

    def set_test_range_bins(self):
        bins = [0, -1]
        if self.ts_range != []:
            lTSR = len(self.ts_range)
            err_mess = ("***\n Test stat range can only have two elements. "
                        "This has {}. Exiting...***\n".format(lTSR))
            assert lTSR == 2, err_mess
            xlo = self.ts_range[0]
            xhi = self.ts_range[1]
            err_mess = "***\n Test stat limits reversed. xlo must be < xhi. Exiting...***\n"
            assert xlo < xhi, err_mess

            # Find the bins corresponding to the test range requested
            lobin = np.searchsorted(self.cause_axis, xlo)
            hibin = np.searchsorted(self.cause_axis, xhi)
            bins = [lobin, hibin]
            self.has_ts_range = True
        return bins

    def get_array_range(self, dist1, dist2):
        if self.has_ts_range:
            NR1 = dist1[self.ts_bins[0]:self.ts_bins[1]]
            NR2 = dist2[self.ts_bins[0]:self.ts_bins[1]]
            return NR1, NR2
        else:
            return dist1.copy(), dist2.copy()

    def pass_tol(self):
        """Function testing whether TS < tol
        """
        pass_tol = self.stat < self.tol
        return pass_tol

    def set_dof(self, dof):
        """Set degrees of freedom
        """
        if self.dof == -1:
            self.dof = dof

    def check_lengths(self, dist1, dist2):
        """Test for equal length distributions
        """
        ln1 = len(dist1)
        ln2 = len(dist2)
        err_mess = ("Test Statistic arrays are not equal length. "
                    "{} != {}. Exiting...\n".format(ln1, ln2))
        assert ln1 == ln2, err_mess
        if not self.dofSet:
            self.set_dof(ln1)
            self.dofSet = True

    def calc(self, dist1, dist2):
        """Undefined test statistics calculator
        """
        raise NotImplementedError()


[docs]class Chi2(TestStat): """Reduced chi-squared test statistic """
[docs] def calc(self, dist1, dist2): """Calculate the test statistic between two input distributions Parameters ---------- dist1 : array_like Input distribution. dist2 : array_like Input distribution. Returns ------- stat : float Test statistic """ dist1, dist2 = self.get_array_range(dist1, dist2) self.check_lengths(dist1, dist2) n1 = np.sum(dist1) n2 = np.sum(dist2) h_sum = dist1 + dist2 # Don't divide by 0... h_sum[(h_sum < 1)] = 1. h_dif = n2 * dist1 - n1 * dist2 h_quot = h_dif * h_dif / h_sum stat = np.sum(h_quot)/(n1*n2)/self.dof self.stat = stat return stat
[docs]class BF(TestStat): """Bayes factor test statistic Notes ----- For details related to the Bayes fator see [1]_. References ---------- .. [1] S. Y. BenZvi and B. M. Connolly and C. G. Pfendner and S. Westerhoff. "A Bayesian Approach to Comparing Cosmic Ray Energy Spectra". *The Astrophysical Journal* 738 (1):82. `<https://doi.org/10.1088/0004-637X/738/1/82>`_. """
[docs] def calc(self, dist1, dist2): """Calculate the test statistic between two input distributions Parameters ---------- dist1 : array_like Input distribution. dist2 : array_like Input distribution. Returns ------- stat : float Test statistic """ dist1, dist2 = self.get_array_range(dist1, dist2) self.check_lengths(dist1, dist2) lnB = 0 n1 = np.sum(dist1) n2 = np.sum(dist2) nFactor = lgamma(n1+n2+2) - lgamma(n1+1) - lgamma(n2+1) lnB += nFactor for i in range(0, len(dist1)): lnB += lgamma(dist1[i]+1) + lgamma(dist2[i]+1) - lgamma(dist1[i]+dist2[i]+2) self.stat = lnB return lnB
[docs]class RMD(TestStat): """Maximum relative difference test statistic """
[docs] def calc(self, dist1, dist2): """Calculate the test statistic between two input distributions Parameters ---------- dist1 : array_like Input distribution. dist2 : array_like Input distribution. Returns ------- stat : float Test statistic """ dist1, dist2 = self.get_array_range(dist1, dist2) self.check_lengths(dist1, dist2) h_sum = dist1+dist2 h_sum[(h_sum < 1)] = 1. h_dif = np.abs(dist1 - dist2) h_quot = h_dif / h_sum stat = np.max(h_quot) self.stat = stat return stat
[docs]class KS(TestStat): """Kolmogorov-Smirnov (KS) two-sided test statistic """
[docs] def calc(self, dist1, dist2): """Calculate the test statistic between two input distributions Parameters ---------- dist1 : array_like Input distribution. dist2 : array_like Input distribution. Returns ------- stat : float Test statistic """ dist1, dist2 = self.get_array_range(dist1, dist2) self.check_lengths(dist1, dist2) n1 = np.sum(dist1) n2 = np.sum(dist2) cs1 = np.cumsum(dist1)/n1 cs2 = np.cumsum(dist2)/n2 len1 = len(dist1) self.en = np.sqrt(len1/2) stat = np.max(np.abs(cs1-cs2)) self.stat = stat return stat
TEST_STATISTICS = {"chi2": Chi2, "bf": BF, "rmd": RMD, "ks": KS, }
[docs]def get_ts(name='ks'): """Convenience function for retrieving test statisitc calculators Parameters ---------- name : {'ks', 'chi2', 'bf', 'rmd'} Name of test statistic. Returns ------- ts : TestStat Test statistics calculator """ if name in TEST_STATISTICS: ts = TEST_STATISTICS[name] return ts else: raise ValueError('Invalid test statistic, {}, entered. Must be ' 'in {}'.format(name, TEST_STATISTICS.keys()))