Source code for pyunfold.unfold


from __future__ import division, print_function
import numpy as np
import pandas as pd

from .mix import Mixer
from .teststat import get_ts
from .priors import setup_prior
from .utils import cast_to_array
from .callbacks import setup_callbacks_regularizer


[docs]def iterative_unfold(data=None, data_err=None, response=None, response_err=None, efficiencies=None, efficiencies_err=None, prior=None, ts='ks', ts_stopping=0.01, max_iter=100, cov_type='multinomial', return_iterations=False, callbacks=None): """Performs iterative unfolding Parameters ---------- data : array_like Input observed data distribution. data_err : array_like Uncertainties of the input observed data distribution. Must be the same shape as ``data``. response : array_like Response matrix. response_err : array_like Uncertainties of response matrix. Must be the same shape as ``response``. efficiencies : array_like Detection efficiencies for the cause distribution. efficiencies_err : array_like Uncertainties of detection efficiencies. Must be the same shape as ``efficiencies``. prior : array_like, optional Prior distribution to use in unfolding. If None, then a uniform (or flat) prior will be used. If array_like, then must have the same shape as ``efficiencies`` (default is None). ts : {'ks', 'chi2', 'bf', 'rmd'} Test statistic to use for stopping condition (default is 'ks'). For more information about the available test statistics, see the `Test Statistics API documentation <api.rst#test-statistics>`__. ts_stopping : float, optional Test statistic stopping condition. At each unfolding iteration, the test statistic is computed between the current and previous iteration. Once the test statistic drops below ts_stopping, the unfolding procedure is stopped (default is 0.01). max_iter : int, optional Maximum number of iterations to allow (default is 100). cov_type : {'multinomial', 'poisson'} Whether to use the Multinomial or Poisson form for the covariance matrix (default is 'multinomial'). return_iterations : bool, optional Whether to return unfolded distributions for each iteration (default is False). callbacks : list, optional List of ``pyunfold.callbacks.Callback`` instances to be applied during unfolding (default is None, which means no Callbacks are applied). Returns ------- unfolded_result : dict Returned if ``return_iterations`` is False (default). Dictionary containing the final unfolded distribution, associated uncertainties, and test statistic information. The returned ``dict`` has the following keys: unfolded Final unfolded cause distribution stat_err Statistical uncertainties on the unfolded cause distribution sys_err Systematic uncertainties on the unfolded cause distribution associated with limited statistics in the response matrix ts_iter Final test statistic value ts_stopping Test statistic stopping criterion num_iterations Number of unfolding iterations unfolding_matrix Unfolding matrix unfolding_iters : pandas.DataFrame Returned if ``return_iterations`` is True. DataFrame containing the unfolded distribution, associated uncertainties, test statistic information, etc. at each iteration. Examples -------- >>> from pyunfold import iterative_unfold >>> data = [100, 150] >>> data_err = [10, 12.2] >>> response = [[0.9, 0.1], ... [0.1, 0.9]] >>> response_err = [[0.01, 0.01], ... [0.01, 0.01]] >>> efficiencies = [1, 1] >>> efficiencies_err = [0.01, 0.01] >>> unfolded = iterative_unfold(data=data, ... data_err=data_err, ... response=response, ... response_err=response_err, ... efficiencies=efficiencies, ... efficiencies_err=efficiencies_err) >>> unfolded {'num_iterations': 4, 'stat_err': array([11.16853268, 13.65488168]), 'sys_err': array([0.65570621, 0.65570621]), 'ts_iter': 0.0038300087456445975, 'ts_stopping': 0.01, 'unfolded': array([ 94.32086967, 155.67913033]), 'unfolding_matrix': array([[0.8471473 , 0.1528527 ], [0.06404093, 0.93595907]])} """ # Validate user input inputs = {'data': data, 'data_err': data_err, 'response': response, 'response_err': response_err, 'efficiencies': efficiencies, 'efficiencies_err': efficiencies_err } for name in inputs: if inputs[name] is None: raise ValueError('The input for {} must not be None.'.format(name)) elif np.amin(inputs[name]) < 0: raise ValueError('The items in {} must be non-negative.'.format(name)) data, data_err = cast_to_array(data, data_err) response, response_err = cast_to_array(response, response_err) efficiencies, efficiencies_err = cast_to_array(efficiencies, efficiencies_err) num_causes = len(efficiencies) # Setup prior prior = setup_prior(prior=prior, num_causes=num_causes) # Define first prior counts distribution n_c = np.sum(data) * prior # Setup Mixer mixer = Mixer(data=data, data_err=data_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err, response=response, response_err=response_err, cov_type=cov_type) # Setup test statistic ts_obj = get_ts(ts) ts_func = ts_obj(tol=ts_stopping, num_causes=num_causes, TestRange=[0, 1e2], verbose=False) unfolding_iters = _unfold(prior=n_c, mixer=mixer, ts_func=ts_func, max_iter=max_iter, callbacks=callbacks) if return_iterations: return unfolding_iters else: unfolded_result = dict(unfolding_iters.iloc[-1]) return unfolded_result
def _unfold(prior=None, mixer=None, ts_func=None, max_iter=100, callbacks=None): """Perform iterative unfolding Parameters ---------- prior : array_like Initial cause distribution. mixer : pyunfold.Mix.Mixer Mixer to perform the unfolding. ts_func : pyunfold.Utils.TestStat Test statistic object. max_iter : int, optional Maximum allowed number of iterations to perform. callbacks : list, optional List of ``pyunfold.callbacks.Callback`` instances to be applied during unfolding (default is None, which means no Callbacks are applied). Returns ------- unfolding_iters : pandas.DataFrame DataFrame containing the unfolded result for each iteration. Each row in unfolding_result corresponds to an iteration. """ # Set up callbacks, regularizer Callbacks are treated separately callbacks, regularizer = setup_callbacks_regularizer(callbacks) callbacks.on_unfolding_begin() current_n_c = prior.copy() iteration = 0 unfolding_iters = [] while not ts_func.pass_tol() and iteration < max_iter: callbacks.on_iteration_begin(iteration=iteration) # Perform unfolding for this iteration unfolded_n_c = mixer.smear(current_n_c) iteration += 1 status = {'unfolded': unfolded_n_c, 'stat_err': mixer.get_stat_err(), 'sys_err': mixer.get_MC_err(), 'num_iterations': iteration, 'unfolding_matrix': mixer.Mij} if regularizer: # Will want the nonregularized distribution for the final iteration unfolded_nonregularized = status['unfolded'].copy() regularizer.on_iteration_end(iteration=iteration, status=status) ts_iter = ts_func.calc(status['unfolded'], current_n_c) status['ts_iter'] = ts_iter status['ts_stopping'] = ts_func.tol callbacks.on_iteration_end(iteration=iteration, status=status) unfolding_iters.append(status) # Updated current distribution for next iteration of unfolding current_n_c = status['unfolded'].copy() # Convert unfolding_iters list of dictionaries to a pandas DataFrame unfolding_iters = pd.DataFrame.from_records(unfolding_iters) # Replace final folded iteration with un-regularized distribution if regularizer: last_iteration_index = unfolding_iters.index[-1] unfolding_iters.at[last_iteration_index, 'unfolded'] = unfolded_nonregularized callbacks.on_unfolding_end(status=status) return unfolding_iters