from __future__ import division, print_function
import numpy as np
import pandas as pd
from .mix import Mixer
from .teststat import get_ts
from .priors import setup_prior
from .utils import cast_to_array
from .callbacks import setup_callbacks_regularizer
[docs]def iterative_unfold(data=None, data_err=None, response=None,
response_err=None, efficiencies=None,
efficiencies_err=None, prior=None, ts='ks',
ts_stopping=0.01, max_iter=100, cov_type='multinomial',
return_iterations=False, callbacks=None):
"""Performs iterative unfolding
Parameters
----------
data : array_like
Input observed data distribution.
data_err : array_like
Uncertainties of the input observed data distribution. Must be the
same shape as ``data``.
response : array_like
Response matrix.
response_err : array_like
Uncertainties of response matrix. Must be the same shape as
``response``.
efficiencies : array_like
Detection efficiencies for the cause distribution.
efficiencies_err : array_like
Uncertainties of detection efficiencies. Must be the same shape as
``efficiencies``.
prior : array_like, optional
Prior distribution to use in unfolding. If None, then a uniform
(or flat) prior will be used. If array_like, then must have the same
shape as ``efficiencies`` (default is None).
ts : {'ks', 'chi2', 'bf', 'rmd'}
Test statistic to use for stopping condition (default is 'ks').
For more information about the available test statistics, see the
`Test Statistics API documentation <api.rst#test-statistics>`__.
ts_stopping : float, optional
Test statistic stopping condition. At each unfolding iteration, the
test statistic is computed between the current and previous iteration.
Once the test statistic drops below ts_stopping, the unfolding
procedure is stopped (default is 0.01).
max_iter : int, optional
Maximum number of iterations to allow (default is 100).
cov_type : {'multinomial', 'poisson'}
Whether to use the Multinomial or Poisson form for the covariance
matrix (default is 'multinomial').
return_iterations : bool, optional
Whether to return unfolded distributions for each iteration
(default is False).
callbacks : list, optional
List of ``pyunfold.callbacks.Callback`` instances to be applied during
unfolding (default is None, which means no Callbacks are applied).
Returns
-------
unfolded_result : dict
Returned if ``return_iterations`` is False (default). Dictionary
containing the final unfolded distribution, associated uncertainties,
and test statistic information.
The returned ``dict`` has the following keys:
unfolded
Final unfolded cause distribution
stat_err
Statistical uncertainties on the unfolded cause distribution
sys_err
Systematic uncertainties on the unfolded cause distribution
associated with limited statistics in the response matrix
ts_iter
Final test statistic value
ts_stopping
Test statistic stopping criterion
num_iterations
Number of unfolding iterations
unfolding_matrix
Unfolding matrix
unfolding_iters : pandas.DataFrame
Returned if ``return_iterations`` is True. DataFrame containing the
unfolded distribution, associated uncertainties, test statistic
information, etc. at each iteration.
Examples
--------
>>> from pyunfold import iterative_unfold
>>> data = [100, 150]
>>> data_err = [10, 12.2]
>>> response = [[0.9, 0.1],
... [0.1, 0.9]]
>>> response_err = [[0.01, 0.01],
... [0.01, 0.01]]
>>> efficiencies = [1, 1]
>>> efficiencies_err = [0.01, 0.01]
>>> unfolded = iterative_unfold(data=data,
... data_err=data_err,
... response=response,
... response_err=response_err,
... efficiencies=efficiencies,
... efficiencies_err=efficiencies_err)
>>> unfolded
{'num_iterations': 4,
'stat_err': array([11.16853268, 13.65488168]),
'sys_err': array([0.65570621, 0.65570621]),
'ts_iter': 0.0038300087456445975,
'ts_stopping': 0.01,
'unfolded': array([ 94.32086967, 155.67913033]),
'unfolding_matrix': array([[0.8471473 , 0.1528527 ],
[0.06404093, 0.93595907]])}
"""
# Validate user input
inputs = {'data': data,
'data_err': data_err,
'response': response,
'response_err': response_err,
'efficiencies': efficiencies,
'efficiencies_err': efficiencies_err
}
for name in inputs:
if inputs[name] is None:
raise ValueError('The input for {} must not be None.'.format(name))
elif np.amin(inputs[name]) < 0:
raise ValueError('The items in {} must be non-negative.'.format(name))
data, data_err = cast_to_array(data, data_err)
response, response_err = cast_to_array(response, response_err)
efficiencies, efficiencies_err = cast_to_array(efficiencies,
efficiencies_err)
num_causes = len(efficiencies)
# Setup prior
prior = setup_prior(prior=prior, num_causes=num_causes)
# Define first prior counts distribution
n_c = np.sum(data) * prior
# Setup Mixer
mixer = Mixer(data=data,
data_err=data_err,
efficiencies=efficiencies,
efficiencies_err=efficiencies_err,
response=response,
response_err=response_err,
cov_type=cov_type)
# Setup test statistic
ts_obj = get_ts(ts)
ts_func = ts_obj(tol=ts_stopping,
num_causes=num_causes,
TestRange=[0, 1e2],
verbose=False)
unfolding_iters = _unfold(prior=n_c,
mixer=mixer,
ts_func=ts_func,
max_iter=max_iter,
callbacks=callbacks)
if return_iterations:
return unfolding_iters
else:
unfolded_result = dict(unfolding_iters.iloc[-1])
return unfolded_result
def _unfold(prior=None, mixer=None, ts_func=None, max_iter=100,
callbacks=None):
"""Perform iterative unfolding
Parameters
----------
prior : array_like
Initial cause distribution.
mixer : pyunfold.Mix.Mixer
Mixer to perform the unfolding.
ts_func : pyunfold.Utils.TestStat
Test statistic object.
max_iter : int, optional
Maximum allowed number of iterations to perform.
callbacks : list, optional
List of ``pyunfold.callbacks.Callback`` instances to be applied during
unfolding (default is None, which means no Callbacks are applied).
Returns
-------
unfolding_iters : pandas.DataFrame
DataFrame containing the unfolded result for each iteration.
Each row in unfolding_result corresponds to an iteration.
"""
# Set up callbacks, regularizer Callbacks are treated separately
callbacks, regularizer = setup_callbacks_regularizer(callbacks)
callbacks.on_unfolding_begin()
current_n_c = prior.copy()
iteration = 0
unfolding_iters = []
while not ts_func.pass_tol() and iteration < max_iter:
callbacks.on_iteration_begin(iteration=iteration)
# Perform unfolding for this iteration
unfolded_n_c = mixer.smear(current_n_c)
iteration += 1
status = {'unfolded': unfolded_n_c,
'stat_err': mixer.get_stat_err(),
'sys_err': mixer.get_MC_err(),
'num_iterations': iteration,
'unfolding_matrix': mixer.Mij}
if regularizer:
# Will want the nonregularized distribution for the final iteration
unfolded_nonregularized = status['unfolded'].copy()
regularizer.on_iteration_end(iteration=iteration, status=status)
ts_iter = ts_func.calc(status['unfolded'], current_n_c)
status['ts_iter'] = ts_iter
status['ts_stopping'] = ts_func.tol
callbacks.on_iteration_end(iteration=iteration, status=status)
unfolding_iters.append(status)
# Updated current distribution for next iteration of unfolding
current_n_c = status['unfolded'].copy()
# Convert unfolding_iters list of dictionaries to a pandas DataFrame
unfolding_iters = pd.DataFrame.from_records(unfolding_iters)
# Replace final folded iteration with un-regularized distribution
if regularizer:
last_iteration_index = unfolding_iters.index[-1]
unfolding_iters.at[last_iteration_index, 'unfolded'] = unfolded_nonregularized
callbacks.on_unfolding_end(status=status)
return unfolding_iters