Source code for comptools.unfolding


import os
from itertools import product
import numpy as np
import pandas as pd
import socket

from .composition_encoding import get_comp_list
from .base import get_energybins, check_output_dir
from .data_functions import ratio_error


[docs]def unfolded_counts_dist(unfolding_df, iteration=-1, num_groups=4): """ Convert unfolded distributions DataFrame from PyUnfold counts arrays to a dictionary containing a counts array for each composition. Parameters ---------- unfolding_df : pandas.DataFrame Unfolding DataFrame returned from PyUnfold. iteration : int, optional Specific unfolding iteration to retrieve unfolded counts (default is -1, the last iteration). num_groups : int, optional Number of composition groups (default is 4). Returns ------- counts : dict Dictionary with composition-counts key-value pairs. counts_sys_err : dict Dictionary with composition-systematic error key-value pairs. counts_stat_err : dict Dictionary with composition-statistical error key-value pairs. """ comp_list = get_comp_list(num_groups=num_groups) df_iter = unfolding_df.iloc[iteration] counts, counts_sys_err, counts_stat_err = {}, {}, {} for idx, composition in enumerate(comp_list): counts[composition] = df_iter['unfolded'][idx::num_groups] counts_sys_err[composition] = df_iter['sys_err'][idx::num_groups] counts_stat_err[composition] = df_iter['stat_err'][idx::num_groups] counts['total'] = np.sum([counts[composition] for composition in comp_list], axis=0) counts_sys_err['total'] = np.sqrt(np.sum([counts_sys_err[composition]**2 for composition in comp_list], axis=0)) counts_stat_err['total'] = np.sqrt(np.sum([counts_stat_err[composition]**2 for composition in comp_list], axis=0)) return counts, counts_sys_err, counts_stat_err
[docs]def column_normalize(res, res_err, efficiencies, efficiencies_err): res_col_sum = res.sum(axis=0) res_col_sum_err = np.array([np.sqrt(np.sum(res_err[:, i]**2)) for i in range(res_err.shape[1])]) normalizations, normalizations_err = ratio_error( res_col_sum, res_col_sum_err, efficiencies, efficiencies_err, nan_to_num=True) res_normalized, res_normalized_err = ratio_error( res, res_err, normalizations, normalizations_err, nan_to_num=True) res_normalized = np.nan_to_num(res_normalized) res_normalized_err = np.nan_to_num(res_normalized_err) # Test that the columns of res_normalized equal efficiencies np.testing.assert_allclose(res_normalized.sum(axis=0), efficiencies) return res_normalized, res_normalized_err
[docs]def response_hist(true_energy, reco_energy, true_target, pred_target, energy_bins=None): """Computes energy-composition response matrix Parameters ---------- true_energy : array_like Array of true (MC) energies. reco_energy : array_like Array of reconstructed energies. true_target : array_like Array of true compositions that are encoded to numerical values. pred_target : array_like Array of predicted compositions that are encoded to numerical values. energy_bins : array_like, optional Energy bins to be used for constructing response matrix (default is to use energy bins from comptools.get_energybins() function). Returns ------- res : numpy.ndarray Response matrix. res_err : numpy.ndarray Uncerainty of the response matrix. """ # Check that the input array shapes inputs = [true_energy, reco_energy, true_target, pred_target] assert len(set(map(np.ndim, inputs))) == 1 assert len(set(map(np.shape, inputs))) == 1 if energy_bins is None: energy_bins = get_energybins().energy_bins num_ebins = len(energy_bins) - 1 num_groups = len(np.unique([true_target, pred_target])) true_ebin_indices = np.digitize(true_energy, energy_bins) - 1 reco_ebin_indices = np.digitize(reco_energy, energy_bins) - 1 res = np.zeros((num_ebins * num_groups, num_ebins * num_groups)) bin_iter = product(range(num_ebins), range(num_ebins), range(num_groups), range(num_groups)) for true_ebin, reco_ebin, true_target_bin, pred_target_bin in bin_iter: # Get mask for events in true/reco energy and true/reco composition bin mask = np.logical_and.reduce((true_ebin_indices == true_ebin, reco_ebin_indices == reco_ebin, true_target == true_target_bin, pred_target == pred_target_bin)) res[num_groups * reco_ebin + pred_target_bin, num_groups * true_ebin + true_target_bin] = mask.sum() # Calculate statistical error on response matrix res_err = np.sqrt(res) return res, res_err
[docs]def response_matrix(true_energy, reco_energy, true_target, pred_target, efficiencies, efficiencies_err, energy_bins=None): """Computes normalized energy-composition response matrix Parameters ---------- true_energy : array_like Array of true (MC) energies. reco_energy : array_like Array of reconstructed energies. true_target : array_like Array of true compositions that are encoded to numerical values. pred_target : array_like Array of predicted compositions that are encoded to numerical values. efficiencies : array_like Detection efficiencies (should be in a PyUnfold-compatable form). efficiencies_err : array_like Detection efficiencies uncertainties (should be in a PyUnfold-compatable form). energy_bins : array_like, optional Energy bins to be used for constructing response matrix (default is to use energy bins from comptools.get_energybins() function). Returns ------- res_normalized : numpy.ndarray Normalized response matrix. res_normalized_err : numpy.ndarray Uncerainty of the normalized response matrix. """ res, res_err = response_hist(true_energy=true_energy, reco_energy=reco_energy, true_target=true_target, pred_target=pred_target, energy_bins=energy_bins) # Normalize response matrix column-wise (i.e. $P(E|C)$) res_normalized, res_normalized_err = column_normalize(res=res, res_err=res_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err) return res_normalized, res_normalized_err