from collections import namedtuple
import os
from functools import wraps
from itertools import islice, count
import numpy as np
import yaml
def requires_icecube(func):
    """Decorator for functions that need the icecube software stack.

    The returned wrapper tries to import ``icecube`` each time it is called
    (not at decoration time). If the import fails, an ImportError naming the
    decorated function is raised; otherwise the call is forwarded to ``func``
    with the same positional and keyword arguments.
    """
    template = ('The function {} requires icecube software. '
                'Make sure the env-shell.sh script has been '
                'run.')

    @wraps(func)
    def _guarded(*args, **kwargs):
        try:
            # Imported only to verify availability; the module itself is unused
            import icecube  # noqa: F401
        except ImportError:
            raise ImportError(template.format(func.__name__))
        else:
            return func(*args, **kwargs)

    return _guarded
def get_config_paths():
    """Return the paths used in this analysis, as read from ``config.yml``.

    The file ``config.yml`` is looked up one directory above the directory
    containing this module. Its ``paths`` section must define:

        metaproject: Path to IceCube metaproject being used
        comp_data_dir: Path to where data and simulation is stored
        condor_data_dir: Path to where HTCondor error and output files are stored
        condor_scratch_dir: Path to where HTCondor log and submit files are stored
        figures_dir: Path to where figures are saved
        project_root: Path to where cr-composition project is located
        virtualenv_dir: Presumably the path to the Python virtualenv used by
            the project (inferred from the name -- confirm)

    Returns
    -------
    paths : collections.namedtuple
        Namedtuple containing relevant paths (e.g. figures_dir is where
        figures will be saved, condor_data_dir is where data/simulation will
        be saved to / loaded from, etc).

    Raises
    ------
    KeyError
        If the configuration has no ``paths`` section (including an empty
        file) or one of the names listed above is missing from it.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    yaml_file = os.path.join(here, os.pardir, 'config.yml')
    with open(yaml_file, 'r') as f:
        # safe_load only builds plain Python data. Calling yaml.load() without
        # an explicit Loader is deprecated since PyYAML 5.1 and fails with a
        # TypeError on PyYAML >= 6.
        config = yaml.safe_load(f) or {}

    # Field names of the returned namedtuple; each must be a key in the
    # ``paths`` section of the configuration
    path_names = ['metaproject',
                  'comp_data_dir',
                  'condor_data_dir',
                  'condor_scratch_dir',
                  'figures_dir',
                  'project_root',
                  'virtualenv_dir',
                  ]
    PathObject = namedtuple('PathType', path_names)

    # Build the instance from path_names so the field list lives in one place
    path_config = config['paths']
    paths = PathObject(**{name: path_config[name] for name in path_names})

    return paths
def check_output_dir(outfile, makedirs=True):
    """ Function to check if the directory for an output file exists

    This function will check whether the output directory containing the
    outfile specified exists. If the output directory doesn't exist, then
    there is an option to create the output directory. Otherwise, this
    function will raise an IOError.

    Parameters
    ----------
    outfile : str
        Path to output file. Relative paths are resolved against the current
        working directory.
    makedirs : bool, optional
        Option to create the output directory containing the output file if
        it doesn't already exist (default: True)

    Returns
    -------
    None

    Raises
    ------
    IOError
        If the output directory doesn't exist and ``makedirs`` is False.
    OSError
        If creating the output directory fails and it still doesn't exist
        afterwards.
    """
    # Ensure that outfile is an absolute path before taking its directory
    outdir = os.path.dirname(os.path.abspath(outfile))

    if os.path.exists(outdir):
        return

    if not makedirs:
        raise IOError('The directory {} doesn\'t exist'.format(outdir))

    print('The directory {} doesn\'t exist. Creating it...'.format(outdir))
    try:
        os.makedirs(outdir)
    except OSError:
        # Another process (e.g. a parallel job writing to the same location)
        # may have created the directory between the existence check and
        # makedirs. Only propagate the error if it really isn't there.
        if not os.path.isdir(outdir):
            raise

    return
def partition(seq, size, max_batches=None):
    """ Generates partitions of length ``size`` from the iterable ``seq``

    Parameters
    ----------
    seq : iterable
        Iterable object to be partitioned.
    size : int
        Number of items to have in each partition.
    max_batches : int, optional
        Limit the number of partitions to yield (default is to yield all
        partitions).

    Yields
    ------
    batch : list
        Partition of ``seq`` that is (at most) ``size`` items long.

    Raises
    ------
    TypeError
        If ``max_batches`` is neither an integer nor None. Because this is a
        generator, the check runs when iteration starts, not when
        ``partition`` is called.

    Examples
    --------
    >>> from comptools import partition
    >>> list(partition(range(10), 3))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> list(partition(range(10), 3, max_batches=2))
    [[0, 1, 2], [3, 4, 5]]
    """
    if not isinstance(max_batches, (int, type(None))):
        raise TypeError('max_batches must either be an integer or None, '
                        'got {}'.format(type(max_batches)))

    seq_iter = iter(seq)
    # islice(count(), None) is unbounded, so max_batches=None imposes no limit
    for _ in islice(count(), max_batches):
        batch = list(islice(seq_iter, size))
        if not batch:
            # Underlying iterable is exhausted
            return
        yield batch
class ComputingEnvironemtError(Exception):
    """
    Custom exception that should be raised when a problem related to the
    computing environment is found

    The class name is misspelled ("Environemt"); it is kept unchanged so that
    existing ``raise`` / ``except`` statements continue to work.
    """


# Correctly spelled alias for the same exception class. Both names refer to
# one class, so either can be used to raise or catch it.
ComputingEnvironmentError = ComputingEnvironemtError
def get_energybins(config='IC86.2012'):
    """Thin wrapper around ``get_energybins`` from the sibling ``binning`` module.

    Parameters
    ----------
    config : str, optional
        Forwarded unchanged as the ``config`` keyword argument
        (default: 'IC86.2012').

    Returns
    -------
    Whatever ``binning.get_energybins(config=config)`` returns.
    """
    # Imported at call time rather than at module import time
    # NOTE(review): presumably to avoid a circular import -- confirm
    from .binning import get_energybins as _binning_get_energybins

    energybins = _binning_get_energybins(config=config)
    return energybins
# Maps feature names to the label strings used for them (many are LaTeX /
# mathtext expressions). Strings containing backslashes are written as raw
# strings so that sequences such as ``\l``, ``\m`` or ``\%`` are not treated
# as (invalid) Python escape sequences; the runtime values are unchanged.
# Note that get_training_features() adds and overwrites entries at call time.
LABEL_DICT = {
    'reco_log_energy': r'$\log_{10}(E_{\mathrm{reco}}/\mathrm{GeV})$',
    'lap_log_energy': r'$\log_{10}(E_{\mathrm{Lap}}/\mathrm{GeV})$',
    'log_s50': r'$\log_{10}(S_{\mathrm{50}})$',
    'log_s80': r'$\log_{10}(S_{\mathrm{80}})$',
    'log_s125': r'$\log_{10}(S_{\mathrm{125}})$',
    'log_s180': r'$\log_{10}(S_{\mathrm{180}})$',
    'log_s250': r'$\log_{10}(S_{\mathrm{250}})$',
    'log_s500': r'$\log_{10}(S_{\mathrm{500}})$',
    'lap_rlogl': r'$r\log_{10}(l)$',
    'lap_beta': 'lap beta',
    'InIce_log_charge_1_60': 'InIce charge',
    'InIce_log_charge_1_45': r'InIce charge (top 75\%)',
    'InIce_charge_1_30': r'InIce charge (top 50\%)',
    'InIce_log_charge_1_30': r'$\log_{10}(InIce charge (top 50))$',
    'InIce_log_charge_1_15': r'InIce charge (top 25\%)',
    'InIce_log_charge_1_6': r'InIce charge (top 10\%)',
    'reco_cos_zenith': r'$\cos(\theta_{\mathrm{reco}})$',
    'lap_cos_zenith': r'$\cos(\theta)$',
    'LLHlap_cos_zenith': r'$\cos(\theta_{\mathrm{Lap}})$',
    'LLHLF_cos_zenith': r'$\cos(\theta_{\mathrm{LLH+COG}})$',
    'lap_chi2': r'$\chi^2_{\mathrm{Lap}}/\mathrm{n.d.f}$',
    'NChannels_1_60': 'NChannels',
    'NChannels_1_45': r'NChannels (top 75\%)',
    'NChannels_1_30': r'NChannels (top 50\%)',
    'NChannels_1_15': r'NChannels (top 25\%)',
    'NChannels_1_6': r'NChannels (top 10\%)',
    'log_NChannels_1_30': r'$\log_{10}$(NChannels (top 50\%))',
    'StationDensity': 'StationDensity',
    'charge_nchannels_ratio': 'Charge/NChannels',
    'stationdensity_charge_ratio': 'StationDensity/Charge',
    'NHits_1_30': 'NHits',
    'log_NHits_1_30': r'$\log_{10}$(NHits (top 50\%))',
    'charge_nhits_ratio': 'Charge/NHits',
    'nhits_nchannels_ratio': 'NHits/NChannels',
    'stationdensity_nchannels_ratio': 'StationDensity/NChannels',
    'stationdensity_nhits_ratio': 'StationDensity/NHits',
    'llhratio': 'llhratio',
    'n_he_stoch_standard': 'Num HE stochastics (standard)',
    'n_he_stoch_strong': 'Num HE stochastics (strong)',
    'eloss_1500_standard': 'dE/dX (standard)',
    'log_dEdX': r'$\mathrm{\log_{10}(dE/dX)}$',
    'eloss_1500_strong': 'dE/dX (strong)',
    'num_millipede_particles': r'$N_{\mathrm{mil}}$',
    'avg_inice_radius': r'$\mathrm{\langle R_{\mu} \rangle }$',
    'invqweighted_inice_radius_1_60': r'$\mathrm{R_{\mu \ bundle}}$',
    'avg_inice_radius_1_60': r'$\mathrm{R_{\mu \ bundle}}$',
    'avg_inice_radius_Laputop': r'$R_{\mathrm{core, Lap}}$',
    'FractionContainment_Laputop_InIce': r'$C_{\mathrm{IC}}$',
    'FractionContainment_Laputop_IceTop': r'$C_{\mathrm{IT}}$',
    'max_inice_radius': r'$R_{\mathrm{max}}$',
    'invcharge_inice_radius': r'$R_{\mathrm{q,core}}$',
    'lap_zenith': 'zenith',
    'NStations': 'NStations',
    'IceTop_charge': 'IT charge',
    'IceTop_charge_175m': 'Signal greater 175m',
    'log_IceTop_charge_175m': r'$\log_{10}(Q_{IT, 175})$',
    'IT_charge_ratio': 'IT charge ratio',
    'refit_beta': r'$\mathrm{\beta_{refit}}$',
    'log_d4r_peak_energy': r'$\mathrm{\log_{10}(E_{D4R})}$',
    'log_d4r_peak_sigma': r'$\mathrm{\log_{10}(\sigma E_{D4R})}$',
    'd4r_N': 'D4R N',
    'median_inice_radius': 'Median InIce',
    'IceTopLLHRatio': 'IceTopLLHRatio',
}
def get_training_features(feature_list=None):
    """Return training feature names together with their labels.

    Side effect: every call writes generated entries into the module-level
    ``LABEL_DICT`` -- ``NChannels_<min>_<max>`` and ``NHits_<min>_<max>`` for
    the DOM ranges 1-15, 15-30, 30-45, 45-60 and 1-60, plus
    ``IceTop_charge_beyond_<d>m`` for d = 0, 125, ..., 1000 -- replacing any
    label already stored under those keys.

    Parameters
    ----------
    feature_list : list of str, optional
        Feature names to look up. Defaults to
        ``['lap_cos_zenith', 'log_s125', 'log_dEdX']``.

    Returns
    -------
    feature_list : list of str
        The feature names (the input object, or the default list).
    feature_labels : list of str
        ``LABEL_DICT`` entry for each feature, in the same order.

    Raises
    ------
    KeyError
        If a feature has no entry in ``LABEL_DICT``.
    """
    if feature_list is None:
        # Features used in the 3-year analysis
        feature_list = ['lap_cos_zenith', 'log_s125', 'log_dEdX']

    # Consecutive DOM ranges, followed by the full 1-60 range
    dom_numbers = [1, 15, 30, 45, 60]
    dom_ranges = list(zip(dom_numbers[:-1], dom_numbers[1:]))
    dom_ranges.append((1, 60))

    for quantity in ('NChannels', 'NHits'):
        for min_DOM, max_DOM in dom_ranges:
            key = '{}_{}_{}'.format(quantity, min_DOM, max_DOM)
            LABEL_DICT[key] = '{} {} {}'.format(quantity, min_DOM, max_DOM)

    for min_dist in np.arange(0, 1125, 125):
        key = 'IceTop_charge_beyond_{}m'.format(min_dist)
        LABEL_DICT[key] = 'IT Q > {}m'.format(min_dist)

    feature_labels = [LABEL_DICT[name] for name in feature_list]

    return feature_list, feature_labels