"""Source code for comptools.base."""


from collections import namedtuple
import os
from functools import wraps
from itertools import islice, count
import numpy as np
import yaml


def requires_icecube(func):
    """Decorator to wrap functions that require any icecube software.

    The wrapped function first checks that ``icecube`` can be imported;
    if it cannot, an ``ImportError`` with a hint about sourcing
    ``env-shell.sh`` is raised instead of calling ``func``.
    """
    @wraps(func)
    def wrapped(*args, **kwargs):
        try:
            import icecube  # noqa: F401 -- availability check only
        except ImportError:
            raise ImportError(
                'The function {} requires icecube software. '
                'Make sure the env-shell.sh script has been '
                'run.'.format(func.__name__))
        return func(*args, **kwargs)

    return wrapped
def get_config_paths():
    """ Function to return paths used in this analysis

    Specifically,
        metaproject: Path to IceCube metaproject being used
        comp_data_dir: Path to where data and simulation is stored
        condor_data_dir: Path to where HTCondor error and output files
            are stored
        condor_scratch_dir: Path to where HTCondor log and submit files
            are stored
        figures_dir: Path to where figures are saved
        project_root: Path to where cr-composition project is located

    Returns
    -------
    paths : collections.namedtuple
        Namedtuple containing relavent paths (e.g. figures_dir is where
        figures will be saved, condor_data_dir is where data/simulation
        will be saved to / loaded from, etc).
    """
    here = os.path.abspath(os.path.dirname(__file__))
    yaml_file = os.path.join(here, os.pardir, 'config.yml')
    with open(yaml_file, 'r') as f:
        # safe_load: never construct arbitrary Python objects from YAML
        # (yaml.load without an explicit Loader is deprecated and unsafe).
        config = yaml.safe_load(f) or {}

    # Create path namedtuple object
    path_names = ['metaproject',
                  'comp_data_dir',
                  'condor_data_dir',
                  'condor_scratch_dir',
                  'figures_dir',
                  'project_root',
                  'virtualenv_dir',
                  ]
    PathObject = namedtuple('PathType', path_names)

    # Create instance of PathObject with appropriate path information.
    # Build the kwargs from path_names so the field list and the lookup
    # keys cannot drift out of sync.
    paths_config = config['paths']
    paths = PathObject(**{name: paths_config[name] for name in path_names})

    return paths
def check_output_dir(outfile, makedirs=True):
    """ Function to check if the directory for an output file exists

    This function will check whether the output directory containing the
    outfile specified exists. If the output directory doesn't exist, then
    there is an option to create the output directory. Otherwise, this
    function will raise an IOError.

    Parameters
    ----------
    outfile : str
        Path to output file.
    makedirs : bool, optional
        Option to create the output directory containing the output file
        if it doesn't already exist (default: True)

    Returns
    -------
    None

    Raises
    ------
    IOError
        If the output directory doesn't exist and ``makedirs`` is False.
    """
    # Ensure that outfile is an absolute path
    outfile = os.path.abspath(outfile)
    outdir = os.path.dirname(outfile)

    if not os.path.exists(outdir):
        if makedirs:
            print('The directory {} doesn\'t exist. '
                  'Creating it...'.format(outdir))
            # exist_ok=True guards against a race where another process
            # creates the directory between the exists() check and here.
            os.makedirs(outdir, exist_ok=True)
        else:
            raise IOError('The directory {} doesn\'t exist'.format(outdir))

    return
def partition(seq, size, max_batches=None):
    """ Generates partitions of length ``size`` from the iterable ``seq``

    Parameters
    ----------
    seq : iterable
        Iterable object to be partitioned.
    size : int
        Number of items to have in each partition.
    max_batches : int, optional
        Limit the number of partitions to yield (default is to yield all
        partitions).

    Yields
    ------
    batch : list
        Partition of ``seq`` that is (at most) ``size`` items long.

    Raises
    ------
    TypeError
        If ``max_batches`` is neither an integer nor None.

    Examples
    --------
    >>> from comptools import partition
    >>> list(partition(range(10), 3))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    """
    if not isinstance(max_batches, (int, type(None))):
        raise TypeError('max_batches must either be an integer or None, '
                        'got {}'.format(type(max_batches)))

    seq_iter = iter(seq)
    # islice(count(), None) iterates forever; islice(count(), n) caps the
    # number of yielded batches at n.
    for _ in islice(count(), max_batches):
        batch = list(islice(seq_iter, size))
        if not batch:
            # Underlying iterable is exhausted
            return
        yield batch
class ComputingEnvironemtError(Exception):
    """ Custom exception that should be raised when a problem related to
    the computing environment is found

    NOTE: the class name contains a long-standing typo ('Environemt');
    it is kept unchanged so existing ``except`` clauses and imports
    continue to work.
    """
    pass


# Correctly spelled alias for new code; the misspelled name above is
# preserved for backward compatibility.
ComputingEnvironmentError = ComputingEnvironemtError
def get_energybins(config='IC86.2012'):
    """Return the analysis energy bins for ``config``.

    Thin wrapper that delegates to ``comptools.binning.get_energybins``;
    the import is done lazily inside the function body.
    """
    from .binning import get_energybins as _get_energybins
    return _get_energybins(config=config)
# Mapping from internal feature/column keys to human-readable axis labels
# (mostly LaTeX for matplotlib). Extended at runtime by
# get_training_features() with NChannels/NHits/IceTop-charge entries.
# NOTE(review): many values are non-raw strings containing backslash
# sequences (e.g. '\m', '\l'); they rely on Python leaving unrecognized
# escapes untouched. Consider raw strings, but verify each rendered label
# first since '\\theta' and '\t'-style sequences would change meaning.
LABEL_DICT = {'reco_log_energy': '$\log_{10}(E_{\mathrm{reco}}/\mathrm{GeV})$', 'lap_log_energy': '$\log_{10}(E_{\mathrm{Lap}}/\mathrm{GeV})$', 'log_s50': '$\log_{10}(S_{\mathrm{50}})$', 'log_s80': '$\log_{10}(S_{\mathrm{80}})$', 'log_s125': '$\log_{10}(S_{\mathrm{125}})$', 'log_s180': '$\log_{10}(S_{\mathrm{180}})$', 'log_s250': '$\log_{10}(S_{\mathrm{250}})$', 'log_s500': '$\log_{10}(S_{\mathrm{500}})$', 'lap_rlogl': '$r\log_{10}(l)$', 'lap_beta': 'lap beta', 'InIce_log_charge_1_60': 'InIce charge', 'InIce_log_charge_1_45': 'InIce charge (top 75\%)', 'InIce_charge_1_30': 'InIce charge (top 50\%)', 'InIce_log_charge_1_30': '$\log_{10}(InIce charge (top 50))$', 'InIce_log_charge_1_15': 'InIce charge (top 25\%)', 'InIce_log_charge_1_6': 'InIce charge (top 10\%)', 'reco_cos_zenith': '$\cos(\\theta_{\mathrm{reco}})$', 'lap_cos_zenith': '$\cos(\\theta)$', 'LLHlap_cos_zenith': '$\cos(\\theta_{\mathrm{Lap}})$', 'LLHLF_cos_zenith': '$\cos(\\theta_{\mathrm{LLH+COG}})$', 'lap_chi2': '$\chi^2_{\mathrm{Lap}}/\mathrm{n.d.f}$', 'NChannels_1_60': 'NChannels', 'NChannels_1_45': 'NChannels (top 75\%)', 'NChannels_1_30': 'NChannels (top 50\%)', 'NChannels_1_15': 'NChannels (top 25\%)', 'NChannels_1_6': 'NChannels (top 10\%)', 'log_NChannels_1_30': '$\log_{10}$(NChannels (top 50\%))', 'StationDensity': 'StationDensity', 'charge_nchannels_ratio': 'Charge/NChannels', 'stationdensity_charge_ratio': 'StationDensity/Charge', 'NHits_1_30': 'NHits', 'log_NHits_1_30': '$\log_{10}$(NHits (top 50\%))', 'charge_nhits_ratio': 'Charge/NHits', 'nhits_nchannels_ratio': 'NHits/NChannels', 'stationdensity_nchannels_ratio': 'StationDensity/NChannels', 'stationdensity_nhits_ratio': 'StationDensity/NHits', 'llhratio': 'llhratio', 'n_he_stoch_standard': 'Num HE stochastics (standard)', 'n_he_stoch_strong': 'Num HE stochastics (strong)', 'eloss_1500_standard': 'dE/dX (standard)', 'log_dEdX': '$\mathrm{\log_{10}(dE/dX)}$', 'eloss_1500_strong': 'dE/dX (strong)', 'num_millipede_particles': 
'$N_{\mathrm{mil}}$', 'avg_inice_radius': '$\mathrm{\langle R_{\mu} \\rangle }$', 'invqweighted_inice_radius_1_60': '$\mathrm{R_{\mu \ bundle}}$', 'avg_inice_radius_1_60': '$\mathrm{R_{\mu \ bundle}}$', 'avg_inice_radius_Laputop': '$R_{\mathrm{core, Lap}}$', 'FractionContainment_Laputop_InIce': '$C_{\mathrm{IC}}$', 'FractionContainment_Laputop_IceTop': '$C_{\mathrm{IT}}$', 'max_inice_radius': '$R_{\mathrm{max}}$', 'invcharge_inice_radius': '$R_{\mathrm{q,core}}$', 'lap_zenith': 'zenith', 'NStations': 'NStations', 'IceTop_charge': 'IT charge', 'IceTop_charge_175m': 'Signal greater 175m', 'log_IceTop_charge_175m': '$\log_{10}(Q_{IT, 175})$', 'IT_charge_ratio': 'IT charge ratio', 'refit_beta': '$\mathrm{\\beta_{refit}}$', 'log_d4r_peak_energy': '$\mathrm{\log_{10}(E_{D4R})}$', 'log_d4r_peak_sigma': '$\mathrm{\log_{10}(\sigma E_{D4R})}$', 'd4r_N': 'D4R N', 'median_inice_radius': 'Median InIce', 'IceTopLLHRatio': 'IceTopLLHRatio', }
def get_training_features(feature_list=None):
    """Return the training feature keys and their plot labels.

    As a side effect, registers labels in the module-level ``LABEL_DICT``
    for the NChannels/NHits DOM-range features and the
    ``IceTop_charge_beyond_{dist}m`` features (idempotent: the same keys
    are written on every call).

    Parameters
    ----------
    feature_list : list of str, optional
        Feature keys to use. Defaults to the features used in the
        3-year analysis: ``['lap_cos_zenith', 'log_s125', 'log_dEdX']``.

    Returns
    -------
    feature_list : list of str
        The (possibly defaulted) feature keys.
    feature_labels : list of str
        The corresponding label for each feature from ``LABEL_DICT``.

    Raises
    ------
    KeyError
        If a requested feature has no entry in ``LABEL_DICT``.
    """
    if feature_list is None:
        # Features used in the 3-year analysis
        feature_list = ['lap_cos_zenith', 'log_s125', 'log_dEdX']

    # Consecutive DOM ranges (1-15, 15-30, ...) plus the full 1-60 range.
    dom_numbers = [1, 15, 30, 45, 60]
    dom_ranges = list(zip(dom_numbers[:-1], dom_numbers[1:])) + [(1, 60)]
    for prefix in ('NChannels', 'NHits'):
        for min_DOM, max_DOM in dom_ranges:
            key = '{}_{}_{}'.format(prefix, min_DOM, max_DOM)
            LABEL_DICT[key] = '{} {} {}'.format(prefix, min_DOM, max_DOM)

    # Labels for IceTop charge beyond a minimum lateral distance.
    min_dists = np.arange(0, 1125, 125)
    for min_dist in min_dists:
        key = 'IceTop_charge_beyond_{}m'.format(min_dist)
        LABEL_DICT[key] = 'IT Q > {}m'.format(min_dist)

    feature_labels = [LABEL_DICT[feature] for feature in feature_list]

    return feature_list, feature_labels