import logging
logger = logging.getLogger(__name__)
import warnings
import numpy as np
import operator
import sys
from copy import copy,deepcopy
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from inspect import signature
import h5py as h5
from .axes import Axes,Axis
from .healpix_axis import HealpixAxis
from sparse import DOK, COO, GCXS, SparseArray
import astropy.units as u
from mhealpy import HealpixMap
[docs]class Histogram(object):
"""
This is a wrapper of a numpy array with axes and a fill method. Sparse array from
pydata's sparse package are also supported.
Like an array, the histogram can have an arbitrary number of dimensions.
Standard numpy array indexing is supported to get the contents
--i.e. :code:`h[:]`, :code:`h[4]`, :code:`h[[1,3,4]]`, :code:`h[:,5:50:2]]`,
etc.--. However, the meaning of the :code:`-1` index is different. Instead of
counting from the end, :code:`-1` corresponds to the underflow bin. Similarly,
an index equal to the number of bins corresponds to the overflow bin.
You can however give relative position with respect to :code:`h.end`
--e.g. :code:`h[0:h.end]` result in all regular bins, :code:`h[-1:h.end+1]`
includes also the underflow/overflow bins and :code:`h[h.end]` gives you the
contents of the overflow bin. The convenient aliases :code:`h.uf = -1`,
:code:`h.of = e.end` and :code:`h.all = slice(-1,h.end+1)` are provided.
You can also use an :code:`Ellipsis` object (:code:`...`) at the end to specify that the
contents from the rest of the dimension are to have the under and overflow
bins included. e.g. for a 3D histogram :code:`h[1,-1:h.end+1,-1:h.end+1]
= h[1,...]`. :code:`h[:]` returns all contents without under/overflow bins and
h[...] returns everything, including those special bins.
If :code:`sumw2` is not :code:`None`, then the histogram will keep track of
the sum of the weights squared --i.e. you better use this if you are using
weighted data and are concern about error bars--. You can access these with
:code:`h.sumw2[item]`, where `item` is interpreted the same was a in :code:`h[item]`.
:code:`h.bin_error[item]` return the :code:`sqrt(sumw2)` (or :code:`sqrt(contents)` is :code:`sumw2
was not specified`).
The operators :code:`+`, :code:`-`, :code:`*` and :code:`-` are available.
Both other operand can be a histograms, a scalar or an array of appropiate size.
Note that :code:`h += h0` is more efficient than
:code:`h = h + h0` since latter involves the instantiation of a new histogram.
Args:
edges (Axes or array): Definition of bin edges, Anything that can
be processes by Axes. Lower edge value is included in the bin,
upper edge value is excluded.
contents (array or SparseArray): Initialization of histogram contents. Might or might
not include under/overflow bins. Initialize to 0 by default.
sumw2 (None, bool or array): If True, it will keep track of the sum of
the weights squared. You can also initialize them with an array
labels (array of str): Optionally label the axes for easier indexing
axis_scale (str or array): Bin center mode e.g. `"linear"` or `"log"`.
See ``Axis.axis_scale``.
sparse (bool): Initialize an empty sparse histogram. Only relevant if no contents
are provided.
"""
def __init__(self, edges, contents = None, sumw2 = None,
labels=None, axis_scale = None, sparse = None, unit = None):
self._axes = Axes(edges, labels=labels, axis_scale = axis_scale)
# Standarize contents (with under/overflow) or initialize them to zero.
if contents is not None:
# Deal with units
if isinstance(contents, u.Quantity):
if unit is None:
unit = contents._unit
contents = contents.value
else:
contents = contents.to_value(unit)
# Sparse
self._sparse = isinstance(contents, SparseArray)
# Shape
if np.array_equal(self.axes.nbins+2, np.shape(contents)):
# Includes under and overflow
if self.is_sparse:
self._contents = contents
else:
self._contents = np.array(contents)
elif np.array_equal(self.axes.nbins, np.shape(contents)):
# Missing under and overflow, but right shape.
# Adding empty under/overflow bins
self._contents = np.pad(contents, 1)
else:
raise ValueError("Bins ({}) - contents {} size mismatch".format(self.axes.nbins,np.shape(contents)))
else:
self._sparse = sparse
contents_shape = [n+2 for n in self.axes.nbins]
if self.is_sparse:
self._contents = DOK(contents_shape, fill_value = 0.)
else:
self._contents = np.zeros(contents_shape)
# slice does not seems to work well with GCXS, force COO for now
# Check https://github.com/pydata/sparse/issues/550
if isinstance(self._contents, GCXS):
self._contents = self._contents.asformat('coo')
# Units
if unit is not None:
self._unit = u.Unit(unit)
else:
self._unit = None
#Check if we'll keep track of the sum of the weights
# Use same units as main histogram.
w2units = None
if self.unit is not None:
w2units = self.unit*self.unit
if sumw2 is None or sumw2 is False:
self._sumw2 = None
elif sumw2 is True:
self._sumw2 = Histogram(self._axes,
sparse = self.is_sparse,
unit = w2units)
elif isinstance(sumw2, Histogram):
if self.axes != sumw2.axes:
raise ValueError("Is sumw2 is a Histogram is needs to have "
"consistent axes")
self._sumw2 = sumw2
self._sumw2.to(w2units)
else:
self._sumw2 = Histogram(self._axes, sumw2, unit = w2units)
if self._sumw2 is not None and self.is_sparse != self._sumw2.is_sparse:
raise ValueError("Both contents and sumw2 must be either "
"sparse or dense")
# Special access methods
self.sumw2 = None if self._sumw2 is None else self._get_sumw2(self)
self.bin_error = self._get_bin_error(self)
self.slice = self._slice(self)
@property
def unit(self):
return self._unit
[docs] def to(self, unit, equivalencies=[], update = True, copy = True):
"""
Convert to other units.
Args:
unit (unit-like): Unit to convert to.
equivalencies (list or tuple): A list of equivalence pairs to try if the units are not
directly convertible.
update (bool): If ``update`` is ``False``, only the units will be changed without
updating the contents accordingly
copy (bool): If True (default), then the value is copied. Otherwise, a copy
will only be made if necessary.
"""
# Copy
if copy:
new = deepcopy(self)
else:
new = self
# If no conversion is needed
if not update:
if unit is None:
new._unit = None
else:
new._unit = u.Unit(unit)
if new.sumw2 is not None:
new._sumw2._unit = unit if unit is None else unit*unit
return new
# Compute factor
if new.unit is None:
if unit is None or unit == u.dimensionless_unscaled:
factor = 1
else:
TypeError("Histogram without units")
else:
factor = new.unit.to(unit, equivalencies = equivalencies)
# Update values
new *= factor
# Update units
new._unit = unit
# Update sumw2
if new._sumw2 is not None:
new._sumw2 *= factor*factor
new._sumw2._unit = unit*unit
return new
@property
def is_sparse(self):
"""
Return True if the underlaying histogram contents are hold in a sparse array. False if it is a dense array.
"""
return self._sparse
[docs] def to_dense(self):
"""
Return a dense representation of a sparse histogram
"""
h_dense = deepcopy(self)
if h_dense.is_sparse:
h_dense._contents = self.full_contents.todense()
if h_dense._sumw2 is not None:
h_dense._sumw2 = h_dense._sumw2.to_dense()
h_dense._sparse = False
return h_dense
# pydata sparse style
todense = to_dense
[docs] def to_sparse(self):
"""
Return a sparse representation of a histogram.
"""
h_sparse = deepcopy(self)
if not h_sparse.is_sparse:
h_sparse._contents = COO.from_numpy(self.full_contents)
if h_sparse._sumw2 is not None:
h_sparse._sumw2 = self._sumw2.to_sparse()
h_sparse._sparse = True
return h_sparse
# pydata sparse style
todense = to_dense
[docs] @classmethod
def concatenate(cls, edges, histograms, label = None):
"""
Generate a Histogram from a list of histograms. The axes of all input
histograms must be equal, and the new histogram will have one more
dimension than the input. The new axis has index 0.
Args:
edges (Axes or array): Definition of bin edges of the new dimension
histograms (list of Histogram): List of histogram to fill contents.
Might or might not include under/overflow bins.
labels (str): Label the new dimension
Return:
Histogram
"""
# Check all same axes
old_axes = histograms[0].axes
for hist in histograms:
if hist.axes != old_axes:
raise ValueError("The axes of all input histogram must equal")
# Check new axis matches number of histograms,
# with or without under/overflow
new_axis = Axis(edges, label = label)
if len(histograms) == new_axis.nbins:
padding = [(1,1)]+[(0,0)]*old_axes.ndim
elif len(histograms) == new_axis.nbins + 2:
padding = [(0,0)]+[(0,0)]*old_axes.ndim
else:
raise ValueError("Mismatch between number of bins and "
"number of histograms")
# Create new axes and new contents
new_axes = Axes([new_axis] + [ax for ax in old_axes])
# Check all sparse or all dense
is_sparse = [h.is_sparse for h in histograms]
if not np.all(is_sparse) and np.any(is_sparse):
raise ValueError("Cannot concatenate a mix of sparse and dense histograms")
# Unit conversion factor
unit = histograms[0].unit
if unit is None:
# No factors, just check all histogram are the same
for h in histograms[1:]:
if h.unit is not None:
raise ValueError("Cannot concatenate maps with and without units.")
unit_factor = 1
else:
unit_factor = [1]
for h in histograms[1:]:
if h.unit is None:
raise ValueError("Cannot concatenate maps with and without units.")
else:
unit_factor += [h.unit.to(unit)]
if np.all(unit_factor == 1):
unit_factor = 1
# Update contents, with padding and units if needed
if unit_factor == 1:
contents = np.concatenate([h.full_contents[None] for h in histograms])
else:
contents = np.concatenate([factor*h.full_contents[None]
for factor,h in zip(unit_factor,histograms)])
contents = np.pad(contents, padding)
# New sumw2
has_sumw2 = [h.sumw2 is not None for h in histograms]
if np.all(has_sumw2):
if unit_factor == 1:
sumw2 = np.concatenate([h._sumw2.full_contents[None] for h in histograms])
else:
sumw2 = np.concatenate([factor*factor*h.sumw2_.full_contents[None]
for factor,h in zip(unit_factor,histograms)])
sumw2 = np.pad(sumw2, padding)
else:
sumw2 = None
if np.any(has_sumw2):
# A mix. Drop and warn
logger.warning("Not all input histogram have sum of weights "
"squared. sumw2 will be dropped")
return Histogram(new_axes, contents, sumw2, unit = unit)
@property
def ndim(self):
return self._axes.ndim
[docs] def clear(self):
"""
Set all counts to 0
"""
self[...] = 0
if self._sumw2 is not None:
self._sumw2.clear()
def __eq__(self, other):
# Histogram is completely defined by axes and contents
return (self._axes == other._axes
and
np.all(self.full_contents == other.full_contents)
and
self.unit == other.unit
and
self._sumw2 == other._sumw2)
def __array__(self):
return np.array(self.contents)
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
if len(inputs) != 1 or inputs[0] is not self:
raise NotImplementedError(
"Only numpy functions with a single operand are supported "
"e.g. np.sin(h), np.sum(h). Call h.contents explicitly to "
"get the corresponding array e.g. np.add(h.contents, a)")
return ufunc(self.contents, *inputs[1:], **kwargs)
def __array_function__(self, func, types, args, kwargs):
# Only support basic functions on a single operand like np.sum(h)
if len(args) != 1 or args[0] is not self:
raise NotImplementedError(
"Only numpy functions with a single operand are supported "
"e.g. np.sin(h), np.sum(h). Call h.contents explicitly to "
"get the corresponding array e.g. np.add(h.contents, a)")
return func(self.contents, *args[1:], **kwargs)
class _NBINS():
'''
Convenience class that will expand to the number of bins of a
given dimension.
The trick is to overload the -/+ operators such than
h.end +/- offset (h being an instance of Histogram and 'end' an
static instance of Histogram._NBINS) returns an instance of _NBINS
itself, which stores the offset. The [] operator can then detect that
the input is an instance of _NBINS and convert it into an integer with
respect to the size of the appropiate axis.
'''
def __init__(self, offset = 0):
self.offset = offset
def __add__(self, offset):
return self.__class__(offset)
def __sub__(self, offset):
return self + (-offset)
end = _NBINS()
# Convenient aliases
uf = -1
of = end
all = slice(-1, end+1)
def _prepare_indices(self, indices):
'''
Prepare indices for it use in __getitem__ and __setitem__
--i.e. [] overloading --
See class help.
'''
def _prepare_index(index, dim):
'''
Modify index for a single axis to account for under/overflow,
as well as to catch instances of _NBINS (see description above)
This depend on the number of bins in the axis of a given
dimension (dim)
'''
if isinstance(index,slice):
# Both start and stop can be either None, an instance of
# _NBINS or an integer
index = slice(1
if index.start is None
else
self.axes[dim].nbins + index.start.offset + 1
if isinstance(index.start, self._NBINS)
else
index.start+1,
self.axes[dim].nbins+1
if index.stop is None
else
self.axes[dim].nbins + index.stop.offset + 1
if isinstance(index.stop, self._NBINS)
else
index.stop+1,
index.step)
# Check bounds. Note index is the _contents index at this point
if index.start < 0 or index.stop > self.axes[dim].nbins + 2:
raise IndexError("Bin index out of bounds")
return index
elif isinstance(index, (np.integer, int)):
if index < -1 or index > self.axes[dim].nbins:
raise IndexError("Bin index out of bounds")
return index+1
elif isinstance(index, self._NBINS):
# Referece with respect to nbins
return _prepare_index(self.axes[dim].nbins + index.offset, dim)
elif isinstance(index, (np.ndarray, list, tuple, range)):
# Note: this will return a copy, not a view
# Handle references with respecto to nbins
index = np.array(index)
if index.dtype == bool:
# Mask. Pad under/overflow
return np.pad(index, 1, constant_values = False)
else:
# Indices
index_shape = index.shape
index = index.flatten()
index = [self.axes[dim].nbins + i.offset + 1
if isinstance(i,self._NBINS) else
i + 1
for i in index]
index = np.reshape(index, index_shape)
# Check bounds. Note index is the _contents index at this point/
if (np.any(index < 0) or
np.any(index > self.axes[dim].nbins + 1)):
raise IndexError("Bin index out of bounds")
return index
else:
raise TypeError("Index can only be an int, slice, list or array")
if isinstance(indices, tuple):
# Get the rest of the dimensions with under/overflow user used ...
if indices[-1] is Ellipsis:
extra_indices = tuple(slice(-1, self.axes[dim].nbins+1)
for dim in range(len(indices)-1, self.ndim))
indices = self._prepare_indices(indices[:-1] + extra_indices)
return indices
# Standard way. All other ways end up here after recursion
indices = tuple(_prepare_index(index, dim)
for dim,index in enumerate(indices))
# Remove under/overflow of the rest of the dimensions
indices += tuple(_prepare_index(slice(None), dim) for dim in
range(len(indices), self.ndim))
return indices
elif isinstance(indices, dict):
return self._prepare_indices(self.axes.expand_dict(indices, default = slice(None)))
else:
# Single axis
return self._prepare_indices(tuple([indices]))
# Access methods
# DO NOT use _contents directly, all goes through here
# to handle sparse arrays correctly. Specifically, do not
# to operations on self._contents (*,+,slice, concatenate, etc.)
# since DOK is very inneficient. Similarly, COO is readonly,
# so trying to use __setitem__ on it will raise an error.
# Assignments to _contents iteself are fine (e.g. self._contents = X)
def __setitem__(self, indices, new_contents):
# Make it mutable if needed
if self.is_sparse:
self._contents = self._contents.asformat('dok')
indices = self._prepare_indices(indices)
try:
self._contents[indices] = self._strip_units(new_contents)
except (IndexError, ValueError) as e:
if self.is_sparse:
# Check if advance indexing is being used
for index in indices:
if isinstance(index, np.ndarray):
logger.warning(
"Advance indexing is not yet be fully supported "
"for sparse arrays. See "
"https://github.com/pydata/sparse/issues/1 and "
"https://github.com/pydata/sparse/issues/114")
break
raise e
@property
def _getitem_units(self):
# Usually, if the histogram has units, [] and interp would return units.
# Unfortunatenly SparseMatrix is not compatible with Quantity, so in sparse
# mode only the values is returned
return not (self.unit is None or self.is_sparse)
def __getitem__(self, indices):
# Go back to efficient read-only if needed
if self.is_sparse:
self._contents = self._contents.asformat('coo')
indices = self._prepare_indices(indices)
try:
if self._getitem_units:
return self._contents[indices]*self.unit
else:
return self._contents[indices]
except IndexError as e:
if self.is_sparse:
# Check if advance indexing is being used
for index in indices:
if isinstance(index, np.ndarray):
logger.warning(
"Advance indexing is not yet be fully supported "
"for sparse arrays. See "
"https://github.com/pydata/sparse/issues/1 and "
"https://github.com/pydata/sparse/issues/114")
break
raise e
class _special_getitem:
"""
This allows to use regular indexing for special access methods.
e.g. h.sumw2[] and h.bin_error[]
"""
def __init__(self, hist):
self._hist = hist
def __getitem__(self, item):
raise NotImplmentedError
def __array__(self):
return self.contents
@property
def contents(self):
return self[:]
@property
def full_contents(self):
return self[...]
class _get_sumw2(_special_getitem):
"""
Return the sum fo the weights squares. If sumw2 is not stored, then
it assumed all the weights of all entries equal 1.
"""
def __getitem__(self, item):
return self._hist._sumw2[item]
def __setitem__(self, indices, new_sumw2):
if self._hist._sumw2 is None:
raise ValueError("Histogram does not have sumw2")
self._hist._sumw2[indices] = new_sumw2
class _get_bin_error(_special_getitem):
"""
Return the sqrt of sumw2
"""
def __getitem__(self, item):
if self._hist._sumw2 is not None:
return np.sqrt(self._hist._sumw2[item])
else:
if self._hist._getitem_units:
# Fake units
return np.sqrt(np.abs(self._hist[item].value))*self._hist.unit
else:
return np.sqrt(np.abs(self._hist[item]))
return np.sqrt(np.abs(self._hist[item]))
def __setitem__(self, indices, new_bin_error):
if self._hist._sumw2 is None:
raise ValueError("Histogram does not have sumw2")
self._hist._sumw2[indices] = np.power(new_bin_error, 2)
@property
def contents(self):
"""
Equivalent to :code:`h[:]`. Does not include under and overflow bins.
"""
return self[:]
@property
def full_contents(self):
"""
Equivalent to :code:`h[...]`. Includes all under and overflow bins.
"""
return self[...]
@property
def axes(self):
"""
Underlaying axes object
"""
return self._axes
@property
def axis(self):
"""
Equivalent to :code:`self.axes[0]`, but fails if :code:`ndim > 1`
"""
if self.ndim > 1:
raise ValueError("Property 'axis' can only be used with 1D "
"histograms. Use `axes` for multidimensional "
"histograms")
return self.axes[0]
@axis.setter
def axis(self, new):
self.axes[0] = new
if self._sumw2 is not None:
self._sumw2.axis = new
@property
def nbins(self):
if self.ndim == 1:
return self.axes[0].nbins
else:
return self.axes.nbins
[docs] def interp(self, *values):
"""
Get a linearly interpolated content for a given set of values
along each axes. The bin contents are assigned to the center of the bin.
Args:
values (float or array): Coordinates within the axes to interpolate.
Must have the same size as `ndim`. Input values as
`(1,2,3)` or `([1,2,3])`
Return:
float
"""
bins,weights = self._axes.interp_weights(*values)
if self._getitem_units:
content = 0*self.unit
else:
content = 0
if bins.ndim == 1:
# Single point case
for bin,weight in zip(bins, weights):
content += weight*self[bin]
else:
# Multi
for bin,weight in zip(bins, weights):
# Each b can be a tuple
bin = np.array(bin, ndmin = 1)
bin_values = np.reshape([self[b] for b in bin.flatten()],
bin.shape)
content += weight*bin_values
return content
def find_bin(self, *args, **kwargs):
return self.axes.find_bin(*args, **kwargs)
def _strip_units(self, quantity):
if isinstance(quantity, u.Quantity):
if quantity.unit == u.dimensionless_unscaled:
return quantity.value
if self.unit is None:
return u.UnitConversionError("Histogram without units")
return quantity.to_value(self.unit)
elif isinstance(quantity, u.UnitBase):
if quantity == u.dimensionless_unscaled:
# Do no crash is self.unit is None
return 1
return quantity.to(self.unit)
else:
if self.unit is not None:
raise u.UnitConversionError("Specify units")
return quantity
[docs] def fill(self, *values, weight = None):
'''
And an entry to the histogram. Can be weighted.
Follow same convention as find_bin()
Args:
values (float or array): Value of entry
weight (float): Value weight in histogram. Defaults to 1 in whatever units
the histogram has
Note:
Note that weight needs to be specified explicitely by key, otherwise
it will be considered a value an a IndexError will be thrown.
'''
indices = self.find_bin(*values)
# Standarize is single axis
if not isinstance(indices, tuple):
indices = (indices,)
# Allow for some values with different shape but broadcastable
indices = np.broadcast_arrays(*indices)
# Convert units if needed
if weight is None:
weight = 1
else:
weight = self._strip_units(weight)
# Flatten all
indices = tuple([i.flatten() for i in indices])
weight = np.array(weight).flatten()
# Make it work with multiple entries at one
new_axes = tuple(np.arange(np.ndim(indices), 2))
indices = np.expand_dims(indices, new_axes).transpose()
weight = np.broadcast_to(weight, indices.shape[0])
# Work with _contents directly. More efficient.
# We don't need all the checks from self._prepare_indices
# because these come from find_bin. This is faster.
indices += 1
if self.is_sparse:
# Filling a dict directly. Sparse has a lot of overhead checking
# that the indices are valid in the __setitem__ method, but I'm
# already checking this with _prepare_indices
# This is faster than doing self._contents[i] += w
d_new_contents = {}
for i,w in zip(indices, weight):
# Add if already exists, otherwise create new entry
try:
elem = d_new_contents[tuple(i)]
elem[0] += w
elem[1] += w*w
except KeyError:
d_new_contents[tuple(i)] = [w, w*w]
# Convert dict to sparse array
coords = np.transpose(list(d_new_contents.keys()))
data = np.array(list(d_new_contents.values()))
new_contents = COO(coords = coords, data = data[:,0],
shape = self._contents.shape)
# Add
self._contents = self._contents.asformat('coo')
self._contents += new_contents
if self._sumw2 is not None:
self._sumw2._contents = self._sumw2._contents.asformat('coo')
new_contents_sqr = COO(coords = coords, data = data[:,1],
shape = self._contents.shape)
self._sumw2._contents += new_contents_sqr
else:
# Add directly for dense array
for i,w in zip(indices, weight):
self._contents[tuple(i)] += w
if self._sumw2 is not None:
for i,w in zip(indices, weight):
self._sumw2._contents[tuple(i)] += w*w
[docs] def project(self, *axis):
"""
Return a histogram consisting on a projection of the current one
Args:
axis (int or str or list): axis or axes onto which the
histogram will be projected --i.e. will sum up over the
other dimensiones--. The axes of the new histogram will
have the same order --i.e. you can transpose axes--
Return:
Histogram: Projected histogram
"""
if self.ndim == 1:
raise ValueError("Can't project a 1D histogram. "
"Consider using np.sum(h) or np.sum(h.full_contents) (with under/overflow)")
#Standarize
if len(axis) == 1 and \
isinstance(axis[0], (list, np.ndarray, range, tuple)):
# Got a sequence
axis = axis[0]
axis = self._axes.label_to_index(axis)
if len(np.unique(axis)) != len(axis):
raise ValueError("An axis can't repeat")
# Project
sum_axes = tuple(dim for dim in range(0, self.ndim) if dim not in axis)
if len(sum_axes) == 0:
new_contents = deepcopy(self.full_contents)
else:
new_contents = self.full_contents.sum(axis = sum_axes)
# Transpose the contents to match the order of the axis provided by the
# the user, which are currently sorted
new_contents = np.transpose(new_contents,
axes = np.argsort(np.argsort(axis)))
new_sumw2 = None
if self._sumw2 is not None:
new_sumw2 = self._sumw2.project(axis).full_contents
return Histogram(edges = self._axes[axis],
contents = new_contents,
sumw2 = new_sumw2,
unit = self.unit)
[docs] def clear_overflow(self, axes = None):
"""
Set all overflow bins to 0, including sumw2
Args:
axes (None or array): Axes number or labels. All by default
"""
if axes is None:
axes = range(self.ndim)
elif np.isscalar(axes):
axes = [axes]
for n in axes:
indices = self.expand_dict({n:self.of}, self.all)
self[indices] = 0
if self._sumw2 is not None:
self._sumw2.clear_overflow(axes)
[docs] def clear_underflow(self, axes = None):
"""
Set all overflow bins to 0, including sumw2
Args:
axes (None or array): Axes number or labels. All by default
"""
if axes is None:
axes = range(self.ndim)
elif np.isscalar(axes):
axes = [axes]
for n in axes:
indices = self.expand_dict({n:self.uf}, self.all)
self[indices] = 0
if self._sumw2 is not None:
self._sumw2.clear_underflow(axes)
[docs] def clear_underflow_and_overflow(self, *args, **kwargs):
"""
Set all underflow and overflow bins to 0, including sumw2
Args:
axes (None or array): Axes number or labels. All by default
"""
self.clear_underflow(*args, **kwargs)
self.clear_overflow(*args, **kwargs)
class _slice:
"""
Return a histogram which is a slice of the current one
"""
def __init__(self, hist):
self._hist = hist
def __getitem__(self, item):
# Standarize indices into slices
indices = self._hist._prepare_indices(item)
indices = tuple(slice(i,i+1) if isinstance(i, (int, np.integer)) else i for i in indices)
# Check indices and either pad or use under/overflor
# If under/overflow are including, will use them instead of pads
padding = np.ones([self._hist.ndim, 2], dtype = int)
axes_indices = np.empty(self._hist.ndim, dtype = 'O')
new_nbins = np.empty(self._hist.ndim, dtype = int)
for ndim,(index, axis) in enumerate(zip(indices,
self._hist.axes)):
# Sanity checks
if not isinstance(index, slice):
raise TypeError("Only slices and integers allowed in slice[]")
start,stop,stride = index.indices(axis.nbins+2)
if stride != 1:
raise ValueError("Step must be 1 when getting a slice."
"Alertnatively us rebin().")
if start == stop:
raise ValueError("Slice must have a least one bin in "
"all dimensions. Alternatively use project().")
if start > stop:
raise ValueError("Slices cannot reverse the bin order.")
# Handle under/overflow
axis_start = start # Axes don't have under/overflow
axis_stop = stop
if start == 0:
if stop-start == 1:
raise ValueError("The slice cannot contain only the "
"underflow bin")
axis_start += 1
padding[ndim, 0] = 0
if stop == axis.nbins + 2:
if stop-start == 1:
raise ValueError("The slice cannot contain only the "
"overflow bin")
axis_stop -= 1
padding[ndim, 1] = 0
axes_indices[ndim] = slice(axis_start, axis_stop)
new_nbins[ndim] = stop-start
axes_indices = tuple(axes_indices)
# Get new contents. Pad is underflow/overflow are not included
new_contents = self._hist.full_contents[indices]
new_contents = np.pad(new_contents, padding)
# New Axes
new_axes = []
for axis,index in zip(self._hist.axes,axes_indices):
new_axis_kw = {'edges': axis.edges[index.start-1:index.stop],
'label': axis.label}
if isinstance(axis, HealpixAxis):
new_axis_kw['nside'] = axis.nside
new_axis_kw['scheme'] = axis.scheme
new_axis_kw['coordsys'] = axis.coordsys
else:
new_axis_kw['scale'] = axis.axis_scale
new_axes += [axis.__class__(**new_axis_kw)]
# Sum weights squared
new_sumw2 = None
if self._hist._sumw2 is not None:
new_sumw2 = self._hist._sumw2.slice[item]
# Create new histogram
return Histogram(edges = new_axes,
contents = new_contents,
sumw2 = new_sumw2,
unit = self._hist.unit)
def _unit_operation(self, other, operation):
"""
Get the value part of the other operand and the new unit of the histogram
"""
# Separate between value and unit
if isinstance(other, Histogram):
other_unit = other.unit
other_value = other # It will be handled as a regular HealpixMap
elif isinstance(other, u.Quantity):
other_unit = other.unit
other_value = other.value
elif isinstance(other, u.UnitBase):
other_unit = other
other_value = 1
else:
# float, int, array, list
other_unit = None
other_value = np.array(other)
if self.unit is None and other_unit is None:
# If neither operand have units, do nothing else
return other_value, None
# Adjust other_value and self.unit depending on the operand
# Standarize dimensionless
if other_unit is None:
other_unit = u.dimensionless_unscaled
old_unit = self.unit
if old_unit is None:
old_unit = u.dimensionless_unscaled
# For * and / the conversion factor is stored in the unit itself
# ** only accepts scalar dimensionaless quantities, it will crash anyway
# The idencity operator (for the raterizer) doesn't use the other's units
if operation in [operator.add, operator.iadd,
operator.sub, operator.isub]:
# +, -
# We need to correct the value by the conversion unit
# No change in units
other_value = other_value * other_unit.to(old_unit)
new_unit = old_unit
elif operation in [operator.mul, operator.imul,
operator.truediv, operator.itruediv,
operator.floordiv, operator.ifloordiv]:
# *, /
# The conversion factor is stored in the unit itself
new_unit = operation(old_unit, other_unit)
else:
raise ValueError("Operation not supported")
return other_value, new_unit
def _ioperation(self, other, operation):
sum_operation = operation in [operator.isub, operator.iadd,
operator.sub, operator.add]
product_operation = operation in [operator.imul, operator.itruediv,
operator.mul, operator.truediv]
# Get the value part of other and the new unit the result should have
other, new_unit = self._unit_operation(other, operation)
# Temporarily disable units
self.to(None, update = False, copy = False)
if isinstance(other, Histogram):
# Temporarily disable units
other_unit = other._unit
other.to(None, update = False, copy = False)
# Another histogram, same axes
if self.axes != other.axes:
raise ValueError("Axes mismatch")
new_contents = operation(self.full_contents, other.full_contents)
if self._sumw2 is not None or other._sumw2 is not None:
if self._sumw2 is None or other._sumw2 is None:
logger.warning("Operation between histograms with and "
"without sumw2. Using default.")
if sum_operation:
self._sumw2._contents = self.sumw2.full_contents + other.sumw2.full_contents
elif product_operation:
# Error of either f = A*B or f = A/B is
# f_err^2 = f^2 * ((A_err/A)^2 + (B_err/B)^2)
relvar = (self.sumw2.full_contents/
(self.full_contents*self.full_contents))
other_relvar = (other.sumw2.full_contents/
(other.full_contents*other.full_contents))
self._sumw2._contents = (new_contents*new_contents*
(relvar + other_relvar))
else:
raise ValueError("Operation not supported")
self._contents = new_contents
# Restore units
other.to(other_unit, update = False, copy = False)
else:
# By scalar or array. Can be broadcasted
if np.ndim(other) != 0:
# Array. With or without under/overflow bins
if not isinstance(other, (np.ndarray, SparseArray)):
other = np.array(other)
if other.ndim != self.ndim:
raise ValueError("Operand number of dimensions ({}) does not"
"match number of axes ({})".format(other.ndim,
self.ndim))
pad = []
for n,(length,nbins) in enumerate(zip(other.shape, self.axes.nbins)):
if length == 1 or length == nbins+2:
# Single element axis can be broadcasted as is
# OR array includes under/overflow
pad += [(0,)]
elif length == nbins:
# Missing under and overflow, but right shape.
pad += [(1,)]
else:
raise ValueError("Could not broadcast array of shape {} "
"to histogram with nbins "
"{}".format(other.shape,
self.axes.nbins))
other = np.pad(other, pad)
self._contents = operation(self.full_contents, other)
if self._sumw2 is not None:
if sum_operation:
# sumw2 remains constant if summing/substracting a constant
pass
elif product_operation:
self._sumw2 = operation(operation(self._sumw2, other), other)
else:
raise ValueError("Operation not supported")
# Restore unit
self.to(new_unit, update = False, copy = False)
return self
def _operation(self, other, operation):
new = deepcopy(self)
new._ioperation(other, operation)
return new
def __imul__(self, other):
return self._ioperation(other, operator.imul)
def __mul__(self, other):
return self._operation(other, operator.mul)
def __rmul__(self, other):
return self*other
def __itruediv__(self, other):
return self._ioperation(other, operator.itruediv)
def __truediv__(self, other):
return self._operation(other, operator.truediv)
def __rtruediv__(self, other):
"""
Divide a scalar by the histogram
"""
if not np.isscalar(other):
raise ValueError("Inverse operation can only occur between "
"histograms or a histogram and a scalar")
new = deepcopy(self)
# Simple change of unit in this case. Other can't be Quantity or Unit
new_unit = None
new_w2unit = None
if new.unit is not None:
new_unit = (1/new.unit).unit
new_w2unit = new_unit**2
# Temporarily disable unit
new.to(None, update = False, copy = False)
# Error propagtion of f = b/A (where b is constant, no error) is:
# f_err^2 = f^2 (A_err/A)^2
if new._sumw2 is not None:
new._sumw2._contents = (new._sumw2.full_contents * other*other /
np.power(new.full_contents, 4))
new._contents = other/new.full_contents
# Set units
new.to(new_unit, update = False, copy = False)
return new
def __iadd__(self, other):
return self._ioperation(other, operator.iadd)
def __add__(self, other):
return self._operation(other, operator.add)
def __radd__(self, other):
return self + other
def __neg__(self):
new = deepcopy(self)
new._contents = -1*new.full_contents
# No change to sumw2 or units
return new
def __isub__(self, other):
return self._ioperation(other, operator.isub)
def __sub__(self, other):
return self._operation(other, operator.sub)
def __rsub__(self, other):
return -self + other
def _comparison_operator(self, other, operation):
if not self._getitem_units:
other = self._strip_units(other)
return operation(self.contents, other)
def __lt__(self, other):
return self._comparison_operator(other, operator.lt)
def __le__(self, other):
return self._comparison_operator(other, operator.le)
def __gt__(self, other):
return self._comparison_operator(other, operator.gt)
def __ge__(self, other):
return self._comparison_operator(other, operator.ge)
[docs] def expand_dims(self, *args, **kwargs):
"""
Same as h.axes.expand_dims().
"""
return self._axes.expand_dims(*args, **kwargs)
[docs] def broadcast(self, *args, **kwargs):
"""
Same as h.axes.broadcast().
"""
return self._axes.broadcast(*args, **kwargs)
[docs] def expand_dict(self, *args, **kwargs):
"""
Same as h.axes.expand_dict().
"""
return self._axes.expand_dict(*args, **kwargs)
[docs] def rebin(self, *ngroup):
"""
Rebin the histogram by grouping multiple bins into a single one.
Args:
ngroup (int or array): Number of bins that will be combined for each
bin of the output. Can be a single number or a different number
per axis. A number <0 indicates that the bins will start to be
combined starting from the last one.
Return:
Histogram
"""
# === Contents ===
# New number of bins.
ngroup = np.squeeze(ngroup)
ngroup = np.broadcast_to(np.abs(ngroup), (self.ndim))
ngroup_sign = np.sign(ngroup)
new_nbins = np.floor(self.axes.nbins / ngroup).astype(int)
if any(ngroup == 0):
raise ValueError("ngroup cannot be 0")
# Add padding do under/overflow bins match ngroup including "leftover bins"
padding = [(ngroup_i - 1,
ngroup_i - (1 + nbins_i - new_nbins_i * ngroup_i))
for ngroup_i, nbins_i, new_nbins_i
in zip(ngroup, self.axes.nbins, new_nbins)]
# Rever direction if needed
padding = [pad[::direction]
for pad,direction
in zip(padding,ngroup_sign)]
new_contents = np.pad(self.full_contents, padding)
# Sum every ngroup elements by reshaping first
new_shape = np.empty(2*self.ndim, dtype = int)
new_shape[0::2] = new_nbins+2
new_shape[1::2] = ngroup
new_shape = tuple(new_shape)
sum_axis = tuple(np.arange(1,2*self.ndim,2))
new_contents = np.sum(new_contents.reshape(new_shape), axis = sum_axis)
# === Adjust edges ===
new_axes = []
for axis,ngroup_i,ngroup_sign_i in zip(self._axes, ngroup, ngroup_sign):
# Very ngroup-th edge
new_edges = axis.edges[::ngroup_i * ngroup_sign_i][::ngroup_sign_i]
# New axis keeping properties
new_axes += [Axis(new_edges,
label = axis.label,
scale = axis.axis_scale)]
# === Sum weights square ===
new_sumw2 = None
if self._sumw2 is not None:
new_sumw2 = self._sumw2.rebin(ngroup * ngroup_sign)
# === New histogram ===
return Histogram(new_axes, new_contents, new_sumw2, unit = self.unit)
[docs] def plot(self, ax = None,
errorbars = None,
colorbar = True,
label_axes = True,
**kwargs):
"""
Quick plot of the histogram contents.
Under/overflow bins are not included. Only 1D and 2D histograms
are supported.
Histogram with a HealpixAxis will automatically be plotted
as a map, passing all kwargs to mhealpy's HealpixMap.plot()
Args:
ax (matplotlib.axes): Axes on where to draw the histogram. A new
one will be created by default.
errorbars (bool or None): Include errorbar for 1D histograms. The default is to
plot them if sumw2 is available
colorbar (bool): Draw colorbar in 2D plots
label_axes (bool): Label plots axes. Histogram axes must be labeled.
**kwargs: Passed to `matplotlib.errorbar()` (1D) or
`matplotlib.pcolormesh` (2D)
"""
# Matplotlib errorbar and pcolormesh need regular array
contents = self.contents
if self.is_sparse:
contents = self.contents.todense()
if self._getitem_units:
contents = self._strip_units(contents)
# Handle the special case of a healpix axis
if self.ndim == 1 and isinstance(self.axis, HealpixAxis):
# Pad in case it is partial map
contents = np.pad(contents,
(self.axis.lo_lim,
self.axis.npix - self.axis.hi_lim))
m = HealpixMap(data = contents, base = self.axis)
args = ()
if ax is not None:
args = (ax,)
plot, ax = m.plot(*args,
cbar = colorbar,
**kwargs)
return ax, plot
# Default errorbars
if errorbars is None:
if self.sumw2 is None:
errorbars = False
else:
errorbars = True
# Create axes if needed (with labels)
if ax is None:
fig,ax = plt.subplots()
# Plot, depending on number of dimensions
if self.ndim == 1:
# We have two points per bin (lower edge+center), and 2 extra
# point for under/overflow (these currently don't have errorbar,
# they looked bad)
if self.axis.unit is None:
xdata = np.empty(2*self.nbins + 2)
else:
xdata = np.empty(2*self.nbins + 2)*self.axis.unit
xdata[0] = self.axis.edges[0] # For underflow, first edge
xdata[1::2] = self.axis.edges # In between edges. Last edge for overflow
xdata[2::2] = self.axis.centers # For markers
underflow = self[-1]
overflow = self[self.nbins]
if self._getitem_units:
underflow = self._strip_units(underflow)
overflow = self._strip_units(overflow)
ydata = np.concatenate(([underflow],
np.repeat(contents, 2),
[overflow]))
# Style
drawstyle = kwargs.pop('drawstyle', 'steps-post')
# Error bars
yerr = None
if errorbars:
errors = self.bin_error.contents
if self.is_sparse:
# No auto densify
errors = errors.todense()
yerr = np.empty(2*self.nbins + 2)
yerr[2::2] = errors
yerr[0] = None # No underflow errorbar, looked bad
yerr[1::2] = None # No overflow errorbar, looked bad
# Plot
plot = ax.errorbar(u.Quantity(xdata).value,
ydata,
yerr = yerr,
drawstyle = drawstyle,
**kwargs)
# Label axes
if label_axes:
ax.set_xlabel(self.axis.label_with_unit)
if self.unit not in [None, u.dimensionless_unscaled]:
ax.set_ylabel(f"[{self.unit}]")
elif self.ndim == 2:
# No under/overflow
plot = ax.pcolormesh(u.Quantity(self.axes[0].edges).value,
u.Quantity(self.axes[1].edges).value,
np.transpose(contents),
**kwargs)
if label_axes:
ax.set_xlabel(self.axes[0].label_with_unit)
ax.set_ylabel(self.axes[1].label_with_unit)
if colorbar:
cax = ax.get_figure().colorbar(plot, ax = ax)
if self.unit not in [None, u.dimensionless_unscaled]:
cax.set_label(f"[{self.unit}]")
else:
raise ValueError("Plotting only available for 1D and 2D histograms")
if self.axes[0].axis_scale == 'log':
ax.set_xscale('log')
if self.ndim > 1:
if self.axes[1].axis_scale == 'log':
ax.set_yscale('log')
return ax,plot
draw = plot
[docs] def fit(self, f, lo_lim = None, hi_lim = None, **kwargs):
"""
Fit histogram data using least squares.
This is a convenient call to scipy.optimize.curve_fit. Sigma corresponds
to the output of `h.bin_error`. Empty bins (e.g. error equals 0) are
ignored
Args:
f (callable): Function f(x),... that takes the independent variable
x as first argument, and followed by the parameters to be fitted.
For a k-dimensional histogram is should handle arrays of shape
(k,) or (k,N).
lo_lim (float or array): Low axis limit to fit. One value per axis.
lo_lim (float or array): High axis limit to fit. One value per axis.
**kwargs: Passed to scipy.optimize.curve_fit
"""
# Sanity checks
for axis,lo,hi in zip(self.axes,
np.broadcast_to(lo_lim, self.ndim),
np.broadcast_to(hi_lim, self.ndim)):
if ((lo is not None and lo < axis.lo_lim) or
(hi is not None and hi >= axis.hi_lim)):
raise ValueError("Fit limits out of bounds")
# Get bins that correspond to the fit limits
lim_bins = tuple([slice(None if lo is None else axis.find_bin(lo),
None if hi is None else axis.find_bin(hi))
for axis,lo,hi
in zip(self.axes,
np.broadcast_to(lo_lim, self.ndim),
np.broadcast_to(hi_lim, self.ndim))])
# Get data to fit
x = [axis.centers[bins] for axis,bins in zip(self.axes,lim_bins)]
x = np.meshgrid(*x, indexing='ij') # For multi-dimensional histograms
y = self[lim_bins]
sigma = self.bin_error[lim_bins]
if self._getitem_units:
y = y.value
if self.is_sparse:
y = y.todense()
sigma = sigma.todense()
# Ignore empty bins
non_empty = sigma != 0
x = [centers[non_empty] for centers in x]
y = y[non_empty]
sigma = sigma[non_empty]
# Flat matrices
if self.ndim > 1:
x = [centers.flatten() for centers in x]
y = y.flatten()
sigma = sigma.flatten()
else:
x = x[0]
# Sanity checks
if len(x) < len(signature(f).parameters)-1:
raise RuntimeError("Less bins within limits than parameters to fit.")
# Actual fit with scipy
return curve_fit(f, x, y, sigma = sigma, **kwargs)
[docs] def write(self, filename, name = "hist", overwrite = False):
"""
Write histogram to disk.
It will be save as a group in a HDF5 file. Appended if the file already
exists.
Args:
filename (str): Path to file
name (str): Name of group to save histogram (can be any HDF5 path)
overwrite (str): Delete and overwrite group if already exists.
"""
with h5.File(filename, 'a') as f:
# Will fail on existing group by default
if name in f:
if overwrite:
del f[name]
else:
raise ValueError("Unable to write histogram. Another group "
"with the same name already exists. Choose "
"a different name or use overwrite")
# Contents
hist_group = f.create_group(name)
if self.unit is not None:
hist_group.attrs['unit'] = str(self.unit)
if self.is_sparse:
hist_group.attrs['format'] = 'coo'
contents_group = hist_group.create_group('contents')
contents = self._contents.asformat('coo')
contents_group.create_dataset('coords',
data = contents.coords,
compression = "gzip")
contents_group.create_dataset('data',
data = contents.data,
compression = "gzip")
contents_group.create_dataset('shape', data = contents.shape)
contents_group.create_dataset('fill_value', data = contents.fill_value)
if self._sumw2 is not None:
sumw2_group = hist_group.create_group('sumw2')
sumw2_contents = self._sumw2._contents.asformat('coo')
sumw2_group.create_dataset('coords',
data = sumw2_contents.coords,
compression = "gzip")
sumw2_group.create_dataset('data',
data = sumw2_contents.data,
compression ="gzip")
sumw2_group.create_dataset('shape',
data = sumw2_contents.shape)
sumw2_group.create_dataset('fill_value',
data = sumw2_contents.fill_value)
else:
hist_group.attrs['format'] = 'dense'
hist_group.create_dataset('contents', data = self._contents)
if self._sumw2 is not None:
hist_group.create_dataset('sumw2', data = self._sumw2._contents)
# Axes. Each one is a data set with attributes
axes_group = hist_group.create_group('axes', track_order = True)
for i,axis in enumerate(self.axes):
axis._write(axes_group, str(i))
[docs] @classmethod
def open(cls, filename, name = 'hist'):
"""
Read histogram from disk.
Args:
filename (str): Path to file
name (str): Name of group where the histogram was saved.
"""
with h5.File(filename, 'r') as f:
hist_group = f[name]
unit = None
if 'unit' in hist_group.attrs:
unit = u.Unit(hist_group.attrs['unit'])
# Contents
# Backwards compatible before sparse was supported
if ('format' not in hist_group.attrs or
hist_group.attrs['format'] == 'dense'):
contents = np.array(hist_group['contents'])
sumw2 = None
if 'sumw2' in hist_group:
sumw2 = np.array(hist_group['sumw2'])
elif hist_group.attrs['format'] == 'gcxs':
contents_group = hist_group['contents']
compressed_axes = None
if 'compressed_axes' in contents_group:
compressed_axes = np.array(contents_group['compressed_axes'])
contents = GCXS((np.array(contents_group['data']),
np.array(contents_group['indices']),
np.array(contents_group['indptr'])),
compressed_axes = compressed_axes,
shape = tuple(contents_group['shape']),
fill_value = np.array(contents_group['fill_value']).item())
sumw2 = None
if 'sumw2' in hist_group:
sumw2_group = hist_group['sumw2']
compressed_axes = None
if 'compressed_axes' in sumw2_group:
compressed_axes = np.array(sumw2_group['compressed_axes'])
sumw2 = GCXS((np.array(sumw2_group['data']),
np.array(sumw2_group['indices']),
np.array(sumw2_group['indptr'])),
compressed_axes = compressed_axes,
shape = tuple(sumw2_group['shape']),
fill_value = np.array(sumw2_group['fill_value']).item())
elif hist_group.attrs['format'] == 'coo':
contents_group = hist_group['contents']
contents = COO(coords = np.array(contents_group['coords']),
data = np.array(contents_group['data']),
shape = tuple(contents_group['shape']),
fill_value = np.array(contents_group['fill_value']).item())
sumw2 = None
if 'sumw2' in hist_group:
sumw2_group = hist_group['sumw2']
sumw2 = COO(coords = np.array(sumw2_group['coords']),
data = np.array(sumw2_group['data']),
shape = tuple(sumw2_group['shape']),
fill_value = np.array(sumw2_group['fill_value']).item())
else:
raise IOError(f"Format {hist_group.attrs['format']} unknown.")
# Axes
axes_group = hist_group['axes']
axes = []
for axis in axes_group.values():
# Get class. Backwards compatible with version
# with only Axis
axis_cls = Axis
if '__class__' in axis.attrs:
class_module, class_name = axis.attrs['__class__']
axis_cls = getattr(sys.modules[class_module], class_name)
axes += [axis_cls._open(axis)]
return Histogram(axes, contents = contents, sumw2 = sumw2, unit = unit)