import logging
logger = logging.getLogger(__name__)
from enum import Enum
import operator
from copy import deepcopy
import numpy as np
from sparse import DOK, COO, GCXS, SparseArray
# This is an ugly way to get the value
# of AUTO_DENSIFY. There doesn't seem to be
# any public-facing API to get it.
import sparse
from packaging.version import Version
try:
sparse_version = Version(sparse.__version__)
except TypeError:
# work when sparse is a mocked package when building sphinx documentation
sparse_version = Version("0")
if sparse_version >= Version('0.16.0'):
import sparse.numba_backend._settings as sps
else:
import sparse._settings as sps
import astropy.units as u
import datetime
from .axes import Axes
from .axis import Axis
from .time_axis import TimeAxis
from .time_delta_axis import TimeDeltaAxis
from astropy.time import Time, TimeDelta
from .feature import COPY_IF_NEEDED
# Invariants that hold for Histogram objects:
#
# At the beginning of each call to a method,
# if Histogram is sparse, it is in COO format
#
# If a Histogram has sparse contents and has sumw2,
# its sumw2 is also sparse.
[docs]
class Histogram(object):
"""This is a wrapper of a numpy array with axes and a fill
method. Sparse array from pydata's sparse package are also
supported.
Like an array, the histogram can have an arbitrary number of
dimensions.
Standard numpy array indexing is supported to get the contents
--i.e. :code:`h[:]`, :code:`h[4]`, :code:`h[[1,3,4]]`,
:code:`h[:,5:50:2]]`, etc.--. However, the meaning of the
:code:`-1` index is different. Instead of counting from the end,
:code:`-1` corresponds to the underflow bin. Similarly, an index
equal to the number of bins corresponds to the overflow bin.
You can however give relative position with respect to
:code:`h.end` --e.g. :code:`h[0:h.end]` result in all regular
bins, :code:`h[-1:h.end+1]` includes also the underflow/overflow
bins and :code:`h[h.end]` gives you the contents of the overflow
bin. The convenient aliases :code:`h.uf = -1`, :code:`h.of =
e.end` and :code:`h.all = slice(-1,h.end+1)` --or
:code:`slice(0,h.end)` if the under/overflow are not tracked-- are
provided.
You can also use an :code:`Ellipsis` object (:code:`...`) at the
end to specify that the contents from the rest of the dimension
are to have the under and overflow bins included. e.g. for a 3D
histogram :code:`h[1,-1:h.end+1,-1:h.end+1] =
h[1,...]`. :code:`h[:]` returns all contents without
under/overflow bins and h[...] returns everything, including those
special bins.
If no initial contents are provided, axes do *not* track
under/overflow by default. If contents are provided, an axis
tracks under/overflow by default if the provided contents has two
more bins in that dimension than axis.nbins. You can specify that
certain axes will/will not track under/overflow with the
:code:`track_overflow` keyword. Note that attempting to access an
underflow/overflow bin on an axis that is not tracked will result
in an IndexError.
If :code:`sumw2` is not :code:`None`, then the histogram will keep
track of the sum of the weights squared. You should use this
feature if you are using weighted data and are concerned about
error propagation. You can access the sum of squared wieghts with
:code:`h.sumw2[item]`, where `item` is interpreted the same way as
in :code:`h[item]`. :code:`h.bin_error[item]` return the
:code:`sqrt(sumw2)` (or :code:`sqrt(contents)` is :code:`sumw2 was
not specified`).
The binary operators :code:`+`, :code:`-`, :code:`*` and :code:`/`
are supported for Histograms and correctly propagate error if
sumw2 is present. The other operand can be a Histogram, a scalar
or an array of appropiate size. Note that :code:`h += h0` is more
efficient than :code:`h = h + h0` since latter involves the
instantiation of a new histogram. Unary negation of a Histogram is
supported, as is dividing a scalar by a Histogram.
Args:
edges (Axes, Axis or array): Definition of bin edges, as anything
that can be processes by Axes. Lower edge value is
included in the bin, upper edge value is excluded.
contents (array-like or SparseArray): Initialization of
histogram contents. May include overflow/underflow bins if
overflow is being tracked; if tracking is enabled and
contents does not have these bins, they will be
initialized to zeros. If omitted, creates an array of
zeros.
sumw2 (None, bool or array): If not None, the histogram will
maintain squared weights associated with the elements of
contents. These weights are initially zero if sumw2 =
True but may instead be initialized explicitly with an
array. Arithmetic between two histograms with squared
weights propagates these weights to the result according
to propagation-of-error theory.
labels (array of str): Optionally label the axes for easier indexing
axis_scale (str or array): Bin center mode e.g. `"linear"` or
`"log"`. See ``Axis.axis_scale``.
sparse (bool): indicate if contents and sumw2 should be
maintained as dense or sparse arrays. If specified,
contents and sumw2 will be converted to the specified
sparsity if needed (but attempting to densify a sparse
matrix will fail to avoid unexpected memory blowups). If
not specified, the Histogram's sparsity follows that of
the provided contents, or is dense if no contents are
provided.
unit (Unit-like): unit of contents; if not specified, inferred
from contents if u.Quantity or None otherwise
track_overflow (bool, array-like, or dict): Whether to
allocate space to track the underflow and overflow
bins. Acceptable forms include
- a single boolean value (applies to all axes)
- a 1-D array-like with a boolean value per axis
- a dictionary specifying boolean values for a set
of named/numbered axes. For axes not in the dict,
the default is True.
If this parameter is not provided, the default behavior
depends on the value of the `contents` argument.
- if `contents` is not provided, overflow is not tracked
on any axis.
- if `contents` is provided, each axis tracks overflow
if contents includes overflow/underflow bins for that
axis, i.e., if its size along that axis is two more
than the axis' number of bins.
dtype: Numpy datatype or None type of contents array; if None,
use type of provided contents, or default (float64) if
none provided.
copy_contents (bool): if True (default), numpy arrays or
Quantity arrays passed as contents and sumw2 will *not* be
copied unless necessary; hence, the Histogram's memory may
alias these values.
"""
def __init__(self, edges, contents = None, sumw2 = None,
labels = None, axis_scale = None,
sparse = None, unit = None, track_overflow = None,
dtype = None, copy_contents = True):
from scipy.sparse import sparray, spmatrix
self._axes = Axes(edges, labels = labels, axis_scale = axis_scale)
# set unit of Histogram, if any
if unit is None:
if isinstance(contents, u.Quantity):
self._unit = contents.unit # derive unit from contents
else:
self._unit = None
else:
self._unit = u.Unit(unit)
# Standardize contents (with under/overflow) or initialize them to zero.
# Track operations that might make a copy, so we know if we still need
# to copy the contents array at the end. We don't want an unconditional
# copy because contents might be very large.
if contents is not None:
is_copied = False
# convert Quantity to bare value, after adjusting it to
# histogram's unit (if any).
if isinstance(contents, u.Quantity):
cnew = contents.to_value(self._unit)
if cnew is not contents.value:
is_copied = True
contents = cnew
# convert SciPy arrays to COO
if isinstance(contents, (sparray, spmatrix)):
is_copied = True
contents = COO.from_scipy_sparse(contents)
# make sure contents is a dense or sparse array
if not isinstance(contents, (np.ndarray, SparseArray)):
is_copied = True
contents = np.asarray(contents, dtype=dtype)
contents = np.atleast_1d(contents)
if contents.ndim != self._axes.ndim:
raise ValueError(f"cannot use contents of dimension {contents.ndim} for "
f"Histogram of dimension {self._axes.ndim}")
if isinstance(contents, (GCXS, DOK)):
# sparse arrays are initially stored as COO. Slicing
# does not work well with GCXS, so we use COO instead;
# See https://github.com/pydata/sparse/issues/550.
is_copied = True
contents = contents.asformat('coo')
if dtype is not None:
# make sure contents' dtype matches given dtype
is_copied |= (contents.dtype != dtype)
contents = contents.astype(dtype, copy=False)
if sparse is not None:
if sparse and not isinstance(contents, SparseArray):
is_copied = True
contents = COO.from_numpy(contents)
elif not sparse and isinstance(contents, SparseArray):
if not sps.AUTO_DENSIFY:
raise RuntimeError("Cannot convert sparse contents to dense automatically. "
"To manually densify, use the todense method.")
is_copied = True
contents = contents.todense()
if track_overflow is None:
# by default, overflow tracking follows contents shape
track_overflow = (contents.shape == self._axes.nbins + 2)
# convert track_overflow argument to standard format
self._track_overflow = self._standardize_track_overflow(track_overflow)
# determine if contents must be padded to match overflow tracking settings
padding = self._get_padding_for_overflow(self._track_overflow, self._axes.nbins, contents.shape)
self._contents = self._pad(contents, padding)
if self._contents is not contents:
is_copied = True
if copy_contents and not is_copied:
self._contents = self._contents.copy()
else:
if track_overflow is None:
# by default, overflow tracking is *off* for empty contents
track_overflow = False
# convert track_overflow argument to standard format
self._track_overflow = self._standardize_track_overflow(track_overflow)
# initialize contents to array of all zeros
contents_shape = tuple( self._axes.nbins + 2 * self._track_overflow )
if sparse:
self._contents = DOK(shape = contents_shape,
dtype = dtype,
fill_value = 0).asformat('coo')
else:
self._contents = np.zeros(contents_shape, dtype=dtype)
self._init_specials()
# create sumw2 histogram if requested
self.set_sumw2(sumw2, copy = copy_contents)
def _init_specials(self):
"""
Create objects for special access methods on Histogram's contents
"""
self.bin_error = self._get_bin_error(self)
self.slice = self._slice(self)
[docs]
def set_sumw2(self, sumw2, copy = True):
"""
Set the sumw2 matrix to a Histogram to track the sum of error
weights. If not None/False, sumw2 must be either an array-like
with the same shape as contents or a Histogram with the same axes
as contents. It will be coerced to have the same units,
sparsity, dtype, and overflow tracking as the base Histogram.
Args:
sumw2
values for weights: True for all zeros, or a Histogram, or an
array-like; None or False to remove any existing sumw2
copy: bool
if True (default), copy the object passed as sumw2; otherwise,
create a view into the object if possible.
"""
if sumw2 is None or sumw2 is False:
self._sumw2 = None
else:
# derive unit of sumw2 from that of main histogram
if self._unit is not None:
w2unit = self._unit**2
else:
w2unit = None
if isinstance(sumw2, Histogram):
if sumw2._axes != self._axes:
raise ValueError("Histogram sumw2 must have same axes as contents")
is_copied = False
# match dtype of sumw2 to that of contents
is_copied |= (sumw2._contents.dtype != self._contents.dtype)
sumw2 = sumw2.astype(self._contents.dtype, copy=False)
# match sparsity of sumw2 to that of contents
if not sumw2.is_sparse and self.is_sparse:
is_copied = True
sumw2 = sumw2.to_sparse()
elif sumw2.is_sparse and not self.is_sparse:
if not sps.AUTO_DENSIFY:
raise RuntimeError("Cannot convert a sparse histogram to dense automatically. "
"To manually densify, use the todense method.")
is_copied = True
sumw2 = sumw2.to_dense()
# match overflow tracking of sumw2 to that of contents
if not np.array_equal(sumw2._track_overflow, self._track_overflow):
is_copied = True
sumw2.track_overflow(self._track_overflow)
# adjust unit of sumw2 and copy it if needed
self._sumw2 = sumw2.to(w2unit, copy = copy and not is_copied)
else:
sumw2_contents = None if sumw2 is True else sumw2
# for zero init, bare value, or Quantity, let __init__ do coercion of contents
self._sumw2 = Histogram(self._axes,
contents = sumw2_contents,
dtype = self._contents.dtype,
sparse = self.is_sparse,
unit = w2unit,
track_overflow = self._track_overflow,
copy_contents = copy)
[docs]
def copy(self):
"""
Make a deep copy of a Histogram. The copy shares no
*writable* members with the original; the only shared
members are those that will never be mutated.
This function preserves subclass types if called from a
derived class. Subclasses with additional data members
may override this function; if they do not, their
data members will be deepcopied.
"""
# short-circuit if we are in the middle of _replace
if hasattr(self, '_new_instance'):
return self._new_instance
else:
return self._replace() # no replacements
def __deepcopy__(self, memo):
"""
Hook for deepcopy()
"""
self._memo = memo # cache memo dict in case we need it
new = self._replace() # no replacements
del self._memo
return new
# all recognized data members of Histogram.
# CHANGE THIS and the _replace() function if
# we add or remove data members.
_data_members = frozenset([
'_axes',
'_contents',
'_sumw2',
'_unit',
'_track_overflow',
])
def _replace(self, **kwargs):
"""
Make a deep copy of a Histogram as defined for copy(), but
optionally specify one or more key-value pairs naming a data
member of Histogram and a value to use for it in the new
object instead of performing the default copy operation.
"""
cls = self.__class__
new = cls.__new__(cls)
# set any members specified by kwargs
for member in kwargs:
assert member in Histogram._data_members, \
f"Histogram has no data member {member}"
setattr(new, member, kwargs[member])
if not hasattr(new, '_axes'):
new._axes = self._axes.copy()
if not hasattr(new, '_contents'):
new._contents = self._contents.copy()
if not hasattr(new, '_unit'):
new._unit = self._unit # no need to copy unit
if not hasattr(new, '_track_overflow'):
new._track_overflow = self._track_overflow # no need to copy track_overflow
new._init_specials()
if not hasattr(new, '_sumw2'):
if self._sumw2 is not None:
new._sumw2 = self._sumw2.copy()
else:
new._sumw2 = None
if cls != Histogram:
if cls.copy == Histogram.copy: # copy not overridden
self_dict = vars(self)
new_dict = vars(new)
# if we were called from __deepcopy__(), pass along
# the supplied memo object to recursive deepcopy calls.
kwargs = {}
if hasattr(self, '_memo'):
kwargs['memo'] = self._memo
# don't copy the temporary _memo field recursively
for member in self_dict.keys() - new_dict.keys() - { '_memo' }:
setattr(new, member, deepcopy(self_dict[member], **kwargs))
else:
self._new_instance = new # copy() will return this
self.copy()
del self._new_instance
return new
[docs]
def astype(self, dtype, copy=True):
"""
Cast the contents and, if present, the sumw2 of a Histogram to
a different data type. If the new type differs from the old
type, we always return a copy; otherwise, we return a copy
if copy=True or the original if copy=False.
"""
if self._contents.dtype == dtype and not copy:
return self
else:
new_contents = self._contents.astype(dtype) # makes a copy
if self._sumw2 is not None:
new_sumw2 = self._sumw2.astype(dtype) # makes a copy
else:
new_sumw2 = None
return self._replace(_contents=new_contents,
_sumw2=new_sumw2)
@staticmethod
def _compute_padding(needs_padding):
"""
Given a Boolean array indicating whether overflow padding is
needed in each dimension of an array, compute an argument to
np.pad that adds padding where needed.
Args:
needs_padding: np.ndarray of bool [axes.ndims]
ith entry is True iff ith dimension needs padding
Returns:
np.ndarray *view* of size |needs_padding| x 2 with
padding values for each dimension of the array.
NB: return value is not writable!
"""
padlen = needs_padding.astype(int) # 1 if needed, 0 if not
return np.broadcast_to(padlen[:,None], (needs_padding.size, 2)) # ints -> pairs
@staticmethod
def _get_padding_for_overflow(track_overflow, hshape, oshape,
mask=None, check_sanity=True,
first_dim = 0):
"""
Compute the padding necessary to convert an array of shape oshape to
match the contents shape of a Histogram with shape hshape and
specified track_overflow settings.
Args:
track_overflow: np.ndarray of bool
Should overflow be tracked for each dimension of this Histogram?
hshape: array-like of int
Shape of Histogram, not including overflow/underflow bins
oshape: array-like of int
Shape of array being padded to match Histogram
mask: np.ndarray (optional)
If not None, mask of length == hshape indicating which
dimensions should be checked for padding. Unchecked dimensions
always receive zero padding.
check_sanity: bool
If True, make sure each dimension of oshape either conforms to
hshape given its tracking requirement or can be padded to conform.
Raise an exception if this test fails.
first_dim: int
The dimension of the Histogram corresponding to the first entry
in track_overflow (used only for error printing)
Returns:
np.ndarray *view* of size |needs_padding| x 2 with
padding values for each dimension of the array.
NB: return value is not writable!
"""
hshape = np.asarray(hshape) # make sure at least one of oshape, hshape is array
if check_sanity:
# First, check that oshape can be transformed to hshape via padding
invalid_padding = track_overflow & ~np.isin(oshape - hshape, (0,2))
invalid_nopadding = ~track_overflow & ~(oshape == hshape)
if mask is not None:
invalid_padding &= mask
invalid_nopadding &= mask
if np.any(invalid_padding):
bad_axes = np.nonzero(invalid_padding)[0]
first_bad = bad_axes[0]
hlen = hshape[first_bad]
olen = oshape[first_bad]
raise ValueError(f"Array axis {first_dim + first_bad} has size {olen}; "
f"must be {hlen} or {hlen+2} to use with track_overflow = True")
if np.any(invalid_nopadding):
bad_axes = np.nonzero(invalid_nopadding)[0]
first_bad = bad_axes[0]
hlen = hshape[first_bad]
olen = oshape[first_bad]
raise ValueError(f"Array axis {first_dim + first_bad} has size {olen}; "
f"must be {hlen} to use with track_overflow = False")
# Next, identify which dimensions need to be padded
needs_padding = track_overflow & (oshape == hshape)
if mask is not None:
needs_padding &= mask
# Finally, return the padding argument to np.pad
return Histogram._compute_padding(needs_padding)
@staticmethod
def _pad(arr, padding, value=0):
"""
Pad array arr as needed with value.
Args:
arr -- np.ndarray
padding -- array-like of size [arr.ndim x 2]
giving padding values for each dim of arr
value -- value to use for padding cells (default 0)
Returns:
padded array, which may be the original (if no padding is
needed) or a copy
"""
padding = np.asarray(padding)
if not padding.any():
return arr
else:
return np.pad(arr, padding, constant_values=value)
def _standardize_track_overflow(self, track_overflow):
"""
Convert any valid form of track_overflow to an array of
Boolean values per axis.
Args:
track_overflow (bool, array-like, or dict): Whether to
allocate space to track the underflow and overflow
bins. Acceptable forms include
- a single boolean value (applies to all axes)
- a 1-D array-like with a boolean value per axis
- a dictionary specifying boolean values for a set
of named/numbered axes. For axes not in the dict,
the default is False.
"""
if isinstance(track_overflow, (bool, np.bool_)):
track_overflow = np.full(self.ndim, track_overflow)
elif isinstance(track_overflow, (np.ndarray, list, tuple)):
track_overflow = np.atleast_1d(track_overflow)
if track_overflow.size != self.ndim:
raise ValueError("track_overflow size doesn't match the number of dimensions")
elif isinstance(track_overflow, dict):
track_overflow = np.asarray(self._axes.expand_dict(track_overflow, default = False))
else:
raise TypeError("Track overflow can only be bool, array, or dictionary.")
return track_overflow
def _match_track_overflow(self, old_track_overflow, new_track_overflow):
"""
Compute slices and padding needed to go from old to new overflow settings.
Args:
old_track_overflow -- np.ndarray of bool
old track_overflow settings
new_track_overflow -- np.ndarray of bool
new track_overflow settings
Returns:
tuple (slices, padding)
slices -- tuple of slices to reduce each dimension from
which overflow is removed
padding -- np.ndarray *view* with padding values for
each dimension
NB: padding is not writable!
"""
needs_padding = ~old_track_overflow & new_track_overflow
padding = self._compute_padding(needs_padding)
needs_slicing = old_track_overflow & ~new_track_overflow
slices = tuple( slice(1,-1) if b else slice(None) for b in needs_slicing )
return slices, padding
def _update_track_overflow(self, new_track_overflow):
"""
Update the contents and sumw2 of this Histogram to reflect
a specified set of overflow tracking settings.
Args:
new_track_overflow -- bool, array-like, or dict
specifying track-overflow settings per dimension
"""
new_track_overflow = self._standardize_track_overflow(new_track_overflow)
slices, padding = self._match_track_overflow(self._track_overflow,
new_track_overflow)
self._track_overflow = new_track_overflow
self._contents = self._pad(self._contents[slices], padding)
# if we are possibly no longer padding a dense array that was
# padded, make sure array is contiguous in memory after any slicing
if not self.is_sparse and not padding.any():
self._contents = np.ascontiguousarray(self._contents)
if self._sumw2 is not None:
self._sumw2._update_track_overflow(new_track_overflow)
[docs]
def track_overflow(self, track_overflow = None):
"""
Obtain an array specifying whether each axis is tracking underflows and overflows.
If input is not None, adjust the track_overflow settings to those provided.
Args:
track_overflow (bool, array-like, or dict): Optional. New overflow tracking settings
Returns:
np.ndarray with *copy* of current overflow tracking settings
We return a copy to external callers because it is unsafe to modify a live
track_overflow array in place; any updates *must* be fed back to track_overflow
(internally, to _update_track_overflow) to take effect.
"""
if track_overflow is not None:
self._update_track_overflow(track_overflow)
# return a copy to avoid letting caller mutate the
# track_overflow array, which will not work.
return self._track_overflow.copy()
@property
def unit(self):
return self._unit
[docs]
def to(self, unit, equivalencies=[], update=True, copy=True):
"""
Convert a Histogram to a different unit.
Args:
unit (unit-like): Unit to convert to.
equivalencies (list or tuple): A list of equivalence pairs to try if the units are not
directly convertible.
update (bool): If ``update`` is ``False``, only the units will be changed without
updating the contents accordingly
copy (bool): If True (default), then the value is copied. Otherwise, a copy
will only be made if necessary.
"""
old_unit = self._unit
new_unit = None if unit is None else u.Unit(unit)
if copy:
new = self.copy()
else:
new = self
if update:
# Apply factor needed to convert to new unit to contents and sumw2
if old_unit is None:
if new_unit is not None and new_unit != u.dimensionless_unscaled:
raise TypeError("Assigning unit to Histogram without units")
elif new_unit is not None:
factor = old_unit.to(new_unit, equivalencies = equivalencies)
if factor != 1.0:
new *= factor
# Update units
new._unit = new_unit
if new._sumw2 is not None:
new._sumw2._unit = None if new_unit is None else new_unit**2
return new
def _with_units(self, value):
"""
Add unit to value if histogram has one and value is dense,
creating a Quantity array.
Ideally, we would add a unit whether value is dense or sparse.
Unfortunately, SparseArray is not compatible with Quantity, so
if we want to return a sparse array value, we cannot set the
unit. We do not want to pay to densify a potentially large
matrix, so we just return the raw value.
"""
if self._unit is not None and not isinstance(value, SparseArray):
# prevent copy when adding unit
return u.Quantity(value, unit=self._unit, copy=COPY_IF_NEEDED)
else:
return value
@property
def is_sparse(self):
"""
Return True if the underlying histogram contents array is sparse,
or False if dense.
"""
return isinstance(self._contents, SparseArray)
[docs]
def to_dense(self, copy=True):
"""
Return a dense copy of a histogram
copy : if true, always copy whether conversion is needed or
not; otherwise, copy only if converting
"""
if not self.is_sparse:
h_dense = self.copy() if copy else self
else:
new_contents = self._contents.todense()
if self._sumw2 is not None:
new_sumw2 = self._sumw2.to_dense(copy=True)
else:
new_sumw2 = None
h_dense = self._replace(_contents = new_contents,
_sumw2 = new_sumw2)
return h_dense
# alias pydata sparse style
todense = to_dense
[docs]
def to_sparse(self, copy=True):
"""
Return a sparse copy of a histogram.
copy : if true, always copy whether conversion is needed or
not; otherwise, copy only if converting
"""
if self.is_sparse:
h_sparse = self.copy() if copy else self
else:
new_contents = COO.from_numpy(self._contents)
if self._sumw2 is not None:
new_sumw2 = self._sumw2.to_sparse(copy=True)
else:
new_sumw2 = None
h_sparse = self._replace(_contents = new_contents,
_sumw2 = new_sumw2)
return h_sparse
# pydata sparse style
tosparse = to_sparse
@property
def sumw2(self):
return self._sumw2
@property
def ndim(self):
return self._axes.ndim
def __eq__(self, other):
"""
Check whether two Histograms are equal. Sparse
and dense histograms with the same axes, unit, and
contents/sumw2, including under/overflow bins if
present, are considered equal.
"""
return (self._axes == other._axes
and
np.array_equal(self._track_overflow, other._track_overflow)
and
self._unit == other._unit
and
np.all(self._contents == other._contents) # sparse does not support array_equal
and
self._sumw2 == other._sumw2)
def __array__(self, dtype=None, copy=None):
"""
Return a view or copy of our contents as a dense ndarray.
If histogram has units, return a Quantity array to preserve
them.
"""
if self.is_sparse:
if not sps.AUTO_DENSIFY:
raise RuntimeError("Cannot convert a sparse histogram to dense automatically. "
"To manually densify, use the todense method.")
arr = self._get_contents().todense()
else:
arr = self._get_contents()
if dtype is not None and arr.dtype != dtype:
arr = arr.astype(dtype) # makes a copy
elif copy and not self.is_sparse: # todense copies
arr = arr.copy()
return self._with_units(arr)
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
if len(inputs) != 1 or inputs[0] is not self:
raise NotImplementedError(
"Only numpy functions with a single operand are supported "
"e.g. np.sin(h). Call h.contents explicitly to "
"get the corresponding array e.g. np.maximum(h.contents, a)")
# units are *not* dropped for dense contents (but are for sparse)
arr = self._with_units(self._get_contents())
return ufunc(arr, *inputs[1:], **kwargs)
def __array_function__(self, func, types, args, kwargs):
if len(args) != 1 or args[0] is not self:
raise NotImplementedError(
"Only numpy functions with a single operand are supported "
"e.g. np.sum(h). Call h.contents explicitly to "
"get the corresponding array e.g. np.dot(h.contents, a)")
# units are *not* dropped for dense contents (but are for sparse)
arr = self._with_units(self._get_contents())
return func(arr, *args[1:], **kwargs)
class _NBINS():
'''
Convenience class that will expand to the number of bins of a
given dimension.
The trick is to overload the -/+ operators so that h.end +/-
offset (h being an instance of Histogram and 'end' an static
instance of Histogram._NBINS) returns an instance of _NBINS
itself, which stores the offset. The [] operator can then
detect that the input is an instance of _NBINS and convert it
into an integer with respect to the size of the appropriate
axis.
'''
def __init__(self, offset = 0):
self.offset = offset
def __add__(self, offset):
return self.__class__(offset)
def __sub__(self, offset):
return self + (-offset)
end = _NBINS()
# Convenient aliases
uf = -1
of = end
class _ALL():
"""
Expands to slice(-1, end+1) if the axis tracks under/overflow,
or slice(0, end) otherwise
"""
all = _ALL()
def _prepare_indices(self, indices):
"""
Adjust a multi-dimensional index expression into the
contents of a Histogram to properly index ._contents.
"""
def _prepare_index(index, dim):
"""
Adjust an index expressed as a scalar value (integer or
'end' expression), a slice, or a multi-D array of scalar
indices to account for overflow padding. As a special
case, handle an index that is Boolean mask, which may
index one or more dimensions of the Histogram starting
with dim.
"""
overflow_offset = int(self._track_overflow[dim])
def _adjust_scalar_index(index):
"""
adjust a scalar index expression and make sure it
is valid for the Histogram's contents
"""
if isinstance(index, self._NBINS): # expand 'end' expression
index = self._axes[dim].nbins + index.offset
index += overflow_offset
if index < 0 or index > self._contents.shape[dim]:
raise IndexError("Bin index out of bounds")
return index
if isinstance(index, slice):
start = 0 if index.start is None else index.start
stop = self._axes[dim].nbins if index.stop is None else index.stop
index = slice(_adjust_scalar_index(start),
_adjust_scalar_index(stop),
index.step)
elif isinstance(index, self._ALL):
# convert 'all' to slice for whole of dim, including oflow/uflow
# (no need to check bounds)
index = slice(0, self._axes[dim].nbins + 2*overflow_offset)
elif isinstance(index, np.ndarray):
if index.dtype in (bool, np.bool_):
# Boolean mask array -- pad to match overflow
# settings for its dimension range
end_dim = dim + index.ndim
padding = self._get_padding_for_overflow(self._track_overflow[dim:end_dim],
self._axes.nbins[dim:end_dim],
index.shape, first_dim = dim)
index = self._pad(index, padding, value=False)
else:
# (possibly multi-D) array of scalar indices (ints or _NBINS)
adjindex = [_adjust_scalar_index(i) for i in index.ravel()]
index = np.reshape(adjindex, index.shape)
else: # scalar index (could be _NBINS)
index = _adjust_scalar_index(index)
return index
def normalize(index):
"""
If index is a slice or a scalar value, return it as-is.
If it is array-like, convert to a proper ndarray.
If none of the above, fail.
For each index we return, we also return the number of dimension
it refers to in the array (usually 1, but Boolean masks can refer
to several consecutive dimensions).
"""
if isinstance(index, (slice, int, np.integer, self._NBINS, self._ALL)):
return (index, 1)
elif isinstance(index, (np.ndarray, list, tuple, range)):
arr = np.asarray(index)
if arr.ndim > 0:
return (arr, arr.ndim if arr.dtype in (bool, np.bool_) else 1)
else:
return (arr.item(), 1)
elif index is Ellipsis:
raise IndexError("Ellipsis is supported only at end of index tuple")
else:
raise TypeError("Index can only be slice, scalar, or array-like")
if isinstance(indices, dict):
# expand dict of per-axis indices into tuple
indices = self._axes.expand_dict(indices, default = slice(None))
elif not isinstance(indices, tuple):
indices = (indices,)
# strip Ellipsis from end of index tuple; it is not supported elsewhere
has_ellipsis = (indices[-1] is Ellipsis)
if has_ellipsis:
indices = indices[:-1]
indices = tuple( normalize(i) for i in indices )
has_multi_dim = any( i[1] > 1 for i in indices ) # does any index cover multiple dims?
if not has_multi_dim:
# easy case -- each index describes exactly one dimension
adjusted_indices = tuple(_prepare_index(index, dim) for
dim, (index, _) in enumerate(indices))
nSpecifiedDims = len(indices)
else:
# hard case -- some mask index describes > 1 dimension
adjusted_indices = []
dim = 0
for index, ndim in indices:
adjusted_indices.append(_prepare_index(index, dim))
dim += ndim
adjusted_indices = tuple(adjusted_indices)
nSpecifiedDims = dim
track = self._track_overflow
if has_ellipsis:
# unspecified dimensions at end refer to all of .full_contents
rest_indices = tuple(slice(0, self._axes[dim].nbins + 2*int(track[dim]))
for dim in range(nSpecifiedDims, self.ndim))
else:
# unspecified dimensions at end refer to all of .contents
rest_indices = tuple(slice(int(track[dim]), self._axes[dim].nbins + int(track[dim]))
for dim in range(nSpecifiedDims, self.ndim))
return adjusted_indices + rest_indices
"""
Access methods
The following code defines two kinds of access methods: *internal*
methods, which should be used by the methods of the Histogram
class, and *external* methods, which should be used by users of
the class and by subclasses.
Both internal and external accessors understand the special
indexing rules associated with overflow tracking, as well as the
special values 'all' and 'end'. They also manipulate the format
of a sparse contents array to ensure that writes to it will
succeed.
External accessors also understand unit conformity and will return
a Quantity for dense histograms with associated units, whereas
internal accessors always return a bare value.
CAUTION: external users should *never* directly read or write a
Histogram's _contents array; doing so may yield unexpected results
depending on whether the Histogram is using overflow tracking and
may fail depending on its sparsity. Always use the external
accessors __getitem__ and __setitem__ to read or write subarrays
of a Histogram; they can be called by directly indexing the
histogram object, e.g., x = h[1:3] or h[3:5, 1:2] = 0. Use
.contents to obtain a view of the histogram's contents without
overflow/underflow bins, or .full_contents to obtain a view with
these extra bins.
Internal users should use the _get() and _set() methods to read
and write subarrays of the Histogram's contents if indices are to
be interpreted as for __getitem__ / __setitem__. These methods
work with bare values; *do not* use the external accessors (e.g.,
self[1:3]) unless the intent is to obtain/provide a Quantity
whenever the Histogram has units. Use _get_contents() to
obtain the bare equivalent of .contents, or use ._contents
directly for the bare equivalent of .full_contents.
Internal users that directly access ._contents instead of using
_get() and _set() are responsible for handling the impacts of
overflow tracking and for ensuring that sparse arrays are in the
correct form (DOK for writing, COO for everything else) if needed.
"""
def _set(self, indices, new_contents):
"""
Update a slice of a Histogram's contents. If the Histogram
is sparse, it is converted to DOK form to enable partial
updates in place.
"""
# Make it mutable if needed
if self.is_sparse:
self._contents = self._contents.asformat('dok')
indices = self._prepare_indices(indices)
try:
self._contents[indices] = new_contents
except (IndexError, ValueError) as e:
if self.is_sparse:
# Check if advanced indexing is being used
for index in indices:
if isinstance(index, np.ndarray):
logger.warning(
"Advanced indexing is not yet fully supported "
"for sparse arrays. See "
"https://github.com/pydata/sparse/issues/1 and "
"https://github.com/pydata/sparse/issues/114")
break
raise e
# Make it fast read-only
if self.is_sparse:
self._contents = self._contents.asformat('coo')
def __setitem__(self, indices, new_contents):
"""
Update a slice of a Histogram's contents. The
value used to update the contents must be unit-conformable
to the Histogram's units, if present.
"""
self._set(indices, self._strip_units(new_contents))
def _get(self, indices):
"""
Get a slice of a Histogram's contents as an array.
Returns a *view* into original contents. If the
histogram is sparse, it is converted to COO form.
"""
indices = self._prepare_indices(indices)
try:
return self._contents[indices]
except IndexError as e:
if self.is_sparse:
# Check if advanced indexing is being used
for index in indices:
if isinstance(index, np.ndarray):
logger.warning(
"Advanced indexing is not yet fully supported "
"for sparse arrays. See "
"https://github.com/pydata/sparse/issues/1 and "
"https://github.com/pydata/sparse/issues/114")
break
raise e
def _get_contents(self):
return self._get(slice(None))
def __getitem__(self, indices):
"""
Get a slice of a Histogram's contents as an array.
Returns a *view* into original contents. If the
Histogram has a unit *and* the contents array is dense,
return the slice as a Quantity.
"""
value = self._get(indices)
return self._with_units(value)
@property
def contents(self):
"""
Equivalent to :code:`h[:]`. Does not include under and overflow bins.
"""
return self[:]
@property
def full_contents(self):
"""
Equivalent to :code:`h[...]`. The size of each axis can be
nbins or nbins+2, depending on the track_overflow parameters
"""
return self[...]
class _special_getitem:
"""
This allows to use regular indexing for special access methods,
e.g., h.bin_error[]
"""
def __init__(self, hist):
self._hist = hist
def __getitem__(self, item):
raise NotImplementedError
def __array__(self):
return self.contents
@property
def contents(self):
return self[:]
@property
def full_contents(self):
return self[...]
class _get_bin_error(_special_getitem):
"""
Return the sqrt of sumw2
"""
def __getitem__(self, item):
if self._hist._sumw2 is not None:
result = np.sqrt(self._hist._sumw2._get(item))
else:
# fake it if no sumw2
result = np.sqrt(np.abs(self._hist._get(item)))
return self._hist._with_units(result)
def __setitem__(self, indices, new_bin_error):
if self._hist._sumw2 is None:
raise ValueError("Histogram does not have sumw2")
self._hist._sumw2[indices] = new_bin_error**2
@property
def shape(self):
"""
Tuple with length of each axis
"""
return self._axes.shape
@property
def axes(self):
"""
Underlying axes object
"""
return self._axes
@property
def axis(self):
"""
Equivalent to :code:`self.axes[0]`, but fails if :code:`ndim > 1`
"""
if self.ndim > 1:
raise ValueError("Property 'axis' can only be used with 1D "
"histograms. Use `axes` for multidimensional "
"histograms")
return self._axes[0]
@axis.setter
def axis(self, new):
self._axes[0] = new
if self._sumw2 is not None:
self._sumw2.axis = new
[docs]
def expand_dims(self, *args, **kwargs):
"""
Same as h.axes.expand_dims().
"""
return self._axes.expand_dims(*args, **kwargs)
[docs]
def broadcast(self, *args, **kwargs):
"""
Same as h.axes.broadcast().
"""
return self._axes.broadcast(*args, **kwargs)
[docs]
def expand_dict(self, *args, **kwargs):
"""
Same as h.axes.expand_dict().
"""
return self._axes.expand_dict(*args, **kwargs)
@property
def dtype(self):
return self._contents.dtype
@property
def nbins(self):
if self.ndim == 1:
return self._axes[0].nbins
else:
return self._axes.nbins
[docs]
def interp(self, *values, kind="linear"):
"""
Perform multilinear interpolation of one or more values
relative to the contents of this Histogram. The center of
histogram bin (i1,...,in) is assumed to have the value
h[i1,...,in] for purposes of interpolation.
The interpolation weights along each axis are controlled by
its scale. If the scale is 'linear' or 'symmetric',
interpolation is linear; if it is 'log', the interpolation is
log-linear. Hence, interpolating a value halfway between two
bin centers along a log-scale axis returns the *geometric*
mean of the centers.
Interpolation may be done using either the values in the
Histogram (default) or their logs using the 'kind'
parameter. If kind='log' is requested, the histogram's
contents should all be > 0 to avoid warnings or errors.
Args:
values (scalar or array-like): value(s) to interpolate
If single value, may be ndim coordinates
as separate arguments or a single array-like
if multiple values, may be ndim array-likes
of coordinates as separate arguments or
a single array-like containing same
kind (string): 'linear' (default) if multilinear interpolation
is to be done using this Histogram's contents, or
'log' if it is to be done on the logs of the contents
and the results converted back to the linear domain.
Return:
interpolated values (scalar or array of same shape as values)
"""
bins, weights = self._axes.interp_weights(*values)
# If contents is sparse, we cannot index it with numpy
# arrays of dimension > 1 due to limitations of the Sparse
# package (even though this is perfectly fine for dense
# arrays). In this case, ravel the bins and weights
# before interpolating and restore the result shape to
# match the bins shape (which == the values shape) after.
if self.is_sparse and bins[0].ndim > 2:
vshape = bins[0].shape[1:]
bins = tuple( b.reshape(b.shape[0], -1) for b in bins )
weights = tuple( w.reshape(w.shape[0], -1) for w in weights )
else:
vshape = None
isLog = (kind == "log") # do log, not linear, combining
contents = self._get_contents()
if isLog: # convert to log domain before interpolating
contents = np.log(contents)
interp_values = self._interp_multilinear(contents, bins, weights)
if isLog: # convert result back out of log domain
interp_values = np.exp(interp_values)
if vshape is not None:
interp_values = interp_values.reshape(vshape)
return self._with_units(interp_values)
@staticmethod
def _interp_multilinear(contents, bins, weights):
"""
Perform multilinear interpolation with respect to matrix 'contents',
given arrays of bins and weights for a set of points.
Args:
contents: array of dimension ndim
bins: tuple of ndim arrays; ith array contains one or more
integers per input point specifying which bins of contents
contain the values to be interpolated in ith dimension.
weights: tuple of ndim arrays; ith array contains
one or more floats per input point specifying the weights
for values to be interpolated in ith dimension.
For all dimensions i and all bins j, both bins[i][j] and
weights[i][j] must have the same (arbitrary) shape if contents
is dense, or must be a flat 1D array if it is sparse. The
number of bins may be different for each dimension but must
match the corresponding number of weights.
Returns:
array of interpolated values of same shape as each
elt of bins / weights
"""
is_sparse = isinstance(contents, SparseArray)
def densify(v):
if is_sparse and not np.isscalar(v):
return v.todense()
else:
return v
def interp_r(idim, prev_bins):
"""
Recursive multilinear interpolation
Allows interpolation in a given dimension to use a
linear combination of two or more bins; standard linear
interpolation uses two bins with weights w and 1 - w.
"""
w = weights[idim]
# extend each bin in current dim with all previous bins
dimbins = ((b,) + prev_bins for b in bins[idim])
# interpolate remaining dimensions to get result for each
# bin in bins[idim], and combine results into one array
c = np.stack(tuple(
((densify(contents[b]) # base case
if idim == 0
else interp_r(idim - 1, b))
for b in dimbins)
))
# weighted sum gives one interpolated value per input point
return np.sum(c * w, axis=0)
return interp_r(len(bins) - 1, ())
def find_bin(self, *args, **kwargs):
return self._axes.find_bin(*args, **kwargs)
def _strip_units(self, quantity):
"""
Remove the unit from a quantity (if it exists) and return
its value in the units of the Histogram, so that we may combine
it with the Histogram's contents.
We FAIL if:
* we try to combine a non-dimensionless Quantity with
a Histogram that has no units
* we try to combine a scalar with a Histogram that has
units
"""
# convert bare unit to trivial Quantity
if isinstance(quantity, u.UnitBase):
quantity = 1. * quantity
if isinstance(quantity, u.Quantity):
if quantity.unit == u.dimensionless_unscaled:
return quantity.value
if self._unit is None:
raise u.UnitConversionError("Cannot apply Quantity to Histogram without units")
return quantity.to_value(self._unit)
else:
if not (self._unit is None or self._unit == u.dimensionless_unscaled):
raise u.UnitConversionError("Cannot apply scalar to Histogram with units")
return quantity
[docs]
def fill(self, *values, weight = None, warn_overflow = True):
'''
Add an entry to the histogram. Can be weighted.
Follow same convention as find_bin()
Args:
values (float or array): Value of entry
weight (float): Value weight in histogram. Defaults to 1
in whatever units the histogram has
warn_overflow (bool): Enable/disable warnings when an
underflow or overflow occurs --i.e. when one or more of
the input values falls beyond the range of the
corresponding axis.
Note:
Note that weight needs to be specified explicitly by key;
otherwise it will be considered a value, and an IndexError
will be thrown.
'''
# Remove units from weight, converting if needed
if weight is None:
weight = 1
else:
weight = self._strip_units(weight)
indices = self.find_bin(*values)
# Standardize if single axis
if not isinstance(indices, tuple):
indices = (indices,)
# Each index must have same size for stacking, so make sure
# the sizes match by broadcasting all to largest size
indices = np.broadcast_arrays(*indices)
# Build 2D array [ndims x npoints] of indices for input values.
# Must reshape because indices in each dimension may not be
# a 1-D linear array.
indices = np.stack(indices, axis=0).reshape(len(indices),-1)
weight = np.broadcast_to(np.asarray(weight), indices.shape[1])
# adjust indices to be relative to .full_contents, so
# that we can use them to access ._contents directly
indices += self._track_overflow[:,None]
# eliminate any indices that are out of bounds (can happen
# if we are not tracking overflow in some dimension)
mask = np.all(np.logical_and(indices >= 0,
indices < np.asarray(self._contents.shape)[:,None]),
axis = 0)
if not mask.all():
if warn_overflow:
logger.warning(
"fill() discarded one or more values due to out-of-bounds "
"coordinate in a dimension without under/overflow tracking"
)
indices = indices[:,mask]
weight = weight[mask]
if weight.size > 0: # empty fails for dense, pointless for sparse
if not self.is_sparse:
indices = tuple(indices)
# add.at sums values associated with duplicate indices
np.add.at(self._contents, indices, weight)
if self._sumw2 is not None:
np.add.at(self._sumw2._contents, indices, weight**2)
else:
# COO sums values associated with duplicate indices by default
new_contents = COO(coords = indices,
data = weight,
shape = self._contents.shape)
self._contents += new_contents
if self._sumw2 is not None:
new_contents_sq = COO(coords = indices,
data = weight**2,
shape = self._contents.shape)
self._sumw2._contents += new_contents_sq
[docs]
def clear(self):
"""
Set all counts to 0
"""
if self.is_sparse:
self._contents = DOK(shape = self._contents.shape,
dtype = self._contents.dtype,
fill_value = 0).asformat('coo')
else:
self._contents[:] = 0
if self._sumw2 is not None:
self._sumw2.clear()
def _clear_border(self, which, axes = None):
"""
Set border cells (over and/or underflow) to 0 in both
contents and sumw2
Args:
which : list of which borders to zero out
axes (None or array): Axis numbers or labels. All by default
"""
if self.is_sparse: # ensure sparse contents is writable
self._contents = self._contents.asformat('dok')
if axes is None:
axes = range(self.ndim)
elif np.isscalar(axes):
axes = [axes]
axes = self._axes.label_to_index(axes)
for n in axes:
if self._track_overflow[n]:
for idx in which:
indices = self.expand_dict({n : idx}, self.all)
self._set(indices, 0)
if self.is_sparse: # return sparse contents to fast readonly form
self._contents = self._contents.asformat('coo')
if self._sumw2 is not None:
self._sumw2._clear_border(which, axes)
def clear_overflow(self, axes = None):
self._clear_border((self.of,), axes)
def clear_underflow(self, axes = None):
self._clear_border((self.uf,), axes)
def clear_underflow_and_overflow(self, axes = None):
self._clear_border((self.uf, self.of), axes)
[docs]
def project(self, *axis):
"""
Return a histogram containing a projection of the current one.
Args:
axis (axis index/label or array-like of same): axis or axes onto
which the histogram will be projected. Omitted axes will be
summed over. The axes of the projected histogram will have the
order specified by this argument, so project() can be used
to permute a Histogram's axes (whether or not some are
projected away).
Return:
Histogram: Projected histogram (a new object, not a view)
"""
return self._project(axis, project_out=False)
[docs]
def project_out(self, *axis):
"""
Return a histogram containing a projection that sums over the
specified axes of the current one, leaving the rest intact.
Args:
axis (axis index/label or array-like of same): axis or
axes that will be projected out of the
histogram. Omitted axes will be retained in their
current order.
Return:
Histogram: Projected histogram (a new object, not a view)
"""
return self._project(axis, project_out = True)
def _project(self, axis, project_out = False):
"""
Common core of project() and project_out()
If project_out is False; the axes in `axis` will be retained
in the specified order. If it is True, the axes in `axis`
will be summed away.
Args:
axis: array-like of axis numbers or labels
project_out: bool
Return:
Histogram: Projected histogram (a new object, not a view)
"""
def _project_contents(contents, keep_axes, sum_axes):
# Transpose the contents so that the first keep_axes
# dimensions are the axes to keep, in the order requested
# by the caller, while the remaining axes are the ones
# to be summed away.
#
# We transpose before projecting out the sum_axes so
# that the projected array is contiguous in memory.
reordered_axes = np.concatenate((keep_axes, sum_axes))
new_contents = np.transpose(contents, reordered_axes)
if len(sum_axes) == 0: # sparse does not handle this case
new_contents = new_contents.copy()
else:
new_sum_axes = tuple(range(len(keep_axes), new_contents.ndim))
new_contents = new_contents.sum(axis = new_sum_axes)
return new_contents
# standardize input
if len(axis) == 1 and \
isinstance(axis[0], (list, np.ndarray, range, tuple)):
# Got a sequence
axis = axis[0]
axes = np.asarray(self._axes.label_to_index(axis), dtype=int)
if len(np.unique(axes)) != len(axes):
raise ValueError("An axis can't repeat")
# Compute axis ids in 0..ndim that are not in exes;
# result is guaranteed to be in increasing order
rest = np.setdiff1d(np.arange(self.ndim, dtype=int),
axes,
assume_unique=True)
if project_out: # axes are to be summed over
sum_axes = axes
keep_axes = rest
else: # axes are to be retained
keep_axes = axes
sum_axes = rest
if len(keep_axes) == 0:
raise ValueError("Cannot project out all axes of a Histogram. "
"Consider using np.sum(h) or np.sum(h.full_contents) (includes under/overflow)")
new_contents = _project_contents(self._contents, keep_axes, sum_axes)
new_sumw2 = None
if self._sumw2 is not None:
new_sumw2_contents = _project_contents(self._sumw2._contents, keep_axes, sum_axes)
new_sumw2 = self._sumw2._replace(_axes = self._axes[keep_axes],
_contents = new_sumw2_contents,
_track_overflow = self._track_overflow[keep_axes])
new = self._replace(_axes = self._axes[keep_axes],
_contents = new_contents,
_sumw2 = new_sumw2,
_track_overflow = self._track_overflow[keep_axes])
return new
[docs]
@staticmethod
def concatenate(edges, histograms, label = None, track_overflow = None):
"""
Generate a Histogram H from a list of histograms h_1
... h_n. We create a new first axis of length equal to the
list and set H[i] = h_i.
For this operation to be well-defined, the axes of all input
histograms must be equal, and they must all have the same
sparsity; if any input has a unit, all must have compatible
units. If any input is a subclass of Histogram, all must have
the same subclass type.
If all inputs have sumw2, the output will as well; otherwise,
all sumw2 values are discarded.
Generate a Histogram from a list of histograms. The axes of
all input histograms must be equal, and the new histogram will
have one more dimension than the input. The new axis has index
0. If histograms can be subclassed, all of them must have the
same class type.
Args:
edges (Axes or array): Definition of bin edges of the new dimension
histograms (list of Histogram): List of histogram to fill contents.
Might or might not include under/overflow bins.
labels (str): Label the new dimension
track_overflow (bool): Track underflow and overflow on the newly created axis.
Defaults to True if number of histograms on new axis is 2 + # bins,
or False otherwise.
Return:
new object of the same type as histograms[0] (Histogram or subclass)
"""
def check_compatible(h, h0):
if h.__class__ != h0.__class__:
raise ValueError("Cannot combine Histograms of different subclasses "
f"{h.__class__.__name__}, {h0.__class__.__name__}")
if h.axes != h0.axes:
raise ValueError("Cannot combine Histograms with different axes")
if h.is_sparse != h0.is_sparse:
raise ValueError("Cannot combine Histograms with different sparsities")
if not np.array_equal(h._track_overflow, h0._track_overflow):
raise ValueError("Cannot combine Histograms with different track_overflow settings")
if (h._unit == None) != (h0._unit == None):
raise ValueError("Cannot combine Histograms with and without units")
return (h._sumw2 is not None) # do we have sumw2?
if len(histograms) == 0:
raise ValueError("Cannot concatenate empty list of Histograms")
h0 = histograms[0]
has_sumw2 = [ check_compatible(h, h0) for h in histograms ]
shared_axes = h0._axes
axis_newdim = Axis(edges, label=label)
new_axes = Axes([axis_newdim] + list(shared_axes), copy_axes=False)
# stick all the old histogram contents together
new_contents = np.concatenate([h._contents[None] for h in histograms])
new_unit = h0._unit
if new_unit is not None:
# adjust each histogram's value to account for different, but compatible units
for i, h in enumerate(histograms):
unit_factor = h._unit.to(new_unit)
if unit_factor != 1.:
new_contents[i] *= unit_factor
if track_overflow is None:
# by default, overflow tracking follows contents shape
track_overflow = (len(histograms) == axis_newdim.nbins + 2)
new_track_overflow = np.append(track_overflow, h0._track_overflow)
# compute overflow padding for dimension corresponding to new axis
# we need padding in only one dimension, but padding function expects arrays
padding_newdim = Histogram._get_padding_for_overflow(np.atleast_1d(track_overflow),
np.atleast_1d(axis_newdim.nbins),
np.atleast_1d(len(histograms)))
# padding of all axes after the first is unchanged
new_padding = np.concatenate((padding_newdim, np.zeros((shared_axes.ndim, 2), dtype=int)))
new_contents = Histogram._pad(new_contents, new_padding)
# Combine sumw2s if they exist in all histograms, using same unit and padding adjustments
# as for contents (except that we must square the unit)
if all(has_sumw2):
new_sumw2_contents = np.concatenate([h._sumw2._contents[None] for h in histograms])
if new_unit is not None:
# adjust each histogram's value to account for different, but compatible units
for i, h in enumerate(histograms):
unit_factor = h._unit.to(new_unit)
if unit_factor != 1.:
new_sumw2_contents[i] *= unit_factor**2
new_sumw2_contents = Histogram._pad(new_sumw2_contents, new_padding)
new_sumw2 = h0._sumw2._replace(_axes=new_axes,
_contents=new_sumw2_contents,
_track_overflow=new_track_overflow)
else:
if any(has_sumw2):
logger.warning("Not all input histograms have sum of weights "
"squared. sumw2 will be dropped")
new_sumw2 = None
new = h0._replace(_axes=new_axes,
_contents=new_contents,
_sumw2=new_sumw2,
_track_overflow=new_track_overflow)
return new
class _slice:
def __init__(self, hist):
self._hist = hist
def __getitem__(self, item):
"""Return a Histogram which is a *view* containing a slice of
the current one.
Args:
item -- an indexing expression suitable for a
Histogram's contents. Advanced indexing is not
supported, but .end, .all, and ellipsis are supported
with the same restrictions as for ordinary Histogram
indexing.
Restrictions on valid indexing expressions include:
(1) the slice must include at least one non-under/overflow
bin of the original Histogram. Slices containing
only overflow or underflow bins are not permitted.
(2) slicees with stride > 1 are not permitted due to
ill-defined interactions with under/overflow.
Returns:
A view into the current Histogram containing the
specified slice of its contents.
The new Histogram tracks overflow in the same set of
dimensions as the original. If the slice explicitly
includes the Histogram's underflow and overflow bins for
a given axis, those will be copied into the result;
otherwise, they will be set to zero.
If the original Histogram contains sumw2, the new
one contains the corresponding slice of sumw2.
Note that, unlike Histogram.__getitem__, slice.__getitem__
does *not* squeeze away dimensions of length 1 -- the
slice will always have exactly as many axes as the
original Histogram, so that we do not lose axis
information for axes with a single bin.
Because the returned Histogram is a view, there is no
guarantee as to whether it shares axis or contents
data with the original. Make a copy before modifying
the new Histogram.
"""
# Standardize indices into slices
indices = self._hist._prepare_indices(item)
indices = tuple(slice(i,i+1) if isinstance(i, (int, np.integer)) else i for i in indices)
padding = np.zeros((self._hist.ndim, 2), dtype=int)
new_axes = []
for dim,(index, axis) in enumerate(zip(indices,
self._hist._axes)):
track_overflow = self._hist._track_overflow[dim]
# Sanity checks not already enforced by Axis.__getitem__
if not isinstance(index, slice):
raise TypeError("slice[] supports only integers and slices as indices")
start, stop, stride = index.indices(axis.nbins+2*track_overflow)
if stride != 1:
raise IndexError("slice[] does not support non-unit strides")
# Handle under/overflow
if track_overflow:
if start == 0:
if stop-start == 1:
raise IndexError(f"slice[] cannot return only underflow bin on axis {dim}")
else:
padding[dim, 0] = 1 # supply our own, new underflow bin
if stop == axis.nbins + 2:
if stop-start == 1:
raise IndexError(f"slice[] cannot return only overflow bin on axis {dim}")
else:
padding[dim, 1] = 1 # supply our own, new overflow bin
# axes do not reference under/overflow bins
axis_start = max(start - 1, 0)
axis_stop = min(stop - 1, axis.nbins)
else:
axis_start = start
axis_stop = stop
# result of Histogram slice is a *view*, so we
# allow result to share edge array with input Histogram
new_axes.append( axis[axis_start:axis_stop] )
# new axes will not be referenced outside Axes object
new_axes = Axes(new_axes, copy_axes=False)
# get new contents. Pad if we need to add zeros for oflow/uflow.
new_contents = self._hist._contents[indices]
new_contents = self._hist._pad(new_contents, padding)
# apply identical slice to sumw2
if self._hist._sumw2 is not None:
new_sumw2_contents = self._hist._sumw2._contents[indices]
new_sumw2_contents = self._hist._pad(new_sumw2_contents, padding)
new_sumw2 = self._hist._sumw2._replace(_axes = new_axes,
_contents = new_sumw2_contents)
else:
new_sumw2 = None
return self._hist._replace(_axes = new_axes,
_contents = new_contents,
_sumw2 = new_sumw2)
# operation descriptors for every arithmetic operation
# supported through the _operation() method below.
[docs]
class OpType(Enum):
SUM = 1,
PRODUCT = 2
arith_op_properties = {
# operator inplace? sum or product op?
operator.add: (False, OpType.SUM),
operator.sub: (False, OpType.SUM),
operator.mul: (False, OpType.PRODUCT),
operator.truediv: (False, OpType.PRODUCT),
operator.iadd: (True, OpType.SUM),
operator.isub: (True, OpType.SUM),
operator.imul: (True, OpType.PRODUCT),
operator.itruediv: (True, OpType.PRODUCT),
}
def _unit_operation(self, other, operation):
"""
For binary arithmetic operation self <operation> other,
- if other is not a Histogram, extract its value w/o units
- compute the unit that will result from the operation
Returns:
tuple (value, new unit)
If other is a Histogram or sparse or dense array, value == other
Else, value is an equivalent scalar or np.ndarray
"""
# Separate between value and unit
if isinstance(other, Histogram):
other_unit = other._unit
other_value = other
elif isinstance(other, u.Quantity):
other_unit = other.unit
other_value = other.value
elif isinstance(other, u.UnitBase):
other_unit = other
other_value = 1
elif isinstance(other, SparseArray):
other_unit = None
other_value = other # cannot use np.asarray
else: # scalar or array-like
other_unit = None
if np.ndim(other) > 0:
other_value = np.asarray(other)
else:
other_value = other
if self._unit is None and other_unit is None:
# If neither operand have units, do nothing else
return other_value, None
# Adjust other_value and self._unit depending on the operand
# Standardize dimensionless
if other_unit is None:
other_unit = u.dimensionless_unscaled
self_unit = self._unit
if self_unit is None:
self_unit = u.dimensionless_unscaled
_, optype = Histogram.arith_op_properties.get(operation, (None, None))
assert (optype is not None), "operation not supported"
# For * and / the conversion factor is stored in the unit itself
# ** only accepts scalar dimensionaless quantities, it will crash anyway
# The idencity operator (for the raterizer) doesn't use the other's units
if optype == Histogram.OpType.SUM:
# +, -
# We need to convert other's units to those of self. If
# not a no-op, it requires copying the value (as we don't
# want to modify the original). No change in units
unit_conv = other_unit.to(self_unit)
if unit_conv != 1.0:
other_value = other_value * unit_conv
new_unit = self_unit
else: # optype == Histogram.OpType.PROD
# *, /
# The conversion factor is stored in the unit itself
new_unit = operation(self_unit, other_unit)
return other_value, new_unit
def _operation(self, other, operation):
"""
Perform binary arithmetic with first operand a Histogram (self) and
a second operand 'other', which might be a Histogram, a scalar,
or an array-like.
This function is mainly concerned with getting other into a
form that can be combined with self. The details of the
actual operation differ depending on whether operation is
in-place (e.g., h *= x) or out-of-place (e.g., h = h * x).
"""
is_inplace, optype = self.arith_op_properties.get(operation, (None, None))
assert (optype is not None), "Unsupported arithmetic operation"
# Get the value part of other and the new unit the result should have
other, new_unit = self._unit_operation(other, operation)
# convert other into a bare np.ndarray or scalar that can be
# combined with this Histogram
if isinstance(other, Histogram):
# Histograms for operation must have same axes
if self._axes != other._axes:
raise ValueError("Axes mismatch")
# temporarily remove other's units before extracting contents/sumw2
other_unit = other._unit
other.to(None, update = False, copy = False)
# transform other to match overflow/underflow of self
slices, padding = self._match_track_overflow(other._track_overflow,
self._track_overflow)
other_value = other._contents[slices]
other_value = self._pad(other_value, padding)
if self._sumw2 is not None:
if other._sumw2 is not None:
# Match over/underflow
other_sumw2 = other._sumw2._contents[slices]
other_sumw2 = self._pad(other_sumw2, padding)
else:
# self has sumw2, but other does not
logger.warning("Other histogram lacks sumw2; assuming zero")
other_sumw2 = None
else:
if other._sumw2 is not None:
# other has sumw2, but self does not
logger.warning("Discarding sumw2 from other histogram")
other_sumw2 = None
# restore other's units after value extraction
other.to(other_unit, update = False, copy = False)
else:
# other is array or scalar; array-likes were converted by _unit_operation()
if np.ndim(other) > 0:
if other.ndim != self.ndim:
raise ValueError(f"Operand number of dimensions ({other.ndim}) does not"
f"match number of axes ({self.ndim})")
if isinstance(other, DOK):
# Do not allow DOK format in arithmetic. If combined with
# a dense array, it gives a DOK result, so it can 'pollute'
# a Histogram. Use COO instead; we check later on whether
# the result is sparse and update .sumw2 to match if needed.
other = other.asformat('coo')
# single-element axis can be broadcast as-is, so no need for overflow/underflow
# bins; otherwise, match other's overflow with self
oshape = np.asarray(other.shape)
padding = self._get_padding_for_overflow(self._track_overflow,
self._axes.nbins, oshape,
mask=(oshape != 1))
other_value = self._pad(other, padding)
else:
# other is scalar
other_value = other
other_sumw2 = None # bare value has no sumw2
if is_inplace:
self._inplace_operation(other_value, other_sumw2, operation,
optype, new_unit)
new = self
else:
new = self._outofplace_operation(other_value, other_sumw2, operation,
optype, new_unit)
# Ensure that sumw2 still has same dtype as contents
# and is sparse if contents is sparse. An operation
# on dense self and sparse other can make the result
# have sparse contents, but it might leave sumw2 dense.
if new._sumw2 is not None:
if new.is_sparse and not new._sumw2.is_sparse:
new._sumw2 = new.sumw2.to_sparse()
new._sumw2 = new._sumw2.astype(new._contents.dtype, copy=False)
return new
def _sumw2_product_op(self, self_sumw2, other_sumw2,
self_contents, new_contents,
other, operation):
"""
Update the sum of squared weights self_sumw2 to reflect the impact
of performing the operation self_contents <operation> other.
operation is assumed to be a PRODUCT type.
Args:
self_sumw2 -- _sumw2._contents for Histogram self
other_sumw2 -- _sumw2._contents for Histogram other, or
None if is lacks sumw2 or is not a Histogram
self_contents -- _contents for Histogram self before operation
new_contents -- _contents for Histogram self after operation
other -- other operand's value (_contents if Histogram,
or scalar or np.ndarray)
operation -- operation performed
Returns:
updated sumw2 value for result of operation
If operation is in-place, self_sumw2 is modified in place
and returned if possible; otherwise, a new array is allocated
and returned. Either way, it is safe to set _sumw2._contents
to the return value of this function for both in-place and
out-of-place operations.
"""
if other_sumw2 is None:
# Other operaand is assumed to have zero error. Use
# simplification of error formula below that depends
# only on prior sumw2 and other's value.
new_sumw2 = Histogram._inplace_operation_handle_sparse(
Histogram._inplace_operation_handle_sparse(self_sumw2, other, operation, Histogram.OpType.PRODUCT),
other, operation, Histogram.OpType.PRODUCT)
else:
# Error of either f = A*B or f = A/B is
# f_err^2 = f^2 * ((A_err/A)^2 + (B_err/B)^2)
relvar = self_sumw2 / (self_contents * self_contents)
other_relvar = other_sumw2 / (other * other)
new_sumw2 = new_contents * new_contents * (relvar + other_relvar)
return new_sumw2
def _inplace_operation(self, other, other_sumw2, operation,
optype, new_unit):
"""
Perform an in-place binary arithmetic operation on the histogram
self with second operand other. Update both self's contents
and its sumw2 value, if present.
Args:
other -- second operand
other_sumw2 -- second operand's sumw2 matrix, or None
if not defined
operation -- in-place arithmetic operation to be performed
optype -- is operation a SUM or PRODUCT type?
new_unit -- unit to assign to result
self's contents and sumw2 are updated; nothing is returned.
"""
# delete old units of self; will replace with result units
self.to(None, update = False, copy = False)
if self._sumw2 is not None and \
other_sumw2 is not None and \
optype == Histogram.OpType.PRODUCT:
# save a copy of self._contents for sumw2 computation
self_contents = self._contents.copy()
else:
self_contents = None # not used
# overwrites self._contents
new_contents = Histogram._inplace_operation_handle_sparse(self._contents, other, operation, optype)
if self._sumw2 is not None:
self_sumw2_contents = self._sumw2._contents
if optype == Histogram.OpType.SUM:
if other_sumw2 is not None:
Histogram._inplace_operation_handle_sparse(self_sumw2_contents, other_sumw2, operator.iadd, Histogram.OpType.SUM)
else: # optype == Histogram.OpType.PRODUCT
self._sumw2._contents = \
self._sumw2_product_op(self_sumw2_contents,
other_sumw2,
self_contents,
new_contents,
other,
operation)
# set unit of result
self.to(new_unit, update = False, copy = False)
@staticmethod
def _inplace_operation_handle_sparse(contents, other, operation, optype):
"""
Performs in place operations on contents when other is a sparse matrix. This type of operation is not supported
by the sparse library
"""
if isinstance(other, sparse.SparseArray):
# TODO: Optimize. Instead of allocating the a new dense array, we could modify directly the non-zero
# elements in contents. This requires to check the operation type, as well as the fill value of the
# sparse array.
other = other.todense()
return operation(contents, other)
def _outofplace_operation(self, other, other_sumw2, operation,
optype, new_unit):
"""
Perform an out-of-place binary arithmetic operation on the
histogram self with second operand other. Allocate and
return a new object of the same class as self to hold the result.
Hence, this function works for subclasses of Histogram as
well as for the base class.
Args:
other -- second operand
other_sumw2 -- second operand's sumw2 matrix, or None
if not defined
operation -- in-place arithmetic operation to be performed
optype -- is operation a SUM or PRODUCT type?
new_unit -- unit to assign to result
Returns:
new object of the same class as self holding result
"""
# temporarily remove self's units
self_unit = self._unit
self.to(None, update = False, copy = False)
self_contents = self._contents
new_contents = operation(self_contents, other)
if self._sumw2 is not None:
self_sumw2_contents = self._sumw2._contents
if optype == Histogram.OpType.SUM:
if other_sumw2 is None:
new_sumw2_contents = self_sumw2_contents.copy()
else:
new_sumw2_contents = self_sumw2_contents + other_sumw2
else: # optype == Histogram.OpType.PRODUCT
new_sumw2_contents = \
self._sumw2_product_op(self_sumw2_contents,
other_sumw2,
self_contents,
new_contents,
other,
operation)
new_sumw2_unit = None if new_unit is None else new_unit**2
new_sumw2 = self._sumw2._replace(_contents = new_sumw2_contents,
_unit = new_sumw2_unit)
else:
new_sumw2 = None
# restore unit of self
self.to(self_unit, update = False, copy = False)
new = self._replace(_contents = new_contents,
_sumw2 = new_sumw2,
_unit = new_unit)
return new
def __imul__(self, other):
return self._operation(other, operator.imul)
def __mul__(self, other):
return self._operation(other, operator.mul)
def __rmul__(self, other):
return self*other
def __itruediv__(self, other):
return self._operation(other, operator.itruediv)
def __truediv__(self, other):
return self._operation(other, operator.truediv)
def __rtruediv__(self, other):
"""
Divide a scalar by the histogram
"""
if not np.isscalar(other):
raise ValueError("Inverse operation can only occur between "
"histograms or a histogram and a scalar")
# Temporarily disable self unit
self_unit = self._unit
self.to(None, update = False, copy = False)
# Simple change of unit in this case. Other can't be Quantity or Unit
new_unit = None if self_unit is None else (1/self_unit).unit
# Error propagtion of f = b/A (where b is constant, no error) is:
# f_err^2 = f^2 (A_err/A)^2
self_contents = self._contents
new_contents = other/self_contents
if self._sumw2 is not None:
new_sumw2_contents = (self._sumw2._contents * other * other /
np.power(self_contents, 4))
# make sure sumw2's type follows contents (not sure if necessary?)
new_sumw2_contents = new_sumw2_contents.astype(self._contents.dtype,
copy=False)
new_sumw2_unit = None if new_unit is None else new_unit**2
new_sumw2 = self._sumw2._replace(_contents = new_sumw2_contents,
_unit = new_sumw2_unit)
else:
new_sumw2 = None
# restore self unit
self.to(self_unit, update = False, copy = False)
new = self._replace(_contents = new_contents,
_sumw2 = new_sumw2,
_unit = new_unit)
return new
def __iadd__(self, other):
return self._operation(other, operator.iadd)
def __add__(self, other):
return self._operation(other, operator.add)
def __radd__(self, other):
return self + other
def __neg__(self):
# Temporarily disable self unit
self_unit = self._unit
self.to(None, update = False, copy = False)
new_contents = -(self._contents)
# restore self unit
self.to(self_unit, update = False, copy = False)
# copy sumw2 if it exists, since it is unchanged by negation
new = self._replace(_contents = new_contents)
return new
def __isub__(self, other):
return self._operation(other, operator.isub)
def __sub__(self, other):
return self._operation(other, operator.sub)
def __rsub__(self, other):
return -self + other
def _comparison_operator(self, other, operation):
other = self._strip_units(other)
return operation(self._get_contents(), other)
def __lt__(self, other):
return self._comparison_operator(other, operator.lt)
def __le__(self, other):
return self._comparison_operator(other, operator.le)
def __gt__(self, other):
return self._comparison_operator(other, operator.gt)
def __ge__(self, other):
return self._comparison_operator(other, operator.ge)
[docs]
def rebin(self, *ngroup):
"""
Rebin a histogram by grouping adjacent bins into one on each axis
If an axis does not have overflow tracking enabled, any
partial group along that axis will be discarded. If it *does*
have overflow tracking enabled, any partial group's sum will
be added to the axis' underflow bin if it is on the left, or
to the overflow bin if it is on the right.
For histograms with multiple axes, the result of rebinning is
equivalent to rebinning the input on the first axis, then
rebinning the result on the second axis, and so forth for all
axes.
Args:
ngroup (int or array-like):
number of adjacent bins to combine for each axis. If
this value is > 0 for an axis, binning starts from left
side of contents, so the last partial group (if any)
is on the right; if < 0, binning starts from right side,
so last partial group (if any) is on the left.
Return:
a new, rebinned Histogram
"""
if len(ngroup) == 1:
if not np.isscalar(ngroup[0]): # a single array-like
ngroup = ngroup[0]
else:
# an array-like of values (must be scalar)
if not all(np.isscalar(v) for v in ngroup):
raise ValueError("input to rebin must be scalar or array-like of scalars")
group_size = np.asarray(ngroup, dtype=int)
if not group_size.all():
raise ValueError("ngroup cannot contain 0")
# number of bins to group on each axis, along with direction
group_sign = np.broadcast_to(np.sign(group_size), self.ndim)
group_size = np.broadcast_to(np.abs(group_size), self.ndim)
# compute edges for each axis after grouping, along with padding necessary
# for each axis' contents to have the correct size (including any uflow/oflow)
# after rebinning
new_axes = []
padding = []
slicing = []
for a, gsz, gsgn, track in zip(self._axes, group_size, group_sign, self._track_overflow):
# new axis contains every ngroupth bin. If sign is +, we start from
# bin 0; if -, we shift to ensure last bin is included.
start = a.nbins % gsz if gsgn < 0 else 0
new_axes.append( a[start::gsz] )
# for untracked dimensions, slice away any unused bins
trim = 0 if track else a.nbins % gsz
slicing.append( slice(trim, None) if gsgn < 0 else slice(None, a.nbins + 2*track - trim))
# pad before and after by the minimum amount needed to ensure that
# padded size is multiple of gsz and any uflow/oflow will be preserved
p1 = track * (gsz - 1) # won't be grouped with contents
p2 = (gsz - (a.nbins - trim + track)) % gsz # might be grouped with contents
padding.append( (p1, p2) if gsgn > 0 else (p2, p1) )
# create new Axes object. *Do* copy the existing Axes so that
# they do not share edge arrays with the input Histogram's axes.
new_axes = Axes(new_axes)
slicing = tuple(slicing)
# new shape splits each dimension into two; even dims are the
# size of the array post-rebining, and odd dims are the size
# of the groups to be summed on that axis
new_shape = np.empty(2*self.ndim, dtype = int)
new_shape[0::2] = (self._axes.nbins // group_size) + 2*self._track_overflow
new_shape[1::2] = group_size
sum_axes = tuple(range(1, 2*self.ndim, 2)) # axes to be summed away
# pad and reshape contents matrix prior to summing
new_contents = self._contents[slicing]
new_contents = self._pad(new_contents, padding)
new_contents = np.reshape(new_contents, new_shape)
new_contents = np.sum(new_contents, axis=sum_axes)
# apply same rebinning to sumw2 if it exists
if self._sumw2 is not None:
new_sumw2_c = self._sumw2._contents[slicing]
new_sumw2_c = self._pad(new_sumw2_c, padding)
new_sumw2_c = np.reshape(new_sumw2_c, new_shape)
new_sumw2_c = np.sum(new_sumw2_c, axis=sum_axes)
new_sumw2 = self._sumw2._replace(_axes = new_axes,
_contents = new_sumw2_c)
else:
new_sumw2 = None
return self._replace(_axes = new_axes,
_contents = new_contents,
_sumw2 = new_sumw2)
[docs]
def plot(self, ax = None,
errorbars = None,
colorbar = True,
label_axes = True,
**kwargs):
"""
Quick plot of the histogram contents.
Under/overflow bins are not included. Only 1D and 2D histograms
are supported.
Histogram with a HealpixAxis will automatically be plotted
as a map, passing all kwargs to mhealpy's HealpixMap.plot()
Args:
ax (matplotlib.axes): Axes on which to draw the histogram. A new
one will be created by default.
errorbars (bool or None): Include errorbars for 1D histograms. The
default is to plot them if sumw2 is available
colorbar (bool): Draw colorbar in 2D plots
label_axes (bool): Label plots axes. Histogram axes must be labeled.
**kwargs: Passed to `matplotlib.errorbar()` (1D) or
`matplotlib.pcolormesh` (2D)
"""
import matplotlib.pyplot as plt
from mhealpy import HealpixMap
from .healpix_axis import HealpixAxis
def without_units(a):
return a.value if isinstance(a, u.Quantity) else a
# Matplotlib errorbar and pcolormesh need regular array
contents = self._get_contents() # contents without units, oflow/uflow
if self.is_sparse:
contents = contents.todense()
# Handle the special case of a healpix axis
if self.ndim == 1 and isinstance(self._axes[0], HealpixAxis):
axis = self._axes[0]
# Pad in case it is partial map
contents = self._pad(contents,
(axis.lo_lim,
axis.npix - axis.hi_lim))
m = HealpixMap(data = contents, base = axis)
args = ()
if ax is not None:
args = (ax,)
plot, ax = m.plot(*args,
cbar = colorbar,
**kwargs)
return ax, plot
# Default errorbars
if errorbars is None:
if self._sumw2 is None:
errorbars = False
else:
errorbars = True
# Create axes if needed (with labels)
if ax is None:
fig, ax = plt.subplots()
# Plot, depending on number of dimensions
if self.ndim == 1:
axis = self._axes[0]
if isinstance(axis, TimeDeltaAxis):
# Convert to regular axis with default unit
# Include unit in label since were are dropping it
axis = axis.to()
axis.label = self._axes[0].label_with_unit
# We have two points per bin (lower edge+center), and 2 extra
# point for under/overflow (these currently don't have errorbar,
# they looked bad)
if isinstance(axis, TimeAxis):
xdata = Time([datetime.datetime(2000,1,1,0,0,0)]*(2 * axis.nbins + 2), format = 'datetime')
else:
# For regular Axis and TimeAxis
if axis.unit is None:
xdata = np.empty(2*axis.nbins + 2)
else:
xdata = np.empty(2*axis.nbins + 2)*axis.unit
xdata[0] = axis.edges[0] # For underflow, first edge
xdata[1::2] = axis.edges # In between edges. Last edge for overflow
xdata[2::2] = axis.centers # For markers
# Handle time axis, to it can be formatted lated
if isinstance(axis, TimeAxis):
xdata = xdata.datetime
else:
# Regular Axis and TimeAxis
xdata = without_units(xdata)
underflow = self._get(-1) if self._track_overflow[0] else 0
overflow = self._get(axis.nbins) if self._track_overflow[0] else 0
ydata = np.concatenate(([underflow],
np.repeat(contents, 2),
[overflow]))
# Style
drawstyle = kwargs.pop('drawstyle', 'steps-post')
# Error bars
yerr = None
if errorbars:
errors = without_units(self.bin_error.contents)
if self.is_sparse:
# No auto densify
errors = errors.todense()
yerr = np.empty(2*axis.nbins + 2)
yerr[2::2] = errors
yerr[0] = None # No underflow errorbar, looked bad
yerr[1::2] = None # No overflow errorbar, looked bad
# Plot
plot = ax.errorbar(xdata,
ydata,
yerr = yerr,
drawstyle = drawstyle,
**kwargs)
# Label axes
if label_axes:
ax.set_xlabel(axis.label_with_unit)
if self._unit not in (None, u.dimensionless_unscaled):
ax.set_ylabel(f"[{self._unit}]")
elif self.ndim == 2:
plot_edges = []
for axis in self._axes:
if isinstance(axis, TimeAxis):
plot_edges.append(axis.edges.datetime)
elif isinstance(axis, TimeDeltaAxis):
plot_edges.append(axis.to().edges)
else:
# Regular axis
plot_edges.append(without_units(axis.edges))
# No under/overflow
plot = ax.pcolormesh(plot_edges[0], plot_edges[1],
np.transpose(contents),
**kwargs)
if label_axes:
ax.set_xlabel(self._axes[0].label_with_unit)
ax.set_ylabel(self._axes[1].label_with_unit)
if colorbar:
cax = ax.get_figure().colorbar(plot, ax = ax)
if self._unit not in (None, u.dimensionless_unscaled):
cax.set_label(f"[{self._unit}]")
else:
raise ValueError("Plotting only available for 1D and 2D histograms")
if self._axes[0].axis_scale == 'log':
ax.set_xscale('log')
if self.ndim > 1 and self._axes[1].axis_scale == 'log':
ax.set_yscale('log')
return ax, plot
# draw() is an alias of plot()
draw = plot
[docs]
def fit(self, f, lo_lim = None, hi_lim = None, **kwargs):
"""
Fit histogram data using least squares.
This is a convenient call to scipy.optimize.curve_fit. Sigma corresponds
to the output of `h.bin_error`. Empty bins (e.g. error equals 0) are
ignored
Args:
f (callable): Function f(x),... that takes the independent variable
x as first argument, and followed by the parameters to be fitted.
For a k-dimensional histogram is should handle arrays of shape
(k,) or (k,N). The inputs and outputs must be unitless.
lo_lim (float or array): Low axis limit to fit. One value per axis.
lo_lim (float or array): High axis limit to fit. One value per axis.
**kwargs: Passed to scipy.optimize.curve_fit
"""
from inspect import signature
from scipy.optimize import curve_fit
lo_lim = np.broadcast_to(lo_lim, self.ndim, subok = True)
hi_lim = np.broadcast_to(hi_lim, self.ndim, subok = True)
# Sanity checks
for axis,lo,hi in zip(self._axes, lo_lim, hi_lim):
if ((lo is not None and lo < axis.lo_lim) or
(hi is not None and hi >= axis.hi_lim)):
raise ValueError("Fit limits out of bounds")
# Get bins that correspond to the fit limits
lim_bins = tuple(slice(None if lo is None else axis.find_bin(lo),
None if hi is None else axis.find_bin(hi))
for axis,lo,hi
in zip(self._axes, lo_lim, hi_lim))
# Get data to fit
x = [axis.centers[bins].value
if axis.unit is not None
else axis.centers[bins]
for axis,bins in zip(self._axes,lim_bins)]
x = np.meshgrid(*x, indexing='ij') # For multi-dimensional histograms
y = self._get(lim_bins)
sigma = self.bin_error[lim_bins]
# Sparse
if self.is_sparse:
y = y.todense()
sigma = sigma.todense()
# Ignore empty bins
non_empty = (sigma != 0)
x = [centers[non_empty] for centers in x]
y = y[non_empty]
sigma = sigma[non_empty]
# Flat matrices
if self.ndim > 1:
x = np.asarray([centers.ravel() for centers in x])
y = y.ravel()
sigma = sigma.ravel()
else:
x = x[0]
# Sanity checks
if x.shape[-1] < len(signature(f).parameters) - 1:
raise RuntimeError("Fewer bins within limits than parameters to fit.")
# Actual fit with scipy
return curve_fit(f, x, y, sigma = sigma, **kwargs)
[docs]
def write(self, filename, name = "hist", overwrite = False,
h5_dataset_kw = None):
"""
Write histogram to a group in an HDF5 file. Appended if the file already
exists.
Args:
filename (str): Path to file
name (str): Name of group to save histogram (can be any HDF5 path)
overwrite (str): Delete and overwrite group if already exists.
h5_dataset_kw (dict, optional): dictionary of keywords to control
how the Histogram's will be written to datasets by the underlying
HDF5 library
For information on keywords controlling dataset creation, see
https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset
"""
import h5py as h5
with h5.File(filename, 'a') as f:
# Will fail on existing group by default
if name in f:
if overwrite:
del f[name]
else:
raise ValueError("Unable to write histogram. Another group "
"with the same name already exists. Choose "
"a different name or use overwrite")
self._write(f, name, h5_dataset_kw = h5_dataset_kw)
[docs]
@classmethod
def open(cls, filename, name = 'hist'):
"""
Read a Histogram from a specified group in an HDF5 file.
"""
import h5py as h5
with h5.File(filename, 'r') as f:
return cls._open(f[name])
def _write(self, file, group_name, h5_dataset_kw=None, **kwargs):
"""
Write histogram to the named group in an HDF5 file. This method
can be overriden by subclasses to add additional information to
the group.
Args:
file : HDF5 file handle
group_name (str): Name of group to save histogram (can be
any HDF5 path)
h5_dataset_kw (dict, optional): dictionary of keywords to control
how the Histogram's will be written to datasets by the underlying
HDF5 library
"""
if h5_dataset_kw is None:
h5_dataset_kw = {}
hist_group = file.create_group(group_name)
if self._unit is not None:
hist_group.attrs['unit'] = str(self._unit)
# Axes. Each one is a data set with attributes.
# We rely on Axes to return them later in the same
# order that they are saved.
axes_group = hist_group.create_group('axes')
self._axes.write(axes_group)
if self.is_sparse:
hist_group.attrs['format'] = 'coo'
contents_group = hist_group.create_group('contents')
contents = self._contents
contents_group.create_dataset('coords',
data = contents.coords,
compression = "gzip",
track_times = False)
contents_group.create_dataset('data',
data = contents.data,
compression = "gzip",
track_times = False)
contents_group.create_dataset('shape',
data = contents.shape,
track_times = False)
contents_group.create_dataset('fill_value',
data = contents.fill_value,
track_times = False)
if self._sumw2 is not None:
sumw2_group = hist_group.create_group('sumw2')
sumw2_contents = self._sumw2._contents
sumw2_group.create_dataset('coords',
data = sumw2_contents.coords,
compression = "gzip",
track_times = False)
sumw2_group.create_dataset('data',
data = sumw2_contents.data,
compression ="gzip",
track_times = False)
sumw2_group.create_dataset('shape',
data = sumw2_contents.shape,
track_times = False)
sumw2_group.create_dataset('fill_value',
data = sumw2_contents.fill_value,
track_times = False)
else:
hist_group.attrs['format'] = 'dense'
hist_group.create_dataset('contents',
data = self._contents,
track_times = False,
**h5_dataset_kw)
if self._sumw2 is not None:
hist_group.create_dataset('sumw2',
data = self._sumw2._contents,
track_times = False,
**h5_dataset_kw)
return hist_group
@classmethod
def _open(cls, hist_group, **kwargs):
"""
Alternative constructor that reads contents from a group in an
HDF5 file. May be overridden just like a regular constructor.
Args:
hist_group: group in HDF5 file where data is stored
Returns:
New object of type cls (which may be a subclass of Histogram)
"""
axes, contents, sumw2, unit, track_overflow = Histogram._read_from_hdf(hist_group)
new = cls.__new__(cls)
# sanity-checks values and allocates special accessor objects
Histogram.__init__(new,
edges = axes,
contents = contents,
sumw2 = sumw2,
unit = unit,
track_overflow = track_overflow,
copy_contents = False)
return new
@staticmethod
def _read_from_hdf(hist_group):
"""Read contents of a histogram from a data group. Return the raw
contents as a tuple; caller is responsible for creating the
object.
Args:
hist_group: an HD5 group object
Returns:
a tuple (axes, contents, sumw2, unit, track_overflow)
that may be given to the Histogram constructor
"""
unit = None
if 'unit' in hist_group.attrs:
unit = u.Unit(hist_group.attrs['unit'])
# Axes
axes_group = hist_group['axes']
axes = Axes.open(axes_group)
# Contents
# Backwards compatible before sparse was supported
if ('format' not in hist_group.attrs or
hist_group.attrs['format'] == 'dense'):
contents = np.asarray(hist_group['contents'])
sumw2 = None
if 'sumw2' in hist_group:
sumw2 = np.asarray(hist_group['sumw2'])
elif hist_group.attrs['format'] == 'gcxs':
contents_group = hist_group['contents']
compressed_axes = None
if 'compressed_axes' in contents_group:
compressed_axes = np.asarray(contents_group['compressed_axes'])
contents = GCXS((np.asarray(contents_group['data']),
np.asarray(contents_group['indices']),
np.asarray(contents_group['indptr'])),
compressed_axes = compressed_axes,
shape = tuple(contents_group['shape']),
fill_value = np.asarray(contents_group['fill_value']).item())
sumw2 = None
if 'sumw2' in hist_group:
sumw2_group = hist_group['sumw2']
compressed_axes = None
if 'compressed_axes' in sumw2_group:
compressed_axes = np.asarray(sumw2_group['compressed_axes'])
sumw2 = GCXS((np.asarray(sumw2_group['data']),
np.asarray(sumw2_group['indices']),
np.asarray(sumw2_group['indptr'])),
compressed_axes = compressed_axes,
shape = tuple(sumw2_group['shape']),
fill_value = np.asarray(sumw2_group['fill_value']).item())
elif hist_group.attrs['format'] == 'coo':
contents_group = hist_group['contents']
contents = COO(coords = np.asarray(contents_group['coords']),
data = np.asarray(contents_group['data']),
shape = tuple(contents_group['shape']),
fill_value = np.asarray(contents_group['fill_value']).item())
sumw2 = None
if 'sumw2' in hist_group:
sumw2_group = hist_group['sumw2']
sumw2 = COO(coords = np.asarray(sumw2_group['coords']),
data = np.asarray(sumw2_group['data']),
shape = tuple(sumw2_group['shape']),
fill_value = np.asarray(sumw2_group['fill_value']).item())
else:
raise IOError(f"Format {hist_group.attrs['format']} unknown.")
# Deduce track_overflow based on contents shape
track_overflow = [size == a.nbins + 2
for size,a in zip(contents.shape, axes)]
return (axes, contents, sumw2, unit, track_overflow)