Source code for histpy.histogram

import logging

logger = logging.getLogger(__name__)

from enum import Enum

import operator

from copy import deepcopy

import numpy as np

from sparse import DOK, COO, GCXS, SparseArray

# This is an ugly way to get the value
# of AUTO_DENSIFY. There doesn't seem to be
# any public-facing API to get it.
import sparse
from packaging.version import Version

try:
    sparse_version = Version(sparse.__version__)
except TypeError:
    # work when sparse is a mocked package when building sphinx documentation
    sparse_version = Version("0")

if sparse_version >= Version('0.16.0'):
    import sparse.numba_backend._settings as sps
else:
    import sparse._settings as sps

import astropy.units as u

import datetime

from .axes import Axes
from .axis import Axis
from .time_axis import TimeAxis
from .time_delta_axis import TimeDeltaAxis

from astropy.time import Time, TimeDelta

from .feature import COPY_IF_NEEDED

# Invariants that hold for Histogram objects:
#
# At the beginning of each call to a method,
#  if Histogram is sparse, it is in COO format
#
# If a Histogram has sparse contents and has sumw2,
# its sumw2 is also sparse.


[docs]
class Histogram(object):
    """This is a wrapper of a numpy array with axes and a fill
    method. Sparse array from pydata's sparse package are also
    supported.

    Like an array, the histogram can have an arbitrary number of
    dimensions.

    Standard numpy array indexing is supported to get the contents
    --i.e. :code:`h[:]`, :code:`h[4]`, :code:`h[[1,3,4]]`,
    :code:`h[:,5:50:2]]`, etc.--. However, the meaning of the
    :code:`-1` index is different. Instead of counting from the end,
    :code:`-1` corresponds to the underflow bin. Similarly, an index
    equal to the number of bins corresponds to the overflow bin.

    You can however give relative position with respect to
    :code:`h.end` --e.g. :code:`h[0:h.end]` result in all regular
    bins, :code:`h[-1:h.end+1]` includes also the underflow/overflow
    bins and :code:`h[h.end]` gives you the contents of the overflow
    bin. The convenient aliases :code:`h.uf = -1`, :code:`h.of =
    e.end` and :code:`h.all = slice(-1,h.end+1)` --or
    :code:`slice(0,h.end)` if the under/overflow are not tracked-- are
    provided.

    You can also use an :code:`Ellipsis` object (:code:`...`) at the
    end to specify that the contents from the rest of the dimension
    are to have the under and overflow bins included. e.g. for a 3D
    histogram :code:`h[1,-1:h.end+1,-1:h.end+1] =
    h[1,...]`. :code:`h[:]` returns all contents without
    under/overflow bins and h[...] returns everything, including those
    special bins.

    If no initial contents are provided, axes do *not* track
    under/overflow by default.  If contents are provided, an axis
    tracks under/overflow by default if the provided contents has two
    more bins in that dimension than axis.nbins. You can specify that
    certain axes will/will not track under/overflow with the
    :code:`track_overflow` keyword. Note that attempting to access an
    underflow/overflow bin on an axis that is not tracked will result
    in an IndexError.

    If :code:`sumw2` is not :code:`None`, then the histogram will keep
    track of the sum of the weights squared. You should use this
    feature if you are using weighted data and are concerned about
    error propagation. You can access the sum of squared wieghts with
    :code:`h.sumw2[item]`, where `item` is interpreted the same way as
    in :code:`h[item]`.  :code:`h.bin_error[item]` return the
    :code:`sqrt(sumw2)` (or :code:`sqrt(contents)` is :code:`sumw2 was
    not specified`).

    The binary operators :code:`+`, :code:`-`, :code:`*` and :code:`/`
    are supported for Histograms and correctly propagate error if
    sumw2 is present.  The other operand can be a Histogram, a scalar
    or an array of appropiate size.  Note that :code:`h += h0` is more
    efficient than :code:`h = h + h0` since latter involves the
    instantiation of a new histogram. Unary negation of a Histogram is
    supported, as is dividing a scalar by a Histogram.

    Args:
        edges (Axes, Axis or array): Definition of bin edges, as anything
            that can be processes by Axes. Lower edge value is
            included in the bin, upper edge value is excluded.
        contents (array-like or SparseArray): Initialization of
            histogram contents. May include overflow/underflow bins if
            overflow is being tracked; if tracking is enabled and
            contents does not have these bins, they will be
            initialized to zeros. If omitted, creates an array of
            zeros.
        sumw2 (None, bool or array): If not None, the histogram will
            maintain squared weights associated with the elements of
            contents.  These weights are initially zero if sumw2 =
            True but may instead be initialized explicitly with an
            array.  Arithmetic between two histograms with squared
            weights propagates these weights to the result according
            to propagation-of-error theory.
        labels (array of str): Optionally label the axes for easier indexing
        axis_scale (str or array): Bin center mode e.g. `"linear"` or
            `"log"`.  See ``Axis.axis_scale``.
        sparse (bool): indicate if contents and sumw2 should be
            maintained as dense or sparse arrays.  If specified,
            contents and sumw2 will be converted to the specified
            sparsity if needed (but attempting to densify a sparse
            matrix will fail to avoid unexpected memory blowups).  If
            not specified, the Histogram's sparsity follows that of
            the provided contents, or is dense if no contents are
            provided.
        unit (Unit-like): unit of contents; if not specified, inferred
            from contents if u.Quantity or None otherwise
        track_overflow (bool, array-like, or dict): Whether to
            allocate space to track the underflow and overflow
            bins. Acceptable forms include
              - a single boolean value (applies to all axes)
              - a 1-D array-like with a boolean value per axis
              - a dictionary specifying boolean values for a set
                of named/numbered axes.  For axes not in the dict,
                the default is True.
            If this parameter is not provided, the default behavior
            depends on the value of the `contents` argument.
              - if `contents` is not provided, overflow is not tracked
                on any axis.
              - if `contents` is provided, each axis tracks overflow
                if contents includes overflow/underflow bins for that
                axis, i.e., if its size along that axis is two more
                than the axis' number of bins.
        dtype: Numpy datatype or None type of contents array; if None,
            use type of provided contents, or default (float64) if
            none provided.
        copy_contents (bool): if True (default), numpy arrays or
            Quantity arrays passed as contents and sumw2 will *not* be
            copied unless necessary; hence, the Histogram's memory may
            alias these values.

    """
    def __init__(self, edges, contents = None, sumw2 = None,
                 labels = None, axis_scale = None,
                 sparse = None, unit = None, track_overflow = None,
                 dtype = None, copy_contents = True):

        from scipy.sparse import sparray, spmatrix

        self._axes = Axes(edges, labels = labels, axis_scale = axis_scale)

        # set unit of Histogram, if any
        if unit is None:
            if isinstance(contents, u.Quantity):
                self._unit = contents.unit # derive unit from contents
            else:
                self._unit = None
        else:
            self._unit = u.Unit(unit)

        # Standardize contents (with under/overflow) or initialize them to zero.
        # Track operations that might make a copy, so we know if we still need
        # to copy the contents array at the end.  We don't want an unconditional
        # copy because contents might be very large.
        if contents is not None:

            is_copied = False

            # convert Quantity to bare value, after adjusting it to
            # histogram's unit (if any).
            if isinstance(contents, u.Quantity):
                cnew = contents.to_value(self._unit)
                if cnew is not contents.value:
                    is_copied = True
                contents = cnew

            # convert SciPy arrays to COO
            if isinstance(contents, (sparray, spmatrix)):
                is_copied = True
                contents = COO.from_scipy_sparse(contents)

            # make sure contents is a dense or sparse array
            if not isinstance(contents, (np.ndarray, SparseArray)):
                is_copied = True
                contents = np.asarray(contents, dtype=dtype)
                contents = np.atleast_1d(contents)

            if contents.ndim != self._axes.ndim:
                raise ValueError(f"cannot use contents of dimension {contents.ndim} for "
                                 f"Histogram of dimension {self._axes.ndim}")

            if isinstance(contents, (GCXS, DOK)):
                # sparse arrays are initially stored as COO.  Slicing
                # does not work well with GCXS, so we use COO instead;
                # See https://github.com/pydata/sparse/issues/550.
                is_copied = True
                contents = contents.asformat('coo')

            if dtype is not None:
                # make sure contents' dtype matches given dtype
                is_copied |= (contents.dtype != dtype)
                contents = contents.astype(dtype, copy=False)

            if sparse is not None:
                if sparse and not isinstance(contents, SparseArray):
                    is_copied = True
                    contents = COO.from_numpy(contents)
                elif not sparse and isinstance(contents, SparseArray):
                    if not sps.AUTO_DENSIFY:
                        raise RuntimeError("Cannot convert sparse contents to dense automatically. "
                                           "To manually densify, use the todense method.")

                    is_copied = True
                    contents = contents.todense()

            if track_overflow is None:
                # by default, overflow tracking follows contents shape
                track_overflow = (contents.shape == self._axes.nbins + 2)

            # convert track_overflow argument to standard format
            self._track_overflow = self._standardize_track_overflow(track_overflow)

            # determine if contents must be padded to match overflow tracking settings
            padding = self._get_padding_for_overflow(self._track_overflow, self._axes.nbins, contents.shape)
            self._contents = self._pad(contents, padding)
            if self._contents is not contents:
                is_copied = True

            if copy_contents and not is_copied:
                self._contents = self._contents.copy()
        else:

            if track_overflow is None:
                # by default, overflow tracking is *off* for empty contents
                track_overflow = False

            # convert track_overflow argument to standard format
            self._track_overflow = self._standardize_track_overflow(track_overflow)

            # initialize contents to array of all zeros
            contents_shape = tuple( self._axes.nbins + 2 * self._track_overflow )

            if sparse:
                self._contents = DOK(shape = contents_shape,
                                     dtype = dtype,
                                     fill_value = 0).asformat('coo')
            else:
                self._contents = np.zeros(contents_shape, dtype=dtype)

        self._init_specials()

        # create sumw2 histogram if requested
        self.set_sumw2(sumw2, copy = copy_contents)

    def _init_specials(self):
        """
        Create objects for special access methods on Histogram's contents
        """
        self.bin_error = self._get_bin_error(self)
        self.slice = self._slice(self)


[docs]
    def set_sumw2(self, sumw2, copy = True):
        """
        Set the sumw2 matrix to a Histogram to track the sum of error
        weights. If not None/False, sumw2 must be either an array-like
        with the same shape as contents or a Histogram with the same axes
        as contents. It will be coerced to have the same units,
        sparsity, dtype, and overflow tracking as the base Histogram.

        Args:
           sumw2
             values for weights: True for all zeros, or a Histogram, or an
             array-like; None or False to remove any existing sumw2
           copy: bool
             if True (default), copy the object passed as sumw2; otherwise,
             create a view into the object if possible.

        """

        if sumw2 is None or sumw2 is False:
            self._sumw2 = None
        else:
            # derive unit of sumw2 from that of main histogram
            if self._unit is not None:
                w2unit = self._unit**2
            else:
                w2unit = None

            if isinstance(sumw2, Histogram):
                if sumw2._axes != self._axes:
                    raise ValueError("Histogram sumw2 must have same axes as contents")

                is_copied = False

                # match dtype of sumw2 to that of contents
                is_copied |= (sumw2._contents.dtype != self._contents.dtype)
                sumw2 = sumw2.astype(self._contents.dtype, copy=False)

                # match sparsity of sumw2 to that of contents
                if not sumw2.is_sparse and self.is_sparse:
                    is_copied = True
                    sumw2 = sumw2.to_sparse()
                elif sumw2.is_sparse and not self.is_sparse:
                    if not sps.AUTO_DENSIFY:
                        raise RuntimeError("Cannot convert a sparse histogram to dense automatically. "
                                           "To manually densify, use the todense method.")

                    is_copied = True
                    sumw2 = sumw2.to_dense()

                # match overflow tracking of sumw2 to that of contents
                if not np.array_equal(sumw2._track_overflow, self._track_overflow):
                    is_copied = True
                    sumw2.track_overflow(self._track_overflow)

                # adjust unit of sumw2 and copy it if needed
                self._sumw2 = sumw2.to(w2unit, copy = copy and not is_copied)

            else:
                sumw2_contents = None if sumw2 is True else sumw2

                # for zero init, bare value, or Quantity, let __init__ do coercion of contents
                self._sumw2 = Histogram(self._axes,
                                        contents = sumw2_contents,
                                        dtype = self._contents.dtype,
                                        sparse = self.is_sparse,
                                        unit = w2unit,
                                        track_overflow = self._track_overflow,
                                        copy_contents = copy)



[docs]
    def copy(self):
        """
        Make a deep copy of a Histogram.  The copy shares no
        *writable* members with the original; the only shared
        members are those that will never be mutated.

        This function preserves subclass types if called from a
        derived class.  Subclasses with additional data members
        may override this function; if they do not, their
        data members will be deepcopied.
        """

        # short-circuit if we are in the middle of _replace
        if hasattr(self, '_new_instance'):
            return self._new_instance
        else:
            return self._replace() # no replacements


    def __deepcopy__(self, memo):
        """
        Hook for deepcopy()
        """

        self._memo = memo # cache memo dict in case we need it
        new = self._replace() # no replacements
        del self._memo

        return new

    # all recognized data members of Histogram.
    # CHANGE THIS and the _replace() function if
    # we add or remove data members.
    _data_members = frozenset([
        '_axes',
        '_contents',
        '_sumw2',
        '_unit',
        '_track_overflow',
    ])

    def _replace(self, **kwargs):
        """
        Make a deep copy of a Histogram as defined for copy(), but
        optionally specify one or more key-value pairs naming a data
        member of Histogram and a value to use for it in the new
        object instead of performing the default copy operation.

        """
        cls = self.__class__
        new = cls.__new__(cls)

        # set any members specified by kwargs
        for member in kwargs:

            assert member in Histogram._data_members, \
                f"Histogram has no data member {member}"

            setattr(new, member, kwargs[member])

        if not hasattr(new, '_axes'):
            new._axes = self._axes.copy()

        if not hasattr(new, '_contents'):
            new._contents = self._contents.copy()

        if not hasattr(new, '_unit'):
            new._unit = self._unit  # no need to copy unit

        if not hasattr(new, '_track_overflow'):
            new._track_overflow = self._track_overflow  # no need to copy track_overflow

        new._init_specials()

        if not hasattr(new, '_sumw2'):
            if self._sumw2 is not None:
                new._sumw2 = self._sumw2.copy()
            else:
                new._sumw2 = None

        if cls != Histogram:
            if cls.copy == Histogram.copy: # copy not overridden
                self_dict = vars(self)
                new_dict = vars(new)

                # if we were called from __deepcopy__(), pass along
                # the supplied memo object to recursive deepcopy calls.
                kwargs = {}
                if hasattr(self, '_memo'):
                    kwargs['memo'] = self._memo

                # don't copy the temporary _memo field recursively
                for member in self_dict.keys() - new_dict.keys() - { '_memo' }:
                    setattr(new, member, deepcopy(self_dict[member], **kwargs))

            else:
                self._new_instance = new # copy() will return this
                self.copy()
                del self._new_instance

        return new


[docs]
    def astype(self, dtype, copy=True):
        """
        Cast the contents and, if present, the sumw2 of a Histogram to
        a different data type.  If the new type differs from the old
        type, we always return a copy; otherwise, we return a copy
        if copy=True or the original if copy=False.
        """

        if self._contents.dtype == dtype and not copy:
            return self
        else:

            new_contents = self._contents.astype(dtype) # makes a copy

            if self._sumw2 is not None:
                new_sumw2 = self._sumw2.astype(dtype) # makes a copy
            else:
                new_sumw2 = None

            return self._replace(_contents=new_contents,
                                 _sumw2=new_sumw2)


    @staticmethod
    def _compute_padding(needs_padding):
        """
        Given a Boolean array indicating whether overflow padding is
        needed in each dimension of an array, compute an argument to
        np.pad that adds padding where needed.

        Args:
          needs_padding: np.ndarray of bool [axes.ndims]
          ith entry is True iff ith dimension needs padding
        Returns:
          np.ndarray *view* of size |needs_padding| x 2 with
          padding values for each dimension of the array.

        NB: return value is not writable!
        """
        padlen = needs_padding.astype(int) # 1 if needed, 0 if not
        return np.broadcast_to(padlen[:,None], (needs_padding.size, 2)) # ints -> pairs

    @staticmethod
    def _get_padding_for_overflow(track_overflow, hshape, oshape,
                                  mask=None, check_sanity=True,
                                  first_dim = 0):
        """
        Compute the padding necessary to convert an array of shape oshape to
        match the contents shape of a Histogram with shape hshape and
        specified track_overflow settings.

        Args:
         track_overflow: np.ndarray of bool
            Should overflow be tracked for each dimension of this Histogram?
         hshape: array-like of int
            Shape of Histogram, not including overflow/underflow bins
        oshape: array-like of int
            Shape of array being padded to match Histogram
        mask: np.ndarray (optional)
            If not None, mask of length == hshape indicating which
            dimensions should be checked for padding. Unchecked dimensions
            always receive zero padding.
        check_sanity: bool
            If True, make sure each dimension of oshape either conforms to
            hshape given its tracking requirement or can be padded to conform.
            Raise an exception if this test fails.
        first_dim: int
            The dimension of the Histogram corresponding to the first entry
            in track_overflow (used only for error printing)
        Returns:
          np.ndarray *view* of size |needs_padding| x 2 with
          padding values for each dimension of the array.

        NB: return value is not writable!
        """

        hshape = np.asarray(hshape) # make sure at least one of oshape, hshape is array

        if check_sanity:
            # First, check that oshape can be transformed to hshape via padding
            invalid_padding   = track_overflow & ~np.isin(oshape - hshape, (0,2))
            invalid_nopadding = ~track_overflow & ~(oshape == hshape)

            if mask is not None:
                invalid_padding   &= mask
                invalid_nopadding &= mask

            if np.any(invalid_padding):
                bad_axes = np.nonzero(invalid_padding)[0]
                first_bad = bad_axes[0]
                hlen = hshape[first_bad]
                olen = oshape[first_bad]
                raise ValueError(f"Array axis {first_dim + first_bad} has size {olen}; "
                                 f"must be {hlen} or {hlen+2} to use with track_overflow = True")

            if np.any(invalid_nopadding):
                bad_axes = np.nonzero(invalid_nopadding)[0]
                first_bad = bad_axes[0]
                hlen = hshape[first_bad]
                olen = oshape[first_bad]
                raise ValueError(f"Array axis {first_dim + first_bad} has size {olen}; "
                                 f"must be {hlen} to use with track_overflow = False")

        # Next, identify which dimensions need to be padded
        needs_padding = track_overflow & (oshape == hshape)
        if mask is not None:
            needs_padding &= mask

        # Finally, return the padding argument to np.pad
        return Histogram._compute_padding(needs_padding)

    @staticmethod
    def _pad(arr, padding, value=0):
        """
        Pad array arr as needed with value.

        Args:
          arr     -- np.ndarray
          padding -- array-like of size [arr.ndim x 2]
                     giving padding values for each dim of arr
          value   -- value to use for padding cells (default 0)

        Returns:
          padded array, which may be the original (if no padding is
          needed) or a copy
        """

        padding = np.asarray(padding)

        if not padding.any():
            return arr
        else:
            return np.pad(arr, padding, constant_values=value)

    def _standardize_track_overflow(self, track_overflow):
        """
        Convert any valid form of track_overflow to an array of
        Boolean values per axis.

        Args:
          track_overflow (bool, array-like, or dict): Whether to
          allocate space to track the underflow and overflow
          bins. Acceptable forms include
            - a single boolean value (applies to all axes)
            - a 1-D array-like with a boolean value per axis
            - a dictionary specifying boolean values for a set
              of named/numbered axes.  For axes not in the dict,
              the default is False.

        """
        if isinstance(track_overflow, (bool, np.bool_)):
            track_overflow = np.full(self.ndim, track_overflow)
        elif isinstance(track_overflow, (np.ndarray, list, tuple)):
            track_overflow = np.atleast_1d(track_overflow)
            if track_overflow.size != self.ndim:
                raise ValueError("track_overflow size doesn't match the number of dimensions")
        elif isinstance(track_overflow, dict):
            track_overflow = np.asarray(self._axes.expand_dict(track_overflow, default = False))
        else:
            raise TypeError("Track overflow can only be bool, array, or dictionary.")

        return track_overflow

    def _match_track_overflow(self, old_track_overflow, new_track_overflow):
        """
        Compute slices and padding needed to go from old to new overflow settings.

        Args:
          old_track_overflow -- np.ndarray of bool
                                old track_overflow settings
          new_track_overflow -- np.ndarray of bool
                                new track_overflow settings

        Returns:
          tuple (slices, padding)
          slices -- tuple of slices to reduce each dimension from
                    which overflow is removed
          padding --  np.ndarray *view* with padding values for
                     each dimension

        NB: padding is not writable!
        """

        needs_padding = ~old_track_overflow & new_track_overflow
        padding = self._compute_padding(needs_padding)

        needs_slicing = old_track_overflow & ~new_track_overflow
        slices = tuple( slice(1,-1) if b else slice(None) for b in needs_slicing )

        return slices, padding

    def _update_track_overflow(self, new_track_overflow):
        """
        Update the contents and sumw2 of this Histogram to reflect
        a specified set of overflow tracking settings.

        Args:
          new_track_overflow -- bool, array-like, or dict
             specifying track-overflow settings per dimension
        """

        new_track_overflow = self._standardize_track_overflow(new_track_overflow)

        slices, padding = self._match_track_overflow(self._track_overflow,
                                                     new_track_overflow)

        self._track_overflow = new_track_overflow

        self._contents = self._pad(self._contents[slices], padding)

        # if we are possibly no longer padding a dense array that was
        # padded, make sure array is contiguous in memory after any slicing
        if not self.is_sparse and not padding.any():
            self._contents = np.ascontiguousarray(self._contents)

        if self._sumw2 is not None:
            self._sumw2._update_track_overflow(new_track_overflow)


[docs]
    def track_overflow(self, track_overflow = None):
        """
        Obtain an array specifying whether each axis is tracking underflows and overflows.
        If input is not None, adjust the track_overflow settings to those provided.

        Args:
            track_overflow (bool, array-like, or dict): Optional. New overflow tracking settings

        Returns:
            np.ndarray with *copy* of current overflow tracking settings

        We return a copy to external callers because it is unsafe to modify a live
        track_overflow array in place; any updates *must* be fed back to track_overflow
        (internally, to _update_track_overflow) to take effect.
        """

        if track_overflow is not None:
            self._update_track_overflow(track_overflow)

        # return a copy to avoid letting caller mutate the
        # track_overflow array, which will not work.
        return self._track_overflow.copy()


    @property
    def unit(self):
        return self._unit


[docs]
    def to(self, unit, equivalencies=[], update=True, copy=True):
        """
        Convert a Histogram to a different unit.

        Args:
            unit (unit-like): Unit to convert to.
            equivalencies (list or tuple): A list of equivalence pairs to try if the units are not
                directly convertible.
            update (bool): If ``update`` is ``False``, only the units will be changed without
                updating the contents accordingly
            copy (bool): If True (default), then the value is copied. Otherwise, a copy
                will only be made if necessary.
        """

        old_unit = self._unit
        new_unit = None if unit is None else u.Unit(unit)

        if copy:
            new = self.copy()
        else:
            new = self

        if update:
            # Apply factor needed to convert to new unit to contents and sumw2
            if old_unit is None:
                if new_unit is not None and new_unit != u.dimensionless_unscaled:
                    raise TypeError("Assigning unit to Histogram without units")
            elif new_unit is not None:
                factor = old_unit.to(new_unit, equivalencies = equivalencies)
                if factor != 1.0:
                    new *= factor

        # Update units
        new._unit = new_unit

        if new._sumw2 is not None:
            new._sumw2._unit = None if new_unit is None else new_unit**2

        return new


    def _with_units(self, value):
        """
        Add unit to value if histogram has one and value is dense,
        creating a Quantity array.

        Ideally, we would add a unit whether value is dense or sparse.
        Unfortunately, SparseArray is not compatible with Quantity, so
        if we want to return a sparse array value, we cannot set the
        unit.  We do not want to pay to densify a potentially large
        matrix, so we just return the raw value.

        """

        if self._unit is not None and not isinstance(value, SparseArray):
            # prevent copy when adding unit
            return u.Quantity(value, unit=self._unit, copy=COPY_IF_NEEDED)
        else:
            return value

    @property
    def is_sparse(self):
        """
        Return True if the underlying histogram contents array is sparse,
        or False if dense.

        """
        return isinstance(self._contents, SparseArray)


[docs]
    def to_dense(self, copy=True):
        """
        Return a dense copy of a histogram

        copy : if true, always copy whether conversion is needed or
               not; otherwise, copy only if converting

        """

        if not self.is_sparse:
            h_dense = self.copy() if copy else self
        else:
            new_contents = self._contents.todense()

            if self._sumw2 is not None:
                new_sumw2 = self._sumw2.to_dense(copy=True)
            else:
                new_sumw2 = None

            h_dense = self._replace(_contents = new_contents,
                                    _sumw2    = new_sumw2)

        return h_dense


    # alias pydata sparse style
    todense = to_dense


[docs]
    def to_sparse(self, copy=True):
        """
        Return a sparse copy of a histogram.

        copy : if true, always copy whether conversion is needed or
               not; otherwise, copy only if converting

        """

        if self.is_sparse:
            h_sparse = self.copy() if copy else self
        else:
            new_contents = COO.from_numpy(self._contents)

            if self._sumw2 is not None:
                new_sumw2 = self._sumw2.to_sparse(copy=True)
            else:
                new_sumw2 = None

            h_sparse = self._replace(_contents = new_contents,
                                     _sumw2    = new_sumw2)

        return h_sparse


    # pydata sparse style
    tosparse = to_sparse

    @property
    def sumw2(self):
        return self._sumw2

    @property
    def ndim(self):
        return self._axes.ndim

    def __eq__(self, other):
        """
        Check whether two Histograms are equal.  Sparse
        and dense histograms with the same axes, unit, and
        contents/sumw2, including under/overflow bins if
        present, are considered equal.
        """

        return (self._axes == other._axes
                and
                np.array_equal(self._track_overflow, other._track_overflow)
                and
                self._unit == other._unit
                and
                np.all(self._contents == other._contents) # sparse does not support array_equal
                and
                self._sumw2 == other._sumw2)

    def __array__(self, dtype=None, copy=None):
        """
        Return a view or copy of our contents as a dense ndarray.
        If histogram has units, return a Quantity array to preserve
        them.

        """

        if self.is_sparse:

            if not sps.AUTO_DENSIFY:
                raise RuntimeError("Cannot convert a sparse histogram to dense automatically. "
                                    "To manually densify, use the todense method.")

            arr = self._get_contents().todense()
        else:
            arr = self._get_contents()

        if dtype is not None and arr.dtype != dtype:
            arr = arr.astype(dtype) # makes a copy
        elif copy and not self.is_sparse: # todense copies
            arr = arr.copy()

        return self._with_units(arr)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):

        if len(inputs) != 1 or inputs[0] is not self:
            raise NotImplementedError(
                "Only numpy functions with a single operand are supported "
                "e.g. np.sin(h). Call h.contents explicitly to "
                "get the corresponding array e.g. np.maximum(h.contents, a)")

        # units are *not* dropped for dense contents (but are for sparse)
        arr = self._with_units(self._get_contents())

        return ufunc(arr, *inputs[1:], **kwargs)

    def __array_function__(self, func, types, args, kwargs):

        if len(args) != 1 or args[0] is not self:
            raise NotImplementedError(
                "Only numpy functions with a single operand are supported "
                "e.g. np.sum(h). Call h.contents explicitly to "
                "get the corresponding array e.g. np.dot(h.contents, a)")

        # units are *not* dropped for dense contents (but are for sparse)
        arr = self._with_units(self._get_contents())

        return func(arr, *args[1:], **kwargs)

    class _NBINS():
        '''
        Convenience class that will expand to the number of bins of a
        given dimension.

        The trick is to overload the -/+ operators so that h.end +/-
        offset (h being an instance of Histogram and 'end' an static
        instance of Histogram._NBINS) returns an instance of _NBINS
        itself, which stores the offset. The [] operator can then
        detect that the input is an instance of _NBINS and convert it
        into an integer with respect to the size of the appropriate
        axis.

        '''

        def __init__(self, offset = 0):
            self.offset = offset

        def __add__(self, offset):
            return self.__class__(offset)

        def __sub__(self, offset):
            return self + (-offset)

    end = _NBINS()

    # Convenient aliases
    uf = -1
    of = end

    class _ALL():
        """
        Expands to slice(-1, end+1) if the axis tracks under/overflow,
        or slice(0, end) otherwise
        """

    all = _ALL()

    def _prepare_indices(self, indices):
        """
        Adjust a multi-dimensional index expression into the
        contents of a Histogram to properly index ._contents.
        """

        def _prepare_index(index, dim):
            """
            Adjust an index expressed as a scalar value (integer or
            'end' expression), a slice, or a multi-D array of scalar
            indices to account for overflow padding.  As a special
            case, handle an index that is Boolean mask, which may
            index one or more dimensions of the Histogram starting
            with dim.
            """

            overflow_offset = int(self._track_overflow[dim])

            def _adjust_scalar_index(index):
                """
                adjust a scalar index expression and make sure it
                is valid for the Histogram's contents
                """
                if isinstance(index, self._NBINS): # expand 'end' expression
                    index = self._axes[dim].nbins + index.offset

                index += overflow_offset

                if index < 0 or index > self._contents.shape[dim]:
                    raise IndexError("Bin index out of bounds")

                return index

            if isinstance(index, slice):
                start = 0 if index.start is None else index.start
                stop  = self._axes[dim].nbins if index.stop is None else index.stop

                index = slice(_adjust_scalar_index(start),
                              _adjust_scalar_index(stop),
                              index.step)

            elif isinstance(index, self._ALL):
                # convert 'all' to slice for whole of dim, including oflow/uflow
                # (no need to check bounds)
                index = slice(0, self._axes[dim].nbins + 2*overflow_offset)

            elif isinstance(index, np.ndarray):

                if index.dtype in (bool, np.bool_):
                    # Boolean mask array -- pad to match overflow
                    # settings for its dimension range
                    end_dim = dim + index.ndim
                    padding = self._get_padding_for_overflow(self._track_overflow[dim:end_dim],
                                                             self._axes.nbins[dim:end_dim],
                                                             index.shape, first_dim = dim)
                    index = self._pad(index, padding, value=False)

                else:
                    # (possibly multi-D) array of scalar indices (ints or _NBINS)
                    adjindex = [_adjust_scalar_index(i) for i in index.ravel()]
                    index = np.reshape(adjindex, index.shape)

            else: # scalar index (could be _NBINS)
                index = _adjust_scalar_index(index)

            return index

        def normalize(index):
            """
            If index is a slice or a scalar value, return it as-is.
            If it is array-like, convert to a proper ndarray.
            If none of the above, fail.

            For each index we return, we also return the number of dimension
            it refers to in the array (usually 1, but Boolean masks can refer
            to several consecutive dimensions).
            """
            if isinstance(index, (slice, int, np.integer, self._NBINS, self._ALL)):
                return (index, 1)
            elif isinstance(index, (np.ndarray, list, tuple, range)):
                arr = np.asarray(index)
                if arr.ndim > 0:
                    return (arr, arr.ndim if arr.dtype in (bool, np.bool_) else 1)
                else:
                    return (arr.item(), 1)
            elif index is Ellipsis:
                raise IndexError("Ellipsis is supported only at end of index tuple")
            else:
                raise TypeError("Index can only be slice, scalar, or array-like")

        if isinstance(indices, dict):
            # expand dict of per-axis indices into tuple
            indices = self._axes.expand_dict(indices, default = slice(None))
        elif not isinstance(indices, tuple):
            indices = (indices,)

        # strip Ellipsis from end of index tuple; it is not supported elsewhere
        has_ellipsis = (indices[-1] is Ellipsis)
        if has_ellipsis:
            indices = indices[:-1]

        indices = tuple( normalize(i) for i in indices )
        has_multi_dim = any( i[1] > 1 for i in indices ) # does any index cover multiple dims?

        if not has_multi_dim:
            # easy case -- each index describes exactly one dimension
            adjusted_indices = tuple(_prepare_index(index, dim) for
                                     dim, (index, _) in enumerate(indices))
            nSpecifiedDims = len(indices)
        else:
            # hard case -- some mask index describes > 1 dimension
            adjusted_indices = []
            dim = 0

            for index, ndim in indices:
                adjusted_indices.append(_prepare_index(index, dim))
                dim += ndim

            adjusted_indices = tuple(adjusted_indices)
            nSpecifiedDims = dim

        track = self._track_overflow
        if has_ellipsis:
            # unspecified dimensions at end refer to all of .full_contents
            rest_indices = tuple(slice(0, self._axes[dim].nbins + 2*int(track[dim]))
                                 for dim in range(nSpecifiedDims, self.ndim))
        else:
            # unspecified dimensions at end refer to all of .contents
            rest_indices = tuple(slice(int(track[dim]), self._axes[dim].nbins + int(track[dim]))
                                 for dim in range(nSpecifiedDims, self.ndim))

        return adjusted_indices + rest_indices

    """
    Access methods

    The following code defines two kinds of access methods: *internal*
    methods, which should be used by the methods of the Histogram
    class, and *external* methods, which should be used by users of
    the class and by subclasses.

    Both internal and external accessors understand the special
    indexing rules associated with overflow tracking, as well as the
    special values 'all' and 'end'.  They also manipulate the format
    of a sparse contents array to ensure that writes to it will
    succeed.

    External accessors also understand unit conformity and will return
    a Quantity for dense histograms with associated units, whereas
    internal accessors always return a bare value.

    CAUTION: external users should *never* directly read or write a
    Histogram's _contents array; doing so may yield unexpected results
    depending on whether the Histogram is using overflow tracking and
    may fail depending on its sparsity.  Always use the external
    accessors __getitem__ and __setitem__ to read or write subarrays
    of a Histogram; they can be called by directly indexing the
    histogram object, e.g., x = h[1:3] or h[3:5, 1:2] = 0.  Use
    .contents to obtain a view of the histogram's contents without
    overflow/underflow bins, or .full_contents to obtain a view with
    these extra bins.

    Internal users should use the _get() and _set() methods to read
    and write subarrays of the Histogram's contents if indices are to
    be interpreted as for __getitem__ / __setitem__.  These methods
    work with bare values; *do not* use the external accessors (e.g.,
    self[1:3]) unless the intent is to obtain/provide a Quantity
    whenever the Histogram has units.  Use _get_contents() to
    obtain the bare equivalent of .contents, or use ._contents
    directly for the bare equivalent of .full_contents.

    Internal users that directly access ._contents instead of using
    _get() and _set() are responsible for handling the impacts of
    overflow tracking and for ensuring that sparse arrays are in the
    correct form (DOK for writing, COO for everything else) if needed.
    """

    def _set(self, indices, new_contents):
        """
        Update a slice of a Histogram's contents.  If the Histogram
        is sparse, it is converted to DOK form to enable partial
        updates in place.
        """

        # Make it mutable if needed
        if self.is_sparse:
            self._contents = self._contents.asformat('dok')

        indices = self._prepare_indices(indices)

        try:
            self._contents[indices] = new_contents
        except (IndexError, ValueError) as e:

            if self.is_sparse:
                # Check if advanced indexing is being used
                for index in indices:
                    if isinstance(index, np.ndarray):
                        logger.warning(
                            "Advanced indexing is not yet fully supported "
                            "for sparse arrays. See "
                            "https://github.com/pydata/sparse/issues/1 and "
                            "https://github.com/pydata/sparse/issues/114")

                        break
            raise e

        # Make it fast read-only
        if self.is_sparse:
            self._contents = self._contents.asformat('coo')

    def __setitem__(self, indices, new_contents):
        """
        Update a slice of a Histogram's contents.  The
        value used to update the contents must be unit-conformable
        to the Histogram's units, if present.
        """
        self._set(indices, self._strip_units(new_contents))

    def _get(self, indices):
        """
        Get a slice of a Histogram's contents as an array.
        Returns a *view* into original contents.  If the
        histogram is sparse, it is converted to COO form.
        """
        indices = self._prepare_indices(indices)

        try:
            return self._contents[indices]
        except IndexError as e:

            if self.is_sparse:
                # Check if advanced indexing is being used
                for index in indices:
                    if isinstance(index, np.ndarray):
                        logger.warning(
                            "Advanced indexing is not yet fully supported "
                            "for sparse arrays. See "
                            "https://github.com/pydata/sparse/issues/1 and "
                            "https://github.com/pydata/sparse/issues/114")

                        break
            raise e

    def _get_contents(self):
        return self._get(slice(None))

    def __getitem__(self, indices):
        """
        Get a slice of a Histogram's contents as an array.
        Returns a *view* into original contents.  If the
        Histogram has a unit *and* the contents array is dense,
        return the slice as a Quantity.
        """

        value = self._get(indices)
        return self._with_units(value)

    @property
    def contents(self):
        """
        Equivalent to :code:`h[:]`. Does not include under and overflow bins.
        """
        return self[:]

    @property
    def full_contents(self):
        """
        Equivalent to :code:`h[...]`. The size of each axis can be
        nbins or nbins+2, depending on the track_overflow parameters
        """
        return self[...]

    class _special_getitem:
        """
        This allows to use regular indexing for special access methods,
        e.g., h.bin_error[]
        """

        def __init__(self, hist):
            self._hist = hist

        def __getitem__(self, item):
            raise NotImplementedError

        def __array__(self):
            return self.contents

        @property
        def contents(self):
            return self[:]

        @property
        def full_contents(self):
            return self[...]

    class _get_bin_error(_special_getitem):
        """
        Return the sqrt of sumw2
        """

        def __getitem__(self, item):
            if self._hist._sumw2 is not None:
                result = np.sqrt(self._hist._sumw2._get(item))
            else:
                # fake it if no sumw2
                result = np.sqrt(np.abs(self._hist._get(item)))

            return self._hist._with_units(result)

        def __setitem__(self, indices, new_bin_error):

            if self._hist._sumw2 is None:
                raise ValueError("Histogram does not have sumw2")

            self._hist._sumw2[indices] = new_bin_error**2

    @property
    def shape(self):
        """
        Tuple with length of each axis
        """
        return self._axes.shape

    @property
    def axes(self):
        """
        Underlying axes object
        """

        return self._axes

    @property
    def axis(self):
        """
        Equivalent to :code:`self.axes[0]`, but fails if :code:`ndim > 1`
        """

        if self.ndim > 1:
            raise ValueError("Property 'axis' can only be used with 1D "
                            "histograms. Use `axes` for multidimensional "
                            "histograms")

        return self._axes[0]

    @axis.setter
    def axis(self, new):

        self._axes[0] = new

        if self._sumw2 is not None:
            self._sumw2.axis = new


[docs]
    def expand_dims(self, *args, **kwargs):
        """
        Same as h.axes.expand_dims().
        """
        return self._axes.expand_dims(*args, **kwargs)



[docs]
    def broadcast(self, *args, **kwargs):
        """
        Same as h.axes.broadcast().
        """
        return self._axes.broadcast(*args, **kwargs)



[docs]
    def expand_dict(self, *args, **kwargs):
        """
        Same as h.axes.expand_dict().
        """
        return self._axes.expand_dict(*args, **kwargs)


    @property
    def dtype(self):
        return self._contents.dtype

    @property
    def nbins(self):

        if self.ndim == 1:
            return self._axes[0].nbins
        else:
            return self._axes.nbins


[docs]
    def interp(self, *values, kind="linear"):
        """
        Perform multilinear interpolation of one or more values
        relative to the contents of this Histogram. The center of
        histogram bin (i1,...,in) is assumed to have the value
        h[i1,...,in] for purposes of interpolation.

        The interpolation weights along each axis are controlled by
        its scale. If the scale is 'linear' or 'symmetric',
        interpolation is linear; if it is 'log', the interpolation is
        log-linear.  Hence, interpolating a value halfway between two
        bin centers along a log-scale axis returns the *geometric*
        mean of the centers.

        Interpolation may be done using either the values in the
        Histogram (default) or their logs using the 'kind'
        parameter. If kind='log' is requested, the histogram's
        contents should all be > 0 to avoid warnings or errors.

        Args:
            values (scalar or array-like): value(s) to interpolate
               If single value, may be ndim coordinates
                 as separate arguments or a single array-like
               if multiple values, may be ndim array-likes
                 of coordinates as separate arguments or
                 a single array-like containing same
            kind (string): 'linear' (default) if multilinear interpolation
               is to be done using this Histogram's contents, or
               'log' if it is to be done on the logs of the contents
               and the results converted back to the linear domain.
        Return:
            interpolated values (scalar or array of same shape as values)

        """

        bins, weights = self._axes.interp_weights(*values)

        # If contents is sparse, we cannot index it with numpy
        # arrays of dimension > 1 due to limitations of the Sparse
        # package (even though this is perfectly fine for dense
        # arrays). In this case, ravel the bins and weights
        # before interpolating and restore the result shape to
        # match the bins shape (which == the values shape) after.

        if self.is_sparse and bins[0].ndim > 2:
            vshape = bins[0].shape[1:]
            bins    = tuple( b.reshape(b.shape[0], -1) for b in bins )
            weights = tuple( w.reshape(w.shape[0], -1) for w in weights )
        else:
            vshape = None

        isLog = (kind == "log") # do log, not linear, combining

        contents = self._get_contents()

        if isLog: # convert to log domain before interpolating
            contents = np.log(contents)

        interp_values = self._interp_multilinear(contents, bins, weights)

        if isLog: # convert result back out of log domain
            interp_values = np.exp(interp_values)

        if vshape is not None:
            interp_values = interp_values.reshape(vshape)

        return self._with_units(interp_values)


    @staticmethod
    def _interp_multilinear(contents, bins, weights):
        """
        Perform multilinear interpolation with respect to matrix 'contents',
        given arrays of bins and weights for a set of points.

        Args:
           contents: array of dimension ndim
           bins: tuple of ndim arrays; ith array contains one or more
                integers per input point specifying which bins of contents
                contain the values to be interpolated in ith dimension.
           weights: tuple of ndim arrays; ith array contains
                one or more floats per input point specifying the weights
                for values to be interpolated in ith dimension.

        For all dimensions i and all bins j, both bins[i][j] and
        weights[i][j] must have the same (arbitrary) shape if contents
        is dense, or must be a flat 1D array if it is sparse.  The
        number of bins may be different for each dimension but must
        match the corresponding number of weights.

        Returns:
            array of interpolated values of same shape as each
            elt of bins / weights

        """

        is_sparse = isinstance(contents, SparseArray)

        def densify(v):
            if is_sparse and not np.isscalar(v):
                return v.todense()
            else:
                return v

        def interp_r(idim, prev_bins):
            """
            Recursive multilinear interpolation
            Allows interpolation in a given dimension to use a
            linear combination of two or more bins; standard linear
            interpolation uses two bins with weights w and 1 - w.

            """
            w  = weights[idim]

            # extend each bin in current dim with all previous bins
            dimbins = ((b,) + prev_bins for b in bins[idim])

            # interpolate remaining dimensions to get result for each
            # bin in bins[idim], and combine results into one array
            c = np.stack(tuple(
                ((densify(contents[b]) # base case
                  if idim == 0
                  else interp_r(idim - 1, b))
                 for b in dimbins)
            ))

            # weighted sum gives one interpolated value per input point
            return np.sum(c * w, axis=0)

        return interp_r(len(bins) - 1, ())

    def find_bin(self, *args, **kwargs):
        return self._axes.find_bin(*args, **kwargs)

    def _strip_units(self, quantity):
        """
        Remove the unit from a quantity (if it exists) and return
        its value in the units of the Histogram, so that we may combine
        it with the Histogram's contents.

        We FAIL if:
          * we try to combine a non-dimensionless Quantity with
            a Histogram that has no units
          * we try to combine a scalar with a Histogram that has
            units
        """

        # convert bare unit to trivial Quantity
        if isinstance(quantity, u.UnitBase):
            quantity = 1. * quantity

        if isinstance(quantity, u.Quantity):

            if quantity.unit == u.dimensionless_unscaled:
                return quantity.value

            if self._unit is None:
                raise u.UnitConversionError("Cannot apply Quantity to Histogram without units")

            return quantity.to_value(self._unit)

        else:

            if not (self._unit is None or self._unit == u.dimensionless_unscaled):
                raise u.UnitConversionError("Cannot apply scalar to Histogram with units")

            return quantity


[docs]
    def fill(self, *values, weight = None, warn_overflow = True):
        '''
        Add an entry to the histogram. Can be weighted.

        Follow same convention as find_bin()

        Args:
            values (float or array): Value of entry
            weight (float): Value weight in histogram. Defaults to 1
                in whatever units the histogram has
            warn_overflow (bool): Enable/disable warnings when an
            underflow or overflow occurs --i.e. when one or more of
            the input values falls beyond the range of the
            corresponding axis.

        Note:
            Note that weight needs to be specified explicitly by key;
            otherwise it will be considered a value, and an IndexError
            will be thrown.

        '''

        # Remove units from weight, converting if needed
        if weight is None:
            weight = 1
        else:
            weight = self._strip_units(weight)

        indices = self.find_bin(*values)

        # Standardize if single axis
        if not isinstance(indices, tuple):
            indices = (indices,)

        # Each index must have same size for stacking, so make sure
        # the sizes match by broadcasting all to largest size
        indices = np.broadcast_arrays(*indices)

        # Build 2D array [ndims x npoints] of indices for input values.
        # Must reshape because indices in each dimension may not be
        # a 1-D linear array.
        indices = np.stack(indices, axis=0).reshape(len(indices),-1)
        weight = np.broadcast_to(np.asarray(weight), indices.shape[1])

        # adjust indices to be relative to .full_contents, so
        # that we can use them to access ._contents directly
        indices += self._track_overflow[:,None]

        # eliminate any indices that are out of bounds (can happen
        # if we are not tracking overflow in some dimension)
        mask = np.all(np.logical_and(indices >= 0,
                                     indices < np.asarray(self._contents.shape)[:,None]),
                      axis = 0)

        if not mask.all():

            if warn_overflow:
                logger.warning(
                    "fill() discarded one or more values due to out-of-bounds "
                    "coordinate in a dimension without under/overflow tracking"
                )

            indices = indices[:,mask]
            weight = weight[mask]

        if weight.size > 0: # empty fails for dense, pointless for sparse

            if not self.is_sparse:

                indices = tuple(indices)

                # add.at sums values associated with duplicate indices
                np.add.at(self._contents, indices, weight)

                if self._sumw2 is not None:
                    np.add.at(self._sumw2._contents, indices, weight**2)
            else:

                # COO sums values associated with duplicate indices by default
                new_contents = COO(coords = indices,
                                   data = weight,
                                   shape = self._contents.shape)

                self._contents += new_contents

                if self._sumw2 is not None:
                    new_contents_sq = COO(coords = indices,
                                          data = weight**2,
                                          shape = self._contents.shape)

                    self._sumw2._contents += new_contents_sq



[docs]
    def clear(self):
        """
        Set all counts to 0
        """

        if self.is_sparse:
            self._contents = DOK(shape = self._contents.shape,
                                 dtype = self._contents.dtype,
                                 fill_value = 0).asformat('coo')
        else:
            self._contents[:] = 0

        if self._sumw2 is not None:
            self._sumw2.clear()


    def _clear_border(self, which, axes = None):
        """
        Set border cells (over and/or underflow) to 0 in both
        contents and sumw2

        Args:
            which : list of which borders to zero out
            axes (None or array): Axis numbers or labels. All by default
        """

        if self.is_sparse: # ensure sparse contents is writable
            self._contents = self._contents.asformat('dok')

        if axes is None:
            axes = range(self.ndim)
        elif np.isscalar(axes):
            axes = [axes]

        axes = self._axes.label_to_index(axes)

        for n in axes:
            if self._track_overflow[n]:
                for idx in which:
                    indices = self.expand_dict({n : idx}, self.all)
                    self._set(indices, 0)

        if self.is_sparse: # return sparse contents to fast readonly form
            self._contents = self._contents.asformat('coo')

        if self._sumw2 is not None:
            self._sumw2._clear_border(which, axes)

    def clear_overflow(self, axes = None):
        self._clear_border((self.of,), axes)

    def clear_underflow(self, axes = None):
        self._clear_border((self.uf,), axes)

    def clear_underflow_and_overflow(self, axes = None):
        self._clear_border((self.uf, self.of), axes)


[docs]
    def project(self, *axis):
        """
        Return a histogram containing a projection of the current one.

        Args:
            axis (axis index/label or array-like of same): axis or axes onto
                which the histogram will be projected.  Omitted axes will be
                summed over. The axes of the projected histogram will have the
                order specified by this argument, so project() can be used
                to permute a Histogram's axes (whether or not some are
                projected away).

        Return:
            Histogram: Projected histogram (a new object, not a view)
        """

        return self._project(axis, project_out=False)



[docs]
    def project_out(self, *axis):
        """
        Return a histogram containing a projection that sums over the
        specified axes of the current one, leaving the rest intact.

        Args:
            axis (axis index/label or array-like of same): axis or
                axes that will be projected out of the
                histogram. Omitted axes will be retained in their
                current order.

        Return:
            Histogram: Projected histogram (a new object, not a view)
        """

        return self._project(axis, project_out = True)


    def _project(self, axis, project_out = False):
        """
        Common core of project() and project_out()

        If project_out is False; the axes in `axis` will be retained
        in the specified order.  If it is True, the axes in `axis`
        will be summed away.

        Args:
           axis: array-like of axis numbers or labels
           project_out: bool

        Return:
           Histogram: Projected histogram (a new object, not a view)
        """

        def _project_contents(contents, keep_axes, sum_axes):

            # Transpose the contents so that the first keep_axes
            # dimensions are the axes to keep, in the order requested
            # by the caller, while the remaining axes are the ones
            # to be summed away.
            #
            # We transpose before projecting out the sum_axes so
            # that the projected array is contiguous in memory.

            reordered_axes = np.concatenate((keep_axes, sum_axes))
            new_contents = np.transpose(contents, reordered_axes)

            if len(sum_axes) == 0: # sparse does not handle this case
                new_contents = new_contents.copy()
            else:
                new_sum_axes = tuple(range(len(keep_axes), new_contents.ndim))
                new_contents = new_contents.sum(axis = new_sum_axes)

            return new_contents

        # standardize input
        if len(axis) == 1 and \
           isinstance(axis[0], (list, np.ndarray, range, tuple)):
            # Got a sequence
            axis = axis[0]

        axes = np.asarray(self._axes.label_to_index(axis), dtype=int)

        if len(np.unique(axes)) != len(axes):
            raise ValueError("An axis can't repeat")

        # Compute axis ids in 0..ndim that are not in exes;
        # result is guaranteed to be in increasing order
        rest = np.setdiff1d(np.arange(self.ndim, dtype=int),
                            axes,
                            assume_unique=True)

        if project_out: # axes are to be summed over
            sum_axes = axes
            keep_axes = rest
        else:           # axes are to be retained
            keep_axes = axes
            sum_axes = rest

        if len(keep_axes) == 0:
            raise ValueError("Cannot project out all axes of a Histogram. "
                             "Consider using np.sum(h) or np.sum(h.full_contents) (includes under/overflow)")

        new_contents = _project_contents(self._contents, keep_axes, sum_axes)

        new_sumw2 = None
        if self._sumw2 is not None:
            new_sumw2_contents = _project_contents(self._sumw2._contents, keep_axes, sum_axes)
            new_sumw2 = self._sumw2._replace(_axes = self._axes[keep_axes],
                                             _contents = new_sumw2_contents,
                                             _track_overflow = self._track_overflow[keep_axes])

        new = self._replace(_axes     = self._axes[keep_axes],
                            _contents = new_contents,
                            _sumw2    = new_sumw2,
                            _track_overflow = self._track_overflow[keep_axes])

        return new


[docs]
    @staticmethod
    def concatenate(edges, histograms, label = None, track_overflow = None):
        """
        Generate a Histogram H from a list of histograms h_1
        ... h_n. We create a new first axis of length equal to the
        list and set H[i] = h_i.

        For this operation to be well-defined, the axes of all input
        histograms must be equal, and they must all have the same
        sparsity; if any input has a unit, all must have compatible
        units.  If any input is a subclass of Histogram, all must have
        the same subclass type.

        If all inputs have sumw2, the output will as well; otherwise,
        all sumw2 values are discarded.

        Generate a Histogram from a list of histograms. The axes of
        all input histograms must be equal, and the new histogram will
        have one more dimension than the input. The new axis has index
        0.  If histograms can be subclassed, all of them must have the
        same class type.

        Args:
            edges (Axes or array): Definition of bin edges of the new dimension
            histograms (list of Histogram): List of histogram to fill contents.
                Might or might not include under/overflow bins.
            labels (str): Label the new dimension
            track_overflow (bool): Track underflow and overflow on the newly created axis.
                Defaults to True if number of histograms on new axis is 2 + # bins,
                or False otherwise.
        Return:
            new object of the same type as histograms[0] (Histogram or subclass)

        """

        def check_compatible(h, h0):

            if h.__class__ != h0.__class__:
                raise ValueError("Cannot combine Histograms of different subclasses "
                                 f"{h.__class__.__name__}, {h0.__class__.__name__}")

            if h.axes != h0.axes:
                raise ValueError("Cannot combine Histograms with different axes")

            if h.is_sparse != h0.is_sparse:
                raise ValueError("Cannot combine Histograms with different sparsities")

            if not np.array_equal(h._track_overflow, h0._track_overflow):
                raise ValueError("Cannot combine Histograms with different track_overflow settings")

            if (h._unit == None) != (h0._unit == None):
                raise ValueError("Cannot combine Histograms with and without units")

            return (h._sumw2 is not None) # do we have sumw2?

        if len(histograms) == 0:
            raise ValueError("Cannot concatenate empty list of Histograms")

        h0 = histograms[0]

        has_sumw2 = [ check_compatible(h, h0) for h in histograms ]

        shared_axes = h0._axes
        axis_newdim = Axis(edges, label=label)
        new_axes = Axes([axis_newdim] + list(shared_axes), copy_axes=False)

        # stick all the old histogram contents together
        new_contents = np.concatenate([h._contents[None] for h in histograms])

        new_unit = h0._unit
        if new_unit is not None:
            # adjust each histogram's value to account for different, but compatible units
            for i, h in enumerate(histograms):
                unit_factor = h._unit.to(new_unit)
                if unit_factor != 1.:
                    new_contents[i] *= unit_factor

        if track_overflow is None:
            # by default, overflow tracking follows contents shape
            track_overflow = (len(histograms) == axis_newdim.nbins + 2)

        new_track_overflow = np.append(track_overflow, h0._track_overflow)

        # compute overflow padding for dimension corresponding to new axis
        # we need padding in only one dimension, but padding function expects arrays
        padding_newdim = Histogram._get_padding_for_overflow(np.atleast_1d(track_overflow),
                                                             np.atleast_1d(axis_newdim.nbins),
                                                             np.atleast_1d(len(histograms)))

        # padding of all axes after the first is unchanged
        new_padding = np.concatenate((padding_newdim, np.zeros((shared_axes.ndim, 2), dtype=int)))

        new_contents = Histogram._pad(new_contents, new_padding)

        # Combine sumw2s if they exist in all histograms, using same unit and padding adjustments
        # as for contents (except that we must square the unit)
        if all(has_sumw2):

            new_sumw2_contents = np.concatenate([h._sumw2._contents[None] for h in histograms])

            if new_unit is not None:
                # adjust each histogram's value to account for different, but compatible units
                for i, h in enumerate(histograms):
                    unit_factor = h._unit.to(new_unit)
                    if unit_factor != 1.:
                        new_sumw2_contents[i] *= unit_factor**2

            new_sumw2_contents = Histogram._pad(new_sumw2_contents, new_padding)
            new_sumw2 = h0._sumw2._replace(_axes=new_axes,
                                           _contents=new_sumw2_contents,
                                           _track_overflow=new_track_overflow)

        else:
            if any(has_sumw2):
                logger.warning("Not all input histograms have sum of weights "
                               "squared. sumw2 will be dropped")
            new_sumw2 = None

        new = h0._replace(_axes=new_axes,
                          _contents=new_contents,
                          _sumw2=new_sumw2,
                          _track_overflow=new_track_overflow)

        return new


    class _slice:

        def __init__(self, hist):
            self._hist = hist

        def __getitem__(self, item):
            """Return a Histogram which is a *view* containing a slice of
            the current one.

            Args:
              item -- an indexing expression suitable for a
                Histogram's contents.  Advanced indexing is not
                supported, but .end, .all, and ellipsis are supported
                with the same restrictions as for ordinary Histogram
                indexing.

                Restrictions on valid indexing expressions include:
                 (1) the slice must include at least one non-under/overflow
                 bin of the original Histogram.  Slices containing
                 only overflow or underflow bins are not permitted.

                 (2) slicees with stride > 1 are not permitted due to
                 ill-defined interactions with under/overflow.

            Returns:
              A view into the current Histogram containing the
              specified slice of its contents.

              The new Histogram tracks overflow in the same set of
              dimensions as the original.  If the slice explicitly
              includes the Histogram's underflow and overflow bins for
              a given axis, those will be copied into the result;
              otherwise, they will be set to zero.

              If the original Histogram contains sumw2, the new
              one contains the corresponding slice of sumw2.

            Note that, unlike Histogram.__getitem__, slice.__getitem__
            does *not* squeeze away dimensions of length 1 -- the
            slice will always have exactly as many axes as the
            original Histogram, so that we do not lose axis
            information for axes with a single bin.

            Because the returned Histogram is a view, there is no
            guarantee as to whether it shares axis or contents
            data with the original. Make a copy before modifying
            the new Histogram.

            """

            # Standardize indices into slices
            indices = self._hist._prepare_indices(item)
            indices = tuple(slice(i,i+1) if isinstance(i, (int, np.integer)) else i for i in indices)

            padding = np.zeros((self._hist.ndim, 2), dtype=int)

            new_axes = []
            for dim,(index, axis) in enumerate(zip(indices,
                                                    self._hist._axes)):

                track_overflow = self._hist._track_overflow[dim]

                # Sanity checks not already enforced by Axis.__getitem__
                if not isinstance(index, slice):
                    raise TypeError("slice[] supports only integers and slices as indices")

                start, stop, stride = index.indices(axis.nbins+2*track_overflow)

                if stride != 1:
                    raise IndexError("slice[] does not support non-unit strides")

                # Handle under/overflow
                if track_overflow:

                    if start == 0:
                        if stop-start == 1:
                            raise IndexError(f"slice[] cannot return only underflow bin on axis {dim}")
                    else:
                        padding[dim, 0] = 1 # supply our own, new underflow bin

                    if stop == axis.nbins + 2:
                        if stop-start == 1:
                            raise IndexError(f"slice[] cannot return only overflow bin on axis {dim}")
                    else:
                        padding[dim, 1] = 1 # supply our own, new overflow bin

                    # axes do not reference under/overflow bins
                    axis_start = max(start - 1, 0)
                    axis_stop = min(stop - 1, axis.nbins)

                else:
                    axis_start = start
                    axis_stop = stop

                # result of Histogram slice is a *view*, so we
                # allow result to share edge array with input Histogram
                new_axes.append( axis[axis_start:axis_stop] )


            # new axes will not be referenced outside Axes object
            new_axes = Axes(new_axes, copy_axes=False)

            # get new contents. Pad if we need to add zeros for oflow/uflow.
            new_contents = self._hist._contents[indices]
            new_contents = self._hist._pad(new_contents, padding)

            # apply identical slice to sumw2
            if self._hist._sumw2 is not None:
                new_sumw2_contents = self._hist._sumw2._contents[indices]
                new_sumw2_contents = self._hist._pad(new_sumw2_contents, padding)
                new_sumw2 = self._hist._sumw2._replace(_axes = new_axes,
                                                       _contents = new_sumw2_contents)

            else:
                new_sumw2 = None

            return self._hist._replace(_axes = new_axes,
                                       _contents = new_contents,
                                       _sumw2 = new_sumw2)


    # operation descriptors for every arithmetic operation
    # supported through the _operation() method below.

[docs]
    class OpType(Enum):
        SUM = 1,
        PRODUCT = 2


    arith_op_properties = {
        # operator          inplace? sum or product op?
        operator.add:      (False,   OpType.SUM),
        operator.sub:      (False,   OpType.SUM),
        operator.mul:      (False,   OpType.PRODUCT),
        operator.truediv:  (False,   OpType.PRODUCT),
        operator.iadd:     (True,    OpType.SUM),
        operator.isub:     (True,    OpType.SUM),
        operator.imul:     (True,    OpType.PRODUCT),
        operator.itruediv: (True,    OpType.PRODUCT),
    }

    def _unit_operation(self, other, operation):
        """
        For binary arithmetic operation self <operation> other,
          - if other is not a Histogram, extract its value w/o units
          - compute the unit that will result from the operation

        Returns:
          tuple (value, new unit)
          If other is a Histogram or sparse or dense array, value == other
          Else, value is an equivalent scalar or np.ndarray
        """

        # Separate between value and unit
        if isinstance(other, Histogram):
            other_unit = other._unit
            other_value = other
        elif isinstance(other, u.Quantity):
            other_unit = other.unit
            other_value = other.value
        elif isinstance(other, u.UnitBase):
            other_unit = other
            other_value = 1
        elif isinstance(other, SparseArray):
            other_unit = None
            other_value = other # cannot use np.asarray
        else: # scalar or array-like
            other_unit = None
            if np.ndim(other) > 0:
                other_value = np.asarray(other)
            else:
                other_value = other

        if self._unit is None and other_unit is None:
            # If neither operand have units, do nothing else
            return other_value, None

        # Adjust other_value and self._unit depending on the operand

        # Standardize dimensionless
        if other_unit is None:
            other_unit = u.dimensionless_unscaled

        self_unit = self._unit
        if self_unit is None:
            self_unit = u.dimensionless_unscaled

        _, optype = Histogram.arith_op_properties.get(operation, (None, None))

        assert (optype is not None), "operation not supported"

        # For * and / the conversion factor is stored in the unit itself
        # ** only accepts scalar dimensionaless quantities, it will crash anyway
        # The idencity operator (for the raterizer) doesn't use the other's units
        if optype == Histogram.OpType.SUM:
            # +, -
            # We need to convert other's units to those of self. If
            # not a no-op, it requires copying the value (as we don't
            # want to modify the original). No change in units
            unit_conv = other_unit.to(self_unit)
            if unit_conv != 1.0:
                other_value = other_value * unit_conv
            new_unit = self_unit

        else: # optype == Histogram.OpType.PROD
            # *, /
            # The conversion factor is stored in the unit itself
            new_unit = operation(self_unit, other_unit)

        return other_value, new_unit

    def _operation(self, other, operation):
        """
        Perform binary arithmetic with first operand a Histogram (self) and
        a second operand 'other', which might be a Histogram, a scalar,
        or an array-like.

        This function is mainly concerned with getting other into a
        form that can be combined with self.  The details of the
        actual operation differ depending on whether operation is
        in-place (e.g., h *= x) or out-of-place (e.g., h = h * x).

        """
        is_inplace, optype = self.arith_op_properties.get(operation, (None, None))

        assert (optype is not None), "Unsupported arithmetic operation"

        # Get the value part of other and the new unit the result should have
        other, new_unit = self._unit_operation(other, operation)

        # convert other into a bare np.ndarray or scalar that can be
        # combined with this Histogram

        if isinstance(other, Histogram):

            # Histograms for operation must have same axes
            if self._axes != other._axes:
                raise ValueError("Axes mismatch")

            # temporarily remove other's units before extracting contents/sumw2
            other_unit = other._unit
            other.to(None, update = False, copy = False)

            # transform other to match overflow/underflow of self
            slices, padding = self._match_track_overflow(other._track_overflow,
                                                         self._track_overflow)

            other_value = other._contents[slices]
            other_value = self._pad(other_value, padding)

            if self._sumw2 is not None:
                if other._sumw2 is not None:
                    # Match over/underflow
                    other_sumw2 = other._sumw2._contents[slices]
                    other_sumw2 = self._pad(other_sumw2, padding)
                else:
                    # self has sumw2, but other does not
                    logger.warning("Other histogram lacks sumw2; assuming zero")
                    other_sumw2 = None
            else:
                if other._sumw2 is not None:
                    # other has sumw2, but self does not
                    logger.warning("Discarding sumw2 from other histogram")
                other_sumw2 = None

            # restore other's units after value extraction
            other.to(other_unit, update = False, copy = False)

        else:
            # other is array or scalar; array-likes were converted by _unit_operation()
            if np.ndim(other) > 0:

                if other.ndim != self.ndim:
                    raise ValueError(f"Operand number of dimensions ({other.ndim}) does not"
                                     f"match number of axes ({self.ndim})")

                if isinstance(other, DOK):
                    # Do not allow DOK format in arithmetic.  If combined with
                    # a dense array, it gives a DOK result, so it can 'pollute'
                    # a Histogram.  Use COO instead; we check later on whether
                    # the result is sparse and update .sumw2 to match if needed.
                    other = other.asformat('coo')

                # single-element axis can be broadcast as-is, so no need for overflow/underflow
                # bins; otherwise, match other's overflow with self
                oshape = np.asarray(other.shape)
                padding = self._get_padding_for_overflow(self._track_overflow,
                                                         self._axes.nbins, oshape,
                                                         mask=(oshape != 1))

                other_value = self._pad(other, padding)
            else:
                # other is scalar
                other_value = other

            other_sumw2 = None # bare value has no sumw2

        if is_inplace:
            self._inplace_operation(other_value, other_sumw2, operation,
                                    optype, new_unit)
            new = self
        else:
            new = self._outofplace_operation(other_value, other_sumw2, operation,
                                             optype, new_unit)

        # Ensure that sumw2 still has same dtype as contents
        # and is sparse if contents is sparse.  An operation
        # on dense self and sparse other can make the result
        # have sparse contents, but it might leave sumw2 dense.
        if new._sumw2 is not None:
            if new.is_sparse and not new._sumw2.is_sparse:
                new._sumw2 = new.sumw2.to_sparse()
            new._sumw2 = new._sumw2.astype(new._contents.dtype, copy=False)

        return new

    def _sumw2_product_op(self, self_sumw2, other_sumw2,
                          self_contents, new_contents,
                          other, operation):
        """
        Update the sum of squared weights self_sumw2 to reflect the impact
        of performing the operation self_contents <operation> other.
        operation is assumed to be a PRODUCT type.

        Args:
          self_sumw2  -- _sumw2._contents for Histogram self
          other_sumw2 -- _sumw2._contents for Histogram other, or
                          None if is lacks sumw2 or is not a Histogram
          self_contents -- _contents for Histogram self before operation
          new_contents  -- _contents for Histogram self after operation
          other         -- other operand's value (_contents if Histogram,
                           or scalar or np.ndarray)
          operation     -- operation performed

        Returns:
          updated sumw2 value for result of operation

        If operation is in-place, self_sumw2 is modified in place
        and returned if possible; otherwise, a new array is allocated
        and returned.  Either way, it is safe to set _sumw2._contents
        to the return value of this function for both in-place and
        out-of-place operations.
        """
        if other_sumw2 is None:
            # Other operaand is assumed to have zero error. Use
            # simplification of error formula below that depends
            # only on prior sumw2 and other's value.
            new_sumw2 = Histogram._inplace_operation_handle_sparse(
                          Histogram._inplace_operation_handle_sparse(self_sumw2, other, operation, Histogram.OpType.PRODUCT),
                          other, operation, Histogram.OpType.PRODUCT)

        else:
            # Error of either f = A*B or f = A/B is
            # f_err^2 = f^2 * ((A_err/A)^2 + (B_err/B)^2)
            relvar = self_sumw2 / (self_contents * self_contents)
            other_relvar = other_sumw2 / (other * other)

            new_sumw2 = new_contents * new_contents * (relvar + other_relvar)

        return new_sumw2

    def _inplace_operation(self, other, other_sumw2, operation,
                           optype, new_unit):
        """
        Perform an in-place binary arithmetic operation on the histogram
        self with second operand other.  Update both self's contents
        and its sumw2 value, if present.

        Args:
          other       -- second operand
          other_sumw2 -- second operand's sumw2 matrix, or None
                         if not defined
          operation   -- in-place arithmetic operation to be performed
          optype      -- is operation a SUM or PRODUCT type?
          new_unit    -- unit to assign to result

        self's contents and sumw2 are updated; nothing is returned.
        """
        # delete old units of self; will replace with result units
        self.to(None, update = False, copy = False)

        if self._sumw2 is not None and \
           other_sumw2 is not None and \
           optype == Histogram.OpType.PRODUCT:
            # save a copy of self._contents for sumw2 computation
            self_contents = self._contents.copy()
        else:
            self_contents = None # not used

        # overwrites self._contents
        new_contents = Histogram._inplace_operation_handle_sparse(self._contents, other, operation, optype)

        if self._sumw2 is not None:
            self_sumw2_contents = self._sumw2._contents

            if optype == Histogram.OpType.SUM:
                if other_sumw2 is not None:
                    Histogram._inplace_operation_handle_sparse(self_sumw2_contents, other_sumw2, operator.iadd, Histogram.OpType.SUM)

            else: # optype == Histogram.OpType.PRODUCT
                self._sumw2._contents = \
                    self._sumw2_product_op(self_sumw2_contents,
                                           other_sumw2,
                                           self_contents,
                                           new_contents,
                                           other,
                                           operation)

        # set unit of result
        self.to(new_unit, update = False, copy = False)

    @staticmethod
    def _inplace_operation_handle_sparse(contents, other, operation, optype):
        """
        Performs in place operations on contents when other is a sparse matrix. This type of operation is not supported
        by the sparse library
        """
        if isinstance(other, sparse.SparseArray):
            # TODO: Optimize. Instead of allocating the a new dense array, we could modify directly the non-zero
            #   elements in contents. This requires to check the operation type, as well as the fill value of the
            #   sparse array.
            other = other.todense()

        return operation(contents, other)

    def _outofplace_operation(self, other, other_sumw2, operation,
                              optype, new_unit):
        """
        Perform an out-of-place binary arithmetic operation on the
        histogram self with second operand other.  Allocate and
        return a new object of the same class as self to hold the result.
        Hence, this function works for subclasses of Histogram as
        well as for the base class.

        Args:
          other       -- second operand
          other_sumw2 -- second operand's sumw2 matrix, or None
                         if not defined
          operation   -- in-place arithmetic operation to be performed
          optype      -- is operation a SUM or PRODUCT type?
          new_unit    -- unit to assign to result

        Returns:
          new object of the same class as self holding result
        """

        # temporarily remove self's units
        self_unit = self._unit
        self.to(None, update = False, copy = False)

        self_contents = self._contents
        new_contents = operation(self_contents, other)

        if self._sumw2 is not None:

            self_sumw2_contents = self._sumw2._contents

            if optype == Histogram.OpType.SUM:
                if other_sumw2 is None:
                    new_sumw2_contents = self_sumw2_contents.copy()
                else:
                    new_sumw2_contents = self_sumw2_contents + other_sumw2

            else: # optype == Histogram.OpType.PRODUCT
                new_sumw2_contents = \
                    self._sumw2_product_op(self_sumw2_contents,
                                           other_sumw2,
                                           self_contents,
                                           new_contents,
                                           other,
                                           operation)
            new_sumw2_unit = None if new_unit is None else new_unit**2
            new_sumw2 = self._sumw2._replace(_contents = new_sumw2_contents,
                                             _unit = new_sumw2_unit)
        else:
            new_sumw2 = None

        # restore unit of self
        self.to(self_unit, update = False, copy = False)

        new = self._replace(_contents = new_contents,
                            _sumw2 = new_sumw2,
                            _unit = new_unit)

        return new

    def __imul__(self, other):

        return self._operation(other, operator.imul)

    def __mul__(self, other):

        return self._operation(other, operator.mul)

    def __rmul__(self, other):

        return self*other

    def __itruediv__(self, other):

        return self._operation(other, operator.itruediv)

    def __truediv__(self, other):

        return self._operation(other, operator.truediv)

    def __rtruediv__(self, other):
        """
        Divide a scalar by the histogram
        """

        if not np.isscalar(other):
            raise ValueError("Inverse operation can only occur between "
                             "histograms or a histogram and a scalar")

        # Temporarily disable self unit
        self_unit = self._unit
        self.to(None, update = False, copy = False)

        # Simple change of unit in this case. Other can't be Quantity or Unit
        new_unit = None if self_unit is None else (1/self_unit).unit

        # Error propagtion of f = b/A (where b is constant, no error) is:
        # f_err^2 = f^2 (A_err/A)^2
        self_contents = self._contents
        new_contents = other/self_contents

        if self._sumw2 is not None:
            new_sumw2_contents = (self._sumw2._contents * other * other /
                                  np.power(self_contents, 4))

            # make sure sumw2's type follows contents (not sure if necessary?)
            new_sumw2_contents = new_sumw2_contents.astype(self._contents.dtype,
                                                           copy=False)

            new_sumw2_unit = None if new_unit is None else new_unit**2

            new_sumw2 = self._sumw2._replace(_contents = new_sumw2_contents,
                                             _unit = new_sumw2_unit)

        else:

            new_sumw2 = None

        # restore self unit
        self.to(self_unit, update = False, copy = False)

        new = self._replace(_contents = new_contents,
                            _sumw2 = new_sumw2,
                            _unit = new_unit)

        return new

    def __iadd__(self, other):

        return self._operation(other, operator.iadd)

    def __add__(self, other):

        return self._operation(other, operator.add)

    def __radd__(self, other):

        return self + other

    def __neg__(self):

        # Temporarily disable self unit
        self_unit = self._unit
        self.to(None, update = False, copy = False)

        new_contents = -(self._contents)

        # restore self unit
        self.to(self_unit, update = False, copy = False)

        # copy sumw2 if it exists, since it is unchanged by negation
        new = self._replace(_contents = new_contents)

        return new

    def __isub__(self, other):

        return self._operation(other, operator.isub)

    def __sub__(self, other):

        return self._operation(other, operator.sub)

    def __rsub__(self, other):

        return -self + other

    def _comparison_operator(self, other, operation):
        other = self._strip_units(other)

        return operation(self._get_contents(), other)

    def __lt__(self, other):
        return self._comparison_operator(other, operator.lt)

    def __le__(self, other):
        return self._comparison_operator(other, operator.le)

    def __gt__(self, other):
        return self._comparison_operator(other, operator.gt)

    def __ge__(self, other):
        return self._comparison_operator(other, operator.ge)


[docs]
    def rebin(self, *ngroup):

        """
        Rebin a histogram by grouping adjacent bins into one on each axis

        If an axis does not have overflow tracking enabled, any
        partial group along that axis will be discarded.  If it *does*
        have overflow tracking enabled, any partial group's sum will
        be added to the axis' underflow bin if it is on the left, or
        to the overflow bin if it is on the right.

        For histograms with multiple axes, the result of rebinning is
        equivalent to rebinning the input on the first axis, then
        rebinning the result on the second axis, and so forth for all
        axes.

        Args:
            ngroup (int or array-like):
                number of adjacent bins to combine for each axis. If
                this value is > 0 for an axis, binning starts from left
                side of contents, so the last partial group (if any)
                is on the right; if < 0, binning starts from right side,
                so last partial group (if any) is on the left.

        Return:
            a new, rebinned Histogram

        """

        if len(ngroup) == 1:
            if not np.isscalar(ngroup[0]): # a single array-like
                ngroup = ngroup[0]
        else:
            # an array-like of values (must be scalar)
            if not all(np.isscalar(v) for v in ngroup):
                raise ValueError("input to rebin must be scalar or array-like of scalars")

        group_size = np.asarray(ngroup, dtype=int)

        if not group_size.all():
            raise ValueError("ngroup cannot contain 0")

        # number of bins to group on each axis, along with direction
        group_sign = np.broadcast_to(np.sign(group_size), self.ndim)
        group_size = np.broadcast_to(np.abs(group_size), self.ndim)

        # compute edges for each axis after grouping, along with padding necessary
        # for each axis' contents to have the correct size (including any uflow/oflow)
        # after rebinning
        new_axes = []
        padding = []
        slicing = []

        for a, gsz, gsgn, track in zip(self._axes, group_size, group_sign, self._track_overflow):

            # new axis contains every ngroupth bin. If sign is +, we start from
            # bin 0; if -, we shift to ensure last bin is included.

            start = a.nbins % gsz if gsgn < 0 else 0
            new_axes.append( a[start::gsz] )

            # for untracked dimensions, slice away any unused bins

            trim = 0 if track else a.nbins % gsz
            slicing.append( slice(trim, None) if gsgn < 0 else slice(None, a.nbins + 2*track - trim))

            # pad before and after by the minimum amount needed to ensure that
            # padded size is multiple of gsz and any uflow/oflow will be preserved

            p1 = track * (gsz - 1)                      # won't be grouped with contents
            p2 = (gsz - (a.nbins - trim + track)) % gsz # might be grouped with contents
            padding.append( (p1, p2) if gsgn > 0 else (p2, p1) )

        # create new Axes object. *Do* copy the existing Axes so that
        # they do not share edge arrays with the input Histogram's axes.
        new_axes = Axes(new_axes)
        slicing = tuple(slicing)

        # new shape splits each dimension into two; even dims are the
        # size of the array post-rebining, and odd dims are the size
        # of the groups to be summed on that axis

        new_shape = np.empty(2*self.ndim, dtype = int)
        new_shape[0::2] = (self._axes.nbins // group_size) + 2*self._track_overflow
        new_shape[1::2] = group_size

        sum_axes = tuple(range(1, 2*self.ndim, 2)) # axes to be summed away

        # pad and reshape contents matrix prior to summing
        new_contents = self._contents[slicing]
        new_contents = self._pad(new_contents, padding)
        new_contents = np.reshape(new_contents, new_shape)
        new_contents = np.sum(new_contents, axis=sum_axes)

        # apply same rebinning to sumw2 if it exists
        if self._sumw2 is not None:
            new_sumw2_c = self._sumw2._contents[slicing]
            new_sumw2_c = self._pad(new_sumw2_c, padding)
            new_sumw2_c = np.reshape(new_sumw2_c, new_shape)
            new_sumw2_c = np.sum(new_sumw2_c, axis=sum_axes)

            new_sumw2 = self._sumw2._replace(_axes = new_axes,
                                             _contents = new_sumw2_c)
        else:
            new_sumw2 = None

        return self._replace(_axes = new_axes,
                             _contents = new_contents,
                             _sumw2 = new_sumw2)



[docs]
    def plot(self, ax = None,
             errorbars = None,
             colorbar = True,
             label_axes = True,
             **kwargs):
        """
        Quick plot of the histogram contents.

        Under/overflow bins are not included. Only 1D and 2D histograms
        are supported.

        Histogram with a HealpixAxis will automatically be plotted
        as a map, passing all kwargs to mhealpy's HealpixMap.plot()

        Args:
            ax (matplotlib.axes): Axes on which to draw the histogram. A new
                one will be created by default.
            errorbars (bool or None): Include errorbars for 1D histograms. The
                 default is to plot them if sumw2 is available
            colorbar (bool): Draw colorbar in 2D plots
            label_axes (bool): Label plots axes. Histogram axes must be labeled.
            **kwargs: Passed to `matplotlib.errorbar()` (1D) or
                `matplotlib.pcolormesh` (2D)
        """

        import matplotlib.pyplot as plt
        from mhealpy import HealpixMap
        from .healpix_axis import HealpixAxis

        def without_units(a):
            return a.value if isinstance(a, u.Quantity) else a

        # Matplotlib errorbar and pcolormesh need regular array
        contents = self._get_contents() # contents without units, oflow/uflow
        if self.is_sparse:
            contents = contents.todense()

        # Handle the special case of a healpix axis
        if self.ndim == 1 and isinstance(self._axes[0], HealpixAxis):

            axis = self._axes[0]

            # Pad in case it is partial map
            contents = self._pad(contents,
                                 (axis.lo_lim,
                                  axis.npix - axis.hi_lim))

            m = HealpixMap(data = contents, base = axis)

            args = ()
            if ax is not None:
                args = (ax,)

            plot, ax = m.plot(*args,
                              cbar = colorbar,
                              **kwargs)

            return ax, plot

        # Default errorbars
        if errorbars is None:
            if self._sumw2 is None:
                errorbars = False
            else:
                errorbars = True

        # Create axes if needed (with labels)
        if ax is None:
            fig, ax = plt.subplots()

        # Plot, depending on number of dimensions
        if self.ndim == 1:

            axis = self._axes[0]

            if isinstance(axis, TimeDeltaAxis):
                # Convert to regular axis with default unit
                # Include unit in label since were are dropping it
                axis = axis.to()
                axis.label = self._axes[0].label_with_unit

            # We have two points per bin (lower edge+center), and 2 extra
            # point for under/overflow (these currently don't have errorbar,
            # they looked bad)
            if isinstance(axis, TimeAxis):
                xdata = Time([datetime.datetime(2000,1,1,0,0,0)]*(2 * axis.nbins + 2), format = 'datetime')
            else:
                # For regular Axis and TimeAxis
                if axis.unit is None:
                    xdata = np.empty(2*axis.nbins + 2)
                else:
                    xdata = np.empty(2*axis.nbins + 2)*axis.unit

            xdata[0] = axis.edges[0] # For underflow, first edge
            xdata[1::2] = axis.edges # In between edges. Last edge for overflow
            xdata[2::2] = axis.centers # For markers

            # Handle time axis, to it can be formatted lated
            if isinstance(axis, TimeAxis):
                xdata = xdata.datetime
            else:
                # Regular Axis and TimeAxis
                xdata = without_units(xdata)

            underflow = self._get(-1) if self._track_overflow[0] else 0
            overflow = self._get(axis.nbins) if self._track_overflow[0] else 0

            ydata = np.concatenate(([underflow],
                                    np.repeat(contents, 2),
                                    [overflow]))

            # Style
            drawstyle = kwargs.pop('drawstyle', 'steps-post')

            # Error bars
            yerr = None

            if errorbars:

                errors = without_units(self.bin_error.contents)
                if self.is_sparse:
                    # No auto densify
                    errors = errors.todense()

                yerr = np.empty(2*axis.nbins + 2)
                yerr[2::2] = errors
                yerr[0] = None # No underflow errorbar, looked bad
                yerr[1::2] = None # No overflow errorbar, looked bad

            # Plot
            plot = ax.errorbar(xdata,
                               ydata,
                               yerr = yerr,
                               drawstyle = drawstyle,
                               **kwargs)

            # Label axes
            if label_axes:
                ax.set_xlabel(axis.label_with_unit)

                if self._unit not in (None, u.dimensionless_unscaled):
                    ax.set_ylabel(f"[{self._unit}]")

        elif self.ndim == 2:

            plot_edges = []
            for axis in self._axes:
                if isinstance(axis, TimeAxis):
                    plot_edges.append(axis.edges.datetime)
                elif isinstance(axis, TimeDeltaAxis):
                    plot_edges.append(axis.to().edges)
                else:
                    # Regular axis
                    plot_edges.append(without_units(axis.edges))

            # No under/overflow
            plot = ax.pcolormesh(plot_edges[0], plot_edges[1],
                                 np.transpose(contents),
                                 **kwargs)

            if label_axes:
                ax.set_xlabel(self._axes[0].label_with_unit)
                ax.set_ylabel(self._axes[1].label_with_unit)

            if colorbar:

                cax = ax.get_figure().colorbar(plot, ax = ax)

                if self._unit not in (None, u.dimensionless_unscaled):
                    cax.set_label(f"[{self._unit}]")

        else:

            raise ValueError("Plotting only available for 1D and 2D histograms")

        if self._axes[0].axis_scale == 'log':
                ax.set_xscale('log')

        if self.ndim > 1 and self._axes[1].axis_scale == 'log':
                ax.set_yscale('log')

        return ax, plot


    # draw() is an alias of plot()
    draw = plot


[docs]
    def fit(self, f, lo_lim = None, hi_lim = None, **kwargs):
        """
        Fit histogram data using least squares.

        This is a convenient call to scipy.optimize.curve_fit. Sigma corresponds
        to the output of `h.bin_error`. Empty bins (e.g. error equals 0) are
        ignored

        Args:
            f (callable): Function f(x),... that takes the independent variable
                x as first argument, and followed by the parameters to be fitted.
                For a k-dimensional histogram is should handle arrays of shape
                (k,) or (k,N). The inputs and outputs must be unitless.
            lo_lim (float or array): Low axis limit to fit. One value per axis.
            lo_lim (float or array): High axis limit to fit. One value per axis.
            **kwargs: Passed to scipy.optimize.curve_fit
        """

        from inspect import signature
        from scipy.optimize import curve_fit

        lo_lim = np.broadcast_to(lo_lim, self.ndim, subok = True)
        hi_lim = np.broadcast_to(hi_lim, self.ndim, subok = True)

        # Sanity checks
        for axis,lo,hi in zip(self._axes, lo_lim, hi_lim):
            if ((lo is not None and lo < axis.lo_lim) or
                (hi is not None and hi >= axis.hi_lim)):
                raise ValueError("Fit limits out of bounds")

        # Get bins that correspond to the fit limits
        lim_bins = tuple(slice(None if lo is None else axis.find_bin(lo),
                                None if hi is None else axis.find_bin(hi))
                          for axis,lo,hi
                          in zip(self._axes, lo_lim, hi_lim))

        # Get data to fit
        x = [axis.centers[bins].value
             if axis.unit is not None
             else axis.centers[bins]
             for axis,bins in zip(self._axes,lim_bins)]
        x = np.meshgrid(*x, indexing='ij') # For multi-dimensional histograms
        y = self._get(lim_bins)
        sigma = self.bin_error[lim_bins]

        # Sparse
        if self.is_sparse:
            y = y.todense()
            sigma = sigma.todense()

        # Ignore empty bins
        non_empty = (sigma != 0)
        x = [centers[non_empty] for centers in x]
        y = y[non_empty]
        sigma = sigma[non_empty]

        # Flat matrices
        if self.ndim > 1:
            x = np.asarray([centers.ravel() for centers in x])
            y = y.ravel()
            sigma = sigma.ravel()
        else:
            x = x[0]

        # Sanity checks
        if x.shape[-1] < len(signature(f).parameters) - 1:
            raise RuntimeError("Fewer bins within limits than parameters to fit.")

        # Actual fit with scipy
        return curve_fit(f, x, y, sigma = sigma, **kwargs)



[docs]
    def write(self, filename, name = "hist", overwrite = False,
              h5_dataset_kw = None):
        """
        Write histogram to a group in an HDF5 file. Appended if the file already
        exists.

        Args:
            filename (str): Path to file
            name (str): Name of group to save histogram (can be any HDF5 path)
            overwrite (str): Delete and overwrite group if already exists.
            h5_dataset_kw (dict, optional): dictionary of keywords to control
               how the Histogram's will be written to datasets by the underlying
               HDF5 library

        For information on keywords controlling dataset creation, see
        https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset

        """
        import h5py as h5

        with h5.File(filename, 'a') as f:

            # Will fail on existing group by default
            if name in f:
                if overwrite:
                    del f[name]
                else:
                    raise ValueError("Unable to write histogram. Another group "
                                     "with the same name already exists. Choose "
                                     "a different name or use overwrite")

            self._write(f, name, h5_dataset_kw = h5_dataset_kw)



[docs]
    @classmethod
    def open(cls, filename, name = 'hist'):
        """
        Read a Histogram from a specified group in an HDF5 file.
        """
        import h5py as h5

        with h5.File(filename, 'r') as f:
            return cls._open(f[name])


    def _write(self, file, group_name, h5_dataset_kw=None, **kwargs):
        """
        Write histogram to the named group in an HDF5 file.  This method
        can be overriden by subclasses to add additional information to
        the group.

        Args:
            file : HDF5 file handle
            group_name (str): Name of group to save histogram (can be
              any HDF5 path)
            h5_dataset_kw (dict, optional): dictionary of keywords to control
               how the Histogram's will be written to datasets by the underlying
               HDF5 library

        """

        if h5_dataset_kw is None:
            h5_dataset_kw = {}

        hist_group = file.create_group(group_name)

        if self._unit is not None:
            hist_group.attrs['unit'] = str(self._unit)

        # Axes. Each one is a data set with attributes.
        # We rely on Axes to return them later in the same
        # order that they are saved.

        axes_group = hist_group.create_group('axes')

        self._axes.write(axes_group)

        if self.is_sparse:
            hist_group.attrs['format'] = 'coo'

            contents_group = hist_group.create_group('contents')

            contents = self._contents

            contents_group.create_dataset('coords',
                                          data = contents.coords,
                                          compression = "gzip",
                                          track_times = False)
            contents_group.create_dataset('data',
                                          data = contents.data,
                                          compression = "gzip",
                                          track_times = False)
            contents_group.create_dataset('shape',
                                          data = contents.shape,
                                          track_times = False)
            contents_group.create_dataset('fill_value',
                                          data = contents.fill_value,
                                          track_times = False)

            if self._sumw2 is not None:
                sumw2_group = hist_group.create_group('sumw2')

                sumw2_contents = self._sumw2._contents

                sumw2_group.create_dataset('coords',
                                           data = sumw2_contents.coords,
                                           compression = "gzip",
                                           track_times = False)
                sumw2_group.create_dataset('data',
                                           data = sumw2_contents.data,
                                           compression ="gzip",
                                           track_times = False)
                sumw2_group.create_dataset('shape',
                                           data = sumw2_contents.shape,
                                           track_times = False)
                sumw2_group.create_dataset('fill_value',
                                           data = sumw2_contents.fill_value,
                                           track_times = False)

        else:
            hist_group.attrs['format'] = 'dense'

            hist_group.create_dataset('contents',
                                      data = self._contents,
                                      track_times = False,
                                      **h5_dataset_kw)

            if self._sumw2 is not None:
                hist_group.create_dataset('sumw2',
                                          data = self._sumw2._contents,
                                          track_times = False,
                                          **h5_dataset_kw)

        return hist_group


    @classmethod
    def _open(cls, hist_group, **kwargs):
        """
        Alternative constructor that reads contents from a group in an
        HDF5 file.  May be overridden just like a regular constructor.

        Args:
            hist_group: group in HDF5 file where data is stored

        Returns:
            New object of type cls (which may be a subclass of Histogram)

        """

        axes, contents, sumw2, unit, track_overflow = Histogram._read_from_hdf(hist_group)

        new = cls.__new__(cls)

        # sanity-checks values and allocates special accessor objects
        Histogram.__init__(new,
                           edges = axes,
                           contents = contents,
                           sumw2 = sumw2,
                           unit = unit,
                           track_overflow = track_overflow,
                           copy_contents = False)

        return new


    @staticmethod
    def _read_from_hdf(hist_group):
        """Read contents of a histogram from a data group.  Return the raw
        contents as a tuple; caller is responsible for creating the
        object.

        Args:
            hist_group: an HD5 group object

        Returns:
            a tuple (axes, contents, sumw2, unit, track_overflow)
            that may be given to the Histogram constructor

        """

        unit = None
        if 'unit' in hist_group.attrs:
            unit = u.Unit(hist_group.attrs['unit'])

        # Axes
        axes_group = hist_group['axes']

        axes = Axes.open(axes_group)

        # Contents
        # Backwards compatible before sparse was supported
        if ('format' not in hist_group.attrs or
            hist_group.attrs['format'] == 'dense'):

            contents = np.asarray(hist_group['contents'])

            sumw2 = None
            if 'sumw2' in hist_group:
                sumw2 = np.asarray(hist_group['sumw2'])

        elif hist_group.attrs['format'] == 'gcxs':

            contents_group = hist_group['contents']

            compressed_axes = None
            if 'compressed_axes' in contents_group:
                compressed_axes = np.asarray(contents_group['compressed_axes'])

            contents = GCXS((np.asarray(contents_group['data']),
                             np.asarray(contents_group['indices']),
                             np.asarray(contents_group['indptr'])),
                            compressed_axes = compressed_axes,
                            shape = tuple(contents_group['shape']),
                            fill_value = np.asarray(contents_group['fill_value']).item())

            sumw2 = None
            if 'sumw2' in hist_group:
                sumw2_group = hist_group['sumw2']

                compressed_axes = None
                if 'compressed_axes' in sumw2_group:
                    compressed_axes = np.asarray(sumw2_group['compressed_axes'])

                sumw2 = GCXS((np.asarray(sumw2_group['data']),
                              np.asarray(sumw2_group['indices']),
                              np.asarray(sumw2_group['indptr'])),
                             compressed_axes = compressed_axes,
                             shape = tuple(sumw2_group['shape']),
                             fill_value = np.asarray(sumw2_group['fill_value']).item())

        elif hist_group.attrs['format'] == 'coo':

            contents_group = hist_group['contents']

            contents = COO(coords = np.asarray(contents_group['coords']),
                           data = np.asarray(contents_group['data']),
                           shape = tuple(contents_group['shape']),
                           fill_value = np.asarray(contents_group['fill_value']).item())

            sumw2 = None
            if 'sumw2' in hist_group:
                sumw2_group = hist_group['sumw2']

                sumw2 = COO(coords = np.asarray(sumw2_group['coords']),
                            data = np.asarray(sumw2_group['data']),
                            shape = tuple(sumw2_group['shape']),
                            fill_value = np.asarray(sumw2_group['fill_value']).item())

        else:
            raise IOError(f"Format {hist_group.attrs['format']} unknown.")

        # Deduce track_overflow based on contents shape
        track_overflow = [size == a.nbins + 2
                          for size,a in zip(contents.shape, axes)]

        return (axes, contents, sumw2, unit, track_overflow)