Source code for histpy.histogram

import logging
logger = logging.getLogger(__name__)

import warnings

import numpy as np

import operator

import sys

from copy import copy,deepcopy

import matplotlib.pyplot as plt

from scipy.optimize import curve_fit

from inspect import signature

import h5py as h5

from .axes import Axes,Axis
from .healpix_axis import HealpixAxis

from sparse import DOK, COO, GCXS, SparseArray

import astropy.units as u

from mhealpy import HealpixMap

[docs]class Histogram(object):
    """
    This is a wrapper of a numpy array with axes and a fill method. Sparse array from
    pydata's sparse package are also supported.
    
    Like an array, the histogram can have an arbitrary number of dimensions.

    Standard numpy array indexing is supported to get the contents 
    --i.e. :code:`h[:]`, :code:`h[4]`, :code:`h[[1,3,4]]`, :code:`h[:,5:50:2]]`,
    etc.--. However, the meaning of the :code:`-1` index is different. Instead of 
    counting from the end, :code:`-1` corresponds to the underflow bin. Similarly,
    an index equal to the number of bins corresponds to the overflow bin. 

    You can however give relative position with respect to :code:`h.end` 
    --e.g. :code:`h[0:h.end]` result in all regular bins, :code:`h[-1:h.end+1]` 
    includes also the underflow/overflow bins and :code:`h[h.end]` gives you the 
    contents of the overflow bin. The convenient aliases :code:`h.uf = -1`,
    :code:`h.of = e.end` and :code:`h.all = slice(-1,h.end+1)` are provided.
    
    You can also use an :code:`Ellipsis` object (:code:`...`) at the end to specify that the
    contents from the rest of the dimension are to have the under and overflow
    bins included. e.g. for a 3D histogram :code:`h[1,-1:h.end+1,-1:h.end+1]
    = h[1,...]`. :code:`h[:]` returns all contents without under/overflow bins and
    h[...] returns everything, including those special bins.

    If :code:`sumw2` is not :code:`None`, then the histogram will keep track of 
    the sum of the weights squared --i.e. you better use this if you are using 
    weighted data and are concern about error bars--. You can access these with
    :code:`h.sumw2[item]`, where `item` is interpreted the same was a in :code:`h[item]`.
    :code:`h.bin_error[item]` return the :code:`sqrt(sumw2)` (or :code:`sqrt(contents)` is :code:`sumw2
    was not specified`).

    The operators :code:`+`, :code:`-`, :code:`*` and :code:`-` are available.
    Both other operand can be a histograms, a scalar or an array of appropiate size.
    Note that :code:`h += h0` is more efficient than 
    :code:`h = h + h0` since latter involves the instantiation of a new histogram.

    Args:
        edges (Axes or array): Definition of bin edges, Anything that can
            be processes by Axes. Lower edge value is included in the bin, 
            upper edge value is excluded.
        contents (array or SparseArray): Initialization of histogram contents. Might or might
            not include under/overflow bins. Initialize to 0 by default.
        sumw2 (None, bool or array): If True, it will keep track of the sum of
            the weights squared. You can also initialize them with an array
        labels (array of str): Optionally label the axes for easier indexing
        axis_scale (str or array): Bin center mode e.g. `"linear"` or `"log"`. 
            See ``Axis.axis_scale``.
        sparse (bool): Initialize an empty sparse histogram. Only relevant if no contents
            are provided.

    """
            
    def __init__(self, edges, contents = None, sumw2 = None,
                 labels=None, axis_scale = None, sparse = None, unit = None):

        self._axes = Axes(edges, labels=labels, axis_scale = axis_scale)

        # Standarize contents (with under/overflow) or initialize them to zero.
        if contents is not None:

            # Deal with units
            if isinstance(contents, u.Quantity):

                if unit is None:
                    unit = contents._unit
                    contents = contents.value
                else:
                    contents = contents.to_value(unit)

            # Sparse
            self._sparse = isinstance(contents, SparseArray) 

            # Shape
            if np.array_equal(self.axes.nbins+2, np.shape(contents)):
                # Includes under and overflow

                if self.is_sparse:
                    self._contents = contents
                else:
                    self._contents = np.array(contents) 
                    
            elif np.array_equal(self.axes.nbins, np.shape(contents)):
                # Missing under and overflow, but right shape.
                # Adding empty under/overflow bins
                self._contents = np.pad(contents, 1)

            else:
                raise ValueError("Bins ({}) - contents {} size mismatch".format(self.axes.nbins,np.shape(contents)))
        else:

            self._sparse = sparse

            contents_shape = [n+2 for n in self.axes.nbins]

            if self.is_sparse:
                self._contents = DOK(contents_shape, fill_value = 0.)
            else:
                self._contents = np.zeros(contents_shape)


        # slice does not seems to work well with GCXS, force COO for now
        # Check https://github.com/pydata/sparse/issues/550
        if isinstance(self._contents, GCXS):
            self._contents = self._contents.asformat('coo')

        # Units
        if unit is not None:
            self._unit = u.Unit(unit)
        else:
            self._unit = None
        
        #Check if we'll keep track of the sum of the weights
        # Use same units as main histogram.
        w2units = None
        if self.unit is not None:
            w2units = self.unit*self.unit
        
        if sumw2 is None or sumw2 is False:
            self._sumw2 = None
        elif sumw2 is True:
            self._sumw2 = Histogram(self._axes,
                                    sparse = self.is_sparse,
                                    unit = w2units)
        elif isinstance(sumw2, Histogram):
            if self.axes != sumw2.axes:
                raise ValueError("Is sumw2 is a Histogram is needs to have "
                                 "consistent axes")

            self._sumw2 = sumw2
            
            self._sumw2.to(w2units)
        
        else:
            
            self._sumw2 = Histogram(self._axes, sumw2, unit = w2units)

        if self._sumw2 is not None and self.is_sparse != self._sumw2.is_sparse:
            raise ValueError("Both contents and sumw2 must be either "
                             "sparse or dense")

        # Special access methods
        self.sumw2 = None if self._sumw2 is None else self._get_sumw2(self)
        self.bin_error = self._get_bin_error(self)
        self.slice = self._slice(self)

    @property
    def unit(self):
        return self._unit

[docs]    def to(self, unit, equivalencies=[], update = True, copy = True):
        """
        Convert to other units.

        Args:
            unit (unit-like): Unit to convert to.
            equivalencies (list or tuple): A list of equivalence pairs to try if the units are not
                directly convertible.  
            update (bool): If ``update`` is ``False``, only the units will be changed without
                updating the contents accordingly
            copy (bool): If True (default), then the value is copied. Otherwise, a copy
                will only be made if necessary.
        """

        # Copy
        if copy:
            new = deepcopy(self)
        else:
            new = self
        
        # If no conversion is needed
        if not update:
            if unit is None:
                new._unit = None
            else:
                new._unit = u.Unit(unit)

            if new.sumw2 is not None:
                new._sumw2._unit = unit if unit is None else unit*unit
                
            return new

        # Compute factor
        if new.unit is None:
            
            if unit is None or unit == u.dimensionless_unscaled:
                factor = 1
            else:
                TypeError("Histogram without units")

        else:
            
            factor = new.unit.to(unit, equivalencies = equivalencies)

        # Update values
        new *= factor

        # Update units
        new._unit = unit

        # Update sumw2
        if new._sumw2 is not None:
            new._sumw2 *= factor*factor

            new._sumw2._unit = unit*unit

        return new

    @property
    def is_sparse(self):
        """
        Return True if the underlaying histogram contents are hold in a sparse array. False if it is a dense array.
        """
        return self._sparse

[docs]    def to_dense(self):
        """
        Return a dense representation of a sparse histogram
        """

        h_dense = deepcopy(self)
        
        if h_dense.is_sparse:
            h_dense._contents = self.full_contents.todense()

        if h_dense._sumw2 is not None:
            h_dense._sumw2 = h_dense._sumw2.to_dense()

        h_dense._sparse = False
            
        return h_dense

    # pydata sparse style
    todense = to_dense
    
[docs]    def to_sparse(self):
        """
        Return a sparse representation of a histogram.
        """

        h_sparse = deepcopy(self)
        
        if not h_sparse.is_sparse:
            h_sparse._contents = COO.from_numpy(self.full_contents)

        if h_sparse._sumw2 is not None:
            h_sparse._sumw2 = self._sumw2.to_sparse()

        h_sparse._sparse = True
            
        return h_sparse

    # pydata sparse style
    todense = to_dense

[docs]    @classmethod
    def concatenate(cls, edges, histograms, label = None):
        """
        Generate a Histogram from a list of histograms. The axes of all input
        histograms must be equal, and the new histogram will have one more
        dimension than the input. The new axis has index 0.

        Args:
            edges (Axes or array): Definition of bin edges of the new dimension
            histograms (list of Histogram): List of histogram to fill contents. 
                Might or might not include under/overflow bins.
            labels (str): Label the new dimension

        Return:
            Histogram
        """

        # Check all same axes
        old_axes = histograms[0].axes

        for hist in histograms:
            
            if hist.axes != old_axes:
                raise ValueError("The axes of all input histogram must equal")

        # Check new axis matches number of histograms,
        # with or without under/overflow
        new_axis = Axis(edges, label = label)

        if len(histograms) == new_axis.nbins:
            padding = [(1,1)]+[(0,0)]*old_axes.ndim
        elif len(histograms) == new_axis.nbins + 2:
            padding = [(0,0)]+[(0,0)]*old_axes.ndim
        else:
            raise ValueError("Mismatch between number of bins and "
                             "number of histograms")

        # Create new axes and new contents
        new_axes = Axes([new_axis] + [ax for ax in old_axes])

        # Check all sparse or all dense
        is_sparse = [h.is_sparse for h in histograms]

        if not np.all(is_sparse) and np.any(is_sparse):
            raise ValueError("Cannot concatenate a mix of sparse and dense histograms")

        # Unit conversion factor
        unit = histograms[0].unit

        if unit is None:
            # No factors, just check all histogram are the same
            
            for h in histograms[1:]:
                if h.unit is not None:
                    raise ValueError("Cannot concatenate maps with and without units.")
            
            unit_factor = 1
            
        else:

            unit_factor = [1]

            for h in histograms[1:]:
                if h.unit is None:
                    raise ValueError("Cannot concatenate maps with and without units.")
                else:
                    unit_factor += [h.unit.to(unit)]

            if np.all(unit_factor == 1):
                unit_factor = 1
            
        # Update contents, with padding and units if needed
        if unit_factor == 1:
            contents = np.concatenate([h.full_contents[None] for h in histograms])
        else:
            contents = np.concatenate([factor*h.full_contents[None]
                                       for factor,h in zip(unit_factor,histograms)])

        contents = np.pad(contents, padding)
        
        # New sumw2
        has_sumw2 = [h.sumw2 is not None for h in histograms] 
        if np.all(has_sumw2):

            if unit_factor == 1:
                sumw2 = np.concatenate([h._sumw2.full_contents[None] for h in histograms])
            else:
                sumw2 = np.concatenate([factor*factor*h.sumw2_.full_contents[None]
                                        for factor,h in zip(unit_factor,histograms)])
            
            sumw2 = np.pad(sumw2, padding)

        else:

            sumw2 = None

            if np.any(has_sumw2):
                # A mix. Drop and warn
                logger.warning("Not all input histogram have sum of weights "
                               "squared. sumw2 will be dropped")
        
        return Histogram(new_axes, contents, sumw2, unit = unit)
            
    @property
    def ndim(self):
        return self._axes.ndim
            
[docs]    def clear(self):
        """
        Set all counts to 0
        """

        self[...] = 0

        if self._sumw2 is not None:
            self._sumw2.clear()
                    
    def __eq__(self, other):
        # Histogram is completely defined by axes and contents

        return (self._axes == other._axes
                and
                np.all(self.full_contents == other.full_contents)
                and
                self.unit == other.unit
                and
                self._sumw2 == other._sumw2)

    def __array__(self):

        return np.array(self.contents)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):

        if len(inputs) != 1 or inputs[0] is not self:
            raise NotImplementedError(
                "Only numpy functions with a single operand are supported "
                "e.g. np.sin(h), np.sum(h). Call h.contents explicitly to "
                "get the corresponding array e.g. np.add(h.contents, a)")
        
        return ufunc(self.contents, *inputs[1:], **kwargs)
        
    def __array_function__(self, func, types, args, kwargs):

        # Only support basic functions on a single operand like np.sum(h)

        if len(args) != 1 or args[0] is not self:
            raise NotImplementedError(
                "Only numpy functions with a single operand are supported "
                "e.g. np.sin(h), np.sum(h). Call h.contents explicitly to "
                "get the corresponding array e.g. np.add(h.contents, a)")
        
        return func(self.contents, *args[1:], **kwargs)

    class _NBINS():        
        '''
        Convenience class that will expand to the number of bins of a 
        given dimension. 

        The trick is to overload the -/+ operators such than 
        h.end +/- offset (h being an instance of Histogram and 'end' an 
        static instance of Histogram._NBINS) returns an instance of _NBINS 
        itself, which stores the offset. The [] operator can then detect that
        the input is an instance of _NBINS and convert it into an integer with
        respect to the size of the appropiate axis.
        '''
        
        def __init__(self, offset = 0):
            self.offset = offset
    
        def __add__(self, offset):
            return self.__class__(offset)
    
        def __sub__(self, offset):
            return self + (-offset)
        
    end = _NBINS()

    # Convenient aliases
    uf = -1
    of = end
    all = slice(-1, end+1)
    
    def _prepare_indices(self, indices):
        '''
        Prepare indices for it use in __getitem__ and __setitem__ 
        --i.e. [] overloading --
        
        See class help.
        '''
        
        def _prepare_index(index, dim):
            '''
            Modify index for a single axis to account for under/overflow, 
            as well as to catch instances of _NBINS (see description above)
            This depend on the number of bins in the axis of a given 
            dimension (dim)
            '''

            if isinstance(index,slice):

                # Both start and stop can be either None, an instance of
                # _NBINS or an integer
                index = slice(1
                              if index.start is None
                              else
                              self.axes[dim].nbins + index.start.offset + 1
                              if isinstance(index.start, self._NBINS)
                              else
                              index.start+1,
                              
                              self.axes[dim].nbins+1
                              if index.stop is None
                              else
                              self.axes[dim].nbins + index.stop.offset + 1
                              if isinstance(index.stop, self._NBINS)
                              else
                              index.stop+1,
                              
                              index.step)
                
                # Check bounds. Note index is the _contents index at this point
                if index.start < 0 or index.stop > self.axes[dim].nbins + 2:
                    raise IndexError("Bin index out of bounds")
                
                return index
            
            elif isinstance(index, (np.integer, int)):
                if index < -1 or index > self.axes[dim].nbins:
                    raise IndexError("Bin index out of bounds")

                return index+1

            elif isinstance(index, self._NBINS):

                # Referece with respect to nbins
                return _prepare_index(self.axes[dim].nbins + index.offset, dim)
            
            elif isinstance(index, (np.ndarray, list, tuple, range)):

                # Note: this will return a copy, not a view
                    
                # Handle references with respecto to nbins
                index = np.array(index)

                if index.dtype == bool:
                    # Mask. Pad under/overflow

                    return np.pad(index, 1, constant_values = False)
                    
                else:
                    # Indices

                    index_shape = index.shape

                    index = index.flatten()
                    
                    index =  [self.axes[dim].nbins + i.offset + 1
                              if isinstance(i,self._NBINS) else
                              i + 1
                              for i in index]

                    index = np.reshape(index, index_shape)
                    
                    # Check bounds. Note index is the _contents index at this point/
                    if (np.any(index < 0) or
                         np.any(index > self.axes[dim].nbins + 1)):
                        raise IndexError("Bin index out of bounds")
                        
                    return index
                    
            else:
                raise TypeError("Index can only be an int, slice, list or array")
        
        if isinstance(indices, tuple):

            # Get the rest of the dimensions with under/overflow user used ...
            if indices[-1] is Ellipsis:
                extra_indices = tuple(slice(-1, self.axes[dim].nbins+1)
                                      for dim in range(len(indices)-1, self.ndim))

                indices = self._prepare_indices(indices[:-1] + extra_indices)

                return indices
            
            # Standard way. All other ways end up here after recursion
            indices = tuple(_prepare_index(index, dim)
                            for dim,index in enumerate(indices))

            # Remove under/overflow of the rest of the dimensions
            indices += tuple(_prepare_index(slice(None), dim) for dim in
                             range(len(indices), self.ndim))

            return indices

        elif isinstance(indices, dict):

            return self._prepare_indices(self.axes.expand_dict(indices, default = slice(None)))

        else:
            # Single axis
            return self._prepare_indices(tuple([indices]))

    # Access methods
    # DO NOT use _contents directly, all goes through here
    # to handle sparse arrays correctly. Specifically, do not
    # to operations on self._contents (*,+,slice, concatenate, etc.)
    # since DOK is very inneficient. Similarly, COO is readonly,
    # so trying to use __setitem__ on it will raise an error.
    # Assignments to _contents iteself are fine (e.g. self._contents = X)

    def __setitem__(self, indices, new_contents):

        # Make it mutable if needed
        if self.is_sparse:
            self._contents = self._contents.asformat('dok')

        indices = self._prepare_indices(indices)

        try:
            self._contents[indices] = self._strip_units(new_contents)
        except (IndexError, ValueError) as e:

            if self.is_sparse:
                # Check if advance indexing is being used
                for index in indices:
                    if isinstance(index, np.ndarray):
                        logger.warning(
                            "Advance indexing is not yet be fully supported "
                            "for sparse arrays. See "
                            "https://github.com/pydata/sparse/issues/1 and "
                            "https://github.com/pydata/sparse/issues/114")

                        break
            raise e

    @property
    def _getitem_units(self):
        # Usually, if the histogram has units, [] and interp would return units.
        # Unfortunatenly SparseMatrix is not compatible with Quantity, so in sparse
        # mode only the values is returned
        return not (self.unit is None or self.is_sparse)
        
    def __getitem__(self, indices):

        # Go back to efficient read-only if needed
        if self.is_sparse:
            self._contents = self._contents.asformat('coo')

        indices = self._prepare_indices(indices)

        try:
            if self._getitem_units:
                return self._contents[indices]*self.unit
            else:              
                return self._contents[indices]
        except IndexError as e:

            if self.is_sparse:
                # Check if advance indexing is being used
                for index in indices:
                    if isinstance(index, np.ndarray):
                        logger.warning(
                            "Advance indexing is not yet be fully supported "
                            "for sparse arrays. See "
                            "https://github.com/pydata/sparse/issues/1 and "
                            "https://github.com/pydata/sparse/issues/114")

                        break

            raise e

    class _special_getitem:
        """
        This allows to use regular indexing for special access methods. 
        e.g. h.sumw2[] and h.bin_error[]
        """
        
        def __init__(self, hist):
            self._hist = hist

        def __getitem__(self, item):
            raise NotImplmentedError

        def __array__(self):
            return self.contents

        @property
        def contents(self):
            return self[:]
    
        @property
        def full_contents(self):
            return self[...]
        
    class _get_sumw2(_special_getitem):
        """
        Return the sum fo the weights squares. If sumw2 is not stored, then
        it assumed all the weights of all entries equal 1.
        """
        
        def __getitem__(self, item):

            return self._hist._sumw2[item]
            
        def __setitem__(self, indices, new_sumw2):

            if self._hist._sumw2 is None:
                raise ValueError("Histogram does not have sumw2")

            self._hist._sumw2[indices] = new_sumw2
            
    class _get_bin_error(_special_getitem):
        """
        Return the sqrt of sumw2
        """
        
        def __getitem__(self, item):
            if self._hist._sumw2 is not None:
                return np.sqrt(self._hist._sumw2[item])
            else:

                if self._hist._getitem_units:
                    # Fake units
                    return np.sqrt(np.abs(self._hist[item].value))*self._hist.unit
                else:
                    return np.sqrt(np.abs(self._hist[item]))
                
                return np.sqrt(np.abs(self._hist[item]))
                        
        def __setitem__(self, indices, new_bin_error):

            if self._hist._sumw2 is None:
                raise ValueError("Histogram does not have sumw2")

            self._hist._sumw2[indices] = np.power(new_bin_error, 2)

    @property
    def contents(self):
        """
        Equivalent to :code:`h[:]`. Does not include under and overflow bins.
        """
        
        return self[:]
        
    @property
    def full_contents(self):
        """
        Equivalent to :code:`h[...]`. Includes all under and overflow bins.
        """
        
        return self[...]

    @property
    def axes(self):
        """
        Underlaying axes object
        """
        
        return self._axes

    @property
    def axis(self):
        """
        Equivalent to :code:`self.axes[0]`, but fails if :code:`ndim > 1`
        """

        if self.ndim > 1:
            raise ValueError("Property 'axis' can only be used with 1D "
                            "histograms. Use `axes` for multidimensional "
                            "histograms")

        return self.axes[0]

    @axis.setter
    def axis(self, new):

        self.axes[0] = new

        if self._sumw2 is not None:
            self._sumw2.axis = new
        
    @property
    def nbins(self):

        if self.ndim == 1:
            return self.axes[0].nbins
        else:
            return self.axes.nbins
            
[docs]    def interp(self, *values):
        """
        Get a linearly interpolated content for a given set of values
        along each axes. The bin contents are assigned to the center of the bin.
        
        Args:
            values (float or array): Coordinates within the axes to interpolate.
                 Must have the same size as `ndim`. Input values as
                 `(1,2,3)` or `([1,2,3])`

        Return:
            float
        """

        bins,weights = self._axes.interp_weights(*values)

        if self._getitem_units:
            content = 0*self.unit
        else:
            content = 0

        if bins.ndim == 1:
            # Single point case
            
            for bin,weight in zip(bins, weights):
                content += weight*self[bin]

        else:

            # Multi
            
            for bin,weight in zip(bins, weights):

                # Each b can be a tuple
                bin = np.array(bin, ndmin = 1)
                
                bin_values = np.reshape([self[b] for b in bin.flatten()],
                                        bin.shape)

                content += weight*bin_values

        return content

    def find_bin(self, *args, **kwargs):
        return self.axes.find_bin(*args, **kwargs)

    def _strip_units(self, quantity):

        if isinstance(quantity, u.Quantity):

            if quantity.unit == u.dimensionless_unscaled:
                return quantity.value
            
            if self.unit is None:
                return u.UnitConversionError("Histogram without units")

            return quantity.to_value(self.unit)

        elif isinstance(quantity, u.UnitBase):

            if quantity == u.dimensionless_unscaled:
                # Do no crash is self.unit is None
                return 1

            return quantity.to(self.unit)
        
        else:

            if self.unit is not None:
                raise u.UnitConversionError("Specify units")
            
            return quantity

[docs]    def fill(self, *values, weight = None):
        '''
        And an entry to the histogram. Can be weighted.

        Follow same convention as find_bin()

        Args:
            values (float or array): Value of entry
            weight (float): Value weight in histogram. Defaults to 1 in whatever units
                the histogram has
        
        Note:
            Note that weight needs to be specified explicitely by key, otherwise
            it will be considered a value an a IndexError will be thrown.
        '''

        indices = self.find_bin(*values)

        # Standarize is single axis
        if not isinstance(indices, tuple):
            indices = (indices,)
        
        # Allow for some values with different shape but broadcastable
        indices = np.broadcast_arrays(*indices)

        # Convert units if needed
        if weight is None:
            weight = 1
        else:
            weight = self._strip_units(weight)
        
        # Flatten all
        indices = tuple([i.flatten() for i in indices])
        weight = np.array(weight).flatten()
        
        # Make it work with multiple entries at one
        new_axes = tuple(np.arange(np.ndim(indices), 2))
        indices = np.expand_dims(indices, new_axes).transpose()
        weight = np.broadcast_to(weight, indices.shape[0])

        # Work with _contents directly. More efficient. 
        # We don't need all the checks from self._prepare_indices
        # because these come from find_bin. This is faster.
        indices += 1

        if self.is_sparse:

            # Filling a dict directly. Sparse has a lot of overhead checking
            # that the indices are valid in the __setitem__ method, but I'm
            # already checking this with _prepare_indices
            # This is faster than doing self._contents[i] += w
            d_new_contents = {}

            for i,w in zip(indices, weight):
                # Add if already exists, otherwise create new entry
                try:
                    elem = d_new_contents[tuple(i)]
                    elem[0] += w
                    elem[1] += w*w
                except KeyError:
                    d_new_contents[tuple(i)] = [w, w*w]

            # Convert dict to sparse array
            coords = np.transpose(list(d_new_contents.keys()))
            data = np.array(list(d_new_contents.values()))
            
            new_contents = COO(coords = coords, data = data[:,0],
                               shape = self._contents.shape)

            # Add
            self._contents = self._contents.asformat('coo')

            self._contents += new_contents

            if self._sumw2 is not None:
                self._sumw2._contents = self._sumw2._contents.asformat('coo')

                new_contents_sqr = COO(coords = coords, data = data[:,1],
                               shape = self._contents.shape)
                    
                self._sumw2._contents += new_contents_sqr
            
        else:

            # Add directly for dense array
            for i,w in zip(indices, weight):
                self._contents[tuple(i)] += w

            if self._sumw2 is not None:
                for i,w in zip(indices, weight):
                    self._sumw2._contents[tuple(i)] += w*w

[docs]    def project(self, *axis):
        """
        Return a histogram consisting on a projection of the current one

        Args:
            axis (int or str or list): axis or axes onto which the
                histogram will be projected --i.e. will sum up over the
                other dimensiones--. The axes of the new histogram will
                have the same order --i.e. you can transpose axes--

        Return:
            Histogram: Projected histogram
        """
        if self.ndim == 1:
            raise ValueError("Can't project a 1D histogram. "
                             "Consider using np.sum(h) or np.sum(h.full_contents) (with under/overflow)")

        #Standarize
        if len(axis) == 1 and \
           isinstance(axis[0], (list, np.ndarray, range, tuple)):
            # Got a sequence
            axis = axis[0]
        
        axis = self._axes.label_to_index(axis)
        
        if len(np.unique(axis)) != len(axis):
            raise ValueError("An axis can't repeat")

        # Project
        sum_axes = tuple(dim for dim in range(0, self.ndim) if dim not in axis)

        if len(sum_axes) == 0:
            new_contents = deepcopy(self.full_contents)
        else:
            new_contents = self.full_contents.sum(axis = sum_axes)

        # Transpose the contents to match the order of the axis provided by the
        # the user, which are currently sorted
        new_contents = np.transpose(new_contents,
                                    axes = np.argsort(np.argsort(axis)))

        new_sumw2 = None
        if self._sumw2 is not None:
            new_sumw2 = self._sumw2.project(axis).full_contents
                
        return Histogram(edges = self._axes[axis],
                         contents = new_contents,
                         sumw2 = new_sumw2,
                         unit = self.unit)

[docs]    def clear_overflow(self, axes = None):
        """
        Set all overflow bins to 0, including sumw2

        Args:
            axes (None or array): Axes number or labels. All by default
        """
        if axes is None:
            axes = range(self.ndim)
        elif np.isscalar(axes):
            axes = [axes]
        
        for n in axes:
            indices = self.expand_dict({n:self.of}, self.all)
            self[indices] = 0

        if self._sumw2 is not None:
            self._sumw2.clear_overflow(axes)
        
[docs]    def clear_underflow(self, axes = None):
        """
        Set all overflow bins to 0, including sumw2

        Args:
            axes (None or array): Axes number or labels. All by default
        """
        if axes is None:
            axes = range(self.ndim)
        elif np.isscalar(axes):
            axes = [axes]

        for n in axes:
            indices = self.expand_dict({n:self.uf}, self.all)
            self[indices] = 0

        if self._sumw2 is not None:
            self._sumw2.clear_underflow(axes)        

[docs]    def clear_underflow_and_overflow(self, *args, **kwargs):
        """
        Set all underflow and overflow bins to 0, including sumw2

        Args:
            axes (None or array): Axes number or labels. All by default
        """

        self.clear_underflow(*args, **kwargs)
        self.clear_overflow(*args, **kwargs)        
            
    class _slice:
        """
        Return a histogram which is a slice of the current one
        """

        def __init__(self, hist):
            self._hist = hist

        def __getitem__(self, item):

            # Standarize indices into slices
            indices = self._hist._prepare_indices(item)
            indices = tuple(slice(i,i+1) if isinstance(i, (int, np.integer)) else i for i in indices)

            # Check indices and either pad or use under/overflor

            # If under/overflow are including, will use them instead of pads
            padding = np.ones([self._hist.ndim, 2], dtype = int)

            axes_indices = np.empty(self._hist.ndim, dtype = 'O')
            new_nbins = np.empty(self._hist.ndim, dtype = int)
            
            for ndim,(index, axis) in enumerate(zip(indices,
                                                    self._hist.axes)):

                # Sanity checks
                if not isinstance(index, slice):

                    raise TypeError("Only slices and integers allowed in slice[]")
               
                start,stop,stride = index.indices(axis.nbins+2)

                if stride != 1:
                    raise ValueError("Step must be 1 when getting a slice."
                                     "Alertnatively us rebin().")

                if start == stop:
                    raise ValueError("Slice must have a least one bin in "
                                     "all dimensions. Alternatively use project().")

                if start > stop:
                    raise ValueError("Slices cannot reverse the bin order.")

                # Handle under/overflow

                axis_start = start # Axes don't have under/overflow
                axis_stop = stop
                
                if start == 0:

                    if stop-start == 1:
                        raise ValueError("The slice cannot contain only the "
                                         "underflow bin")

                    axis_start += 1
                    
                    padding[ndim, 0] = 0

                    
                if stop == axis.nbins + 2:

                    if stop-start == 1:
                        raise ValueError("The slice cannot contain only the "
                                         "overflow bin")

                    axis_stop -= 1

                    padding[ndim, 1] = 0                        

                    
                axes_indices[ndim] = slice(axis_start, axis_stop)
                new_nbins[ndim] = stop-start
                
            axes_indices = tuple(axes_indices)
            # Get new contents. Pad is underflow/overflow are not included
            new_contents = self._hist.full_contents[indices]
            new_contents = np.pad(new_contents, padding)
            
            # New Axes
            new_axes = []

            for axis,index in zip(self._hist.axes,axes_indices):

                new_axis_kw =  {'edges': axis.edges[index.start-1:index.stop],
                                'label': axis.label}
                
                if isinstance(axis, HealpixAxis):
                    new_axis_kw['nside'] = axis.nside
                    new_axis_kw['scheme'] = axis.scheme
                    new_axis_kw['coordsys'] = axis.coordsys
                else:
                    new_axis_kw['scale'] = axis.axis_scale

                new_axes += [axis.__class__(**new_axis_kw)]
                
            # Sum weights squared
            new_sumw2 = None
            if self._hist._sumw2 is not None:
                new_sumw2 = self._hist._sumw2.slice[item]

            # Create new histogram
            return Histogram(edges = new_axes,
                             contents = new_contents,
                             sumw2 = new_sumw2,
                             unit = self._hist.unit)

    def _unit_operation(self, other, operation):
        """
        Get the value part of the other operand and the new unit of the histogram
        """

        # Separate between value and unit
        if isinstance(other, Histogram):
            other_unit = other.unit
            other_value = other # It will be handled as a regular HealpixMap
        elif isinstance(other, u.Quantity):
            other_unit = other.unit
            other_value = other.value
        elif isinstance(other, u.UnitBase):
            other_unit = other
            other_value = 1
        else:
            # float, int, array, list
            other_unit = None
            other_value = np.array(other)

        if self.unit is None and other_unit is  None:
            # If neither operand have units, do nothing else
            return other_value, None

        # Adjust other_value and self.unit depending on the operand

        # Standarize dimensionless
        if other_unit is None:
            other_unit = u.dimensionless_unscaled

        old_unit = self.unit
        if old_unit is None:
            old_unit = u.dimensionless_unscaled
            
        # For * and / the conversion factor is stored in the unit itself
        # ** only accepts scalar dimensionaless quantities, it will crash anyway
        # The idencity operator (for the raterizer) doesn't use the other's units
        if operation in [operator.add, operator.iadd,
                         operator.sub, operator.isub]:
            
            # +, -
            # We need to correct the value by the conversion unit
            # No change in units
            other_value = other_value * other_unit.to(old_unit)
            new_unit = old_unit
            
        elif operation in [operator.mul, operator.imul,
                           operator.truediv, operator.itruediv,
                           operator.floordiv, operator.ifloordiv]:

            # *, /
            # The conversion factor is stored in the unit itself
            new_unit = operation(old_unit, other_unit)
                
        else: 

            raise ValueError("Operation not supported")
            
        return other_value, new_unit

    
    def _ioperation(self, other, operation):

        sum_operation = operation in [operator.isub, operator.iadd,
                                      operator.sub,  operator.add]

        product_operation = operation in [operator.imul, operator.itruediv,
                                          operator.mul, operator.truediv]

        # Get the value part of other and the new unit the result should have
        other, new_unit = self._unit_operation(other, operation) 

        # Temporarily disable units
        self.to(None, update = False, copy = False)
        
        if isinstance(other, Histogram):

            # Temporarily disable units
            other_unit = other._unit
            other.to(None, update = False, copy = False)
            
            # Another histogram, same axes
            
            if self.axes != other.axes:
                raise ValueError("Axes mismatch")
            
            new_contents = operation(self.full_contents, other.full_contents)

            if self._sumw2 is not None or other._sumw2 is not None:

                if self._sumw2 is None or other._sumw2 is None:
                    logger.warning("Operation between histograms with and "
                                   "without sumw2. Using default.")
                
                if sum_operation:

                    self._sumw2._contents = self.sumw2.full_contents + other.sumw2.full_contents

                elif product_operation:

                    # Error of either f = A*B or f = A/B is
                    # f_err^2 = f^2 * ((A_err/A)^2 + (B_err/B)^2)
                    
                    relvar = (self.sumw2.full_contents/
                              (self.full_contents*self.full_contents))

                    other_relvar = (other.sumw2.full_contents/
                                    (other.full_contents*other.full_contents))
                    
                    self._sumw2._contents = (new_contents*new_contents*
                                             (relvar + other_relvar))

                else:
                    
                    raise ValueError("Operation not supported")

            self._contents = new_contents
            
            # Restore units
            other.to(other_unit, update = False, copy = False)

        else:

            # By scalar or array. Can be broadcasted

            if np.ndim(other) != 0:

                # Array. With or without under/overflow bins
                if not isinstance(other, (np.ndarray, SparseArray)):
                    other = np.array(other)

                if other.ndim != self.ndim:
                    raise ValueError("Operand number of dimensions ({}) does not"
                                     "match number of axes ({})".format(other.ndim,
                                                                        self.ndim))
                
                pad = []

                for n,(length,nbins) in enumerate(zip(other.shape, self.axes.nbins)):

                    if length == 1 or length == nbins+2:
                        # Single element axis can be broadcasted as is
                        # OR array includes under/overflow
                        pad += [(0,)]
                    elif length == nbins:
                        # Missing under and overflow, but right shape.
                        pad += [(1,)]
                    else:
                        raise ValueError("Could not broadcast array of shape {} "
                                         "to histogram with nbins "
                                         "{}".format(other.shape,
                                                     self.axes.nbins))

                other = np.pad(other, pad)
                                    
            self._contents = operation(self.full_contents, other)

            if self._sumw2 is not None:

                if sum_operation:

                    # sumw2 remains constant if summing/substracting a constant
                    pass
                    
                elif product_operation:
                    
                    self._sumw2 = operation(operation(self._sumw2, other), other)
                    
                else:
                    
                    raise ValueError("Operation not supported")

        # Restore unit
        self.to(new_unit, update = False, copy = False)
        
        return self

    def _operation(self, other, operation):

        new = deepcopy(self)

        new._ioperation(other, operation)

        return new
    
    def __imul__(self, other):

        return self._ioperation(other, operator.imul)
                
    def __mul__(self, other):

        return self._operation(other, operator.mul)

    def __rmul__(self, other):

        return self*other

    def __itruediv__(self, other):

        return self._ioperation(other, operator.itruediv)

    def __truediv__(self, other):

        return self._operation(other, operator.truediv)

    def __rtruediv__(self, other):
        """
        Divide a scalar by the histogram
        """

        if not np.isscalar(other):
            raise ValueError("Inverse operation can only occur between "
                             "histograms or a histogram and a scalar")

        new = deepcopy(self)

        # Simple change of unit in this case. Other can't be Quantity or Unit
        new_unit = None
        new_w2unit = None
        
        if new.unit is not None:
            new_unit = (1/new.unit).unit
            new_w2unit = new_unit**2

        # Temporarily disable unit
        new.to(None, update = False, copy = False)
        
        # Error propagtion of f = b/A (where b is constant, no error) is:
        # f_err^2 = f^2 (A_err/A)^2

        if new._sumw2 is not None:
            new._sumw2._contents = (new._sumw2.full_contents * other*other /
                                    np.power(new.full_contents, 4))

        new._contents = other/new.full_contents

        # Set units
        new.to(new_unit, update = False, copy = False)
        
        return new
    
    def __iadd__(self, other):

        return self._ioperation(other, operator.iadd)
            
    def __add__(self, other):

        return self._operation(other, operator.add)

    def __radd__(self, other):

        return self + other

    def __neg__(self):

        new = deepcopy(self)

        new._contents = -1*new.full_contents

        # No change to sumw2 or units

        return new
    
    def __isub__(self, other):

        return self._ioperation(other, operator.isub)
            
    def __sub__(self, other):

        return self._operation(other, operator.sub)

    def __rsub__(self, other):

        return -self + other

    def _comparison_operator(self, other, operation):

        if not self._getitem_units:
            other = self._strip_units(other)

        return operation(self.contents, other)
    
    def __lt__(self, other):
        return self._comparison_operator(other, operator.lt)
    
    def __le__(self, other):
        return self._comparison_operator(other, operator.le)
    
    def __gt__(self, other):
        return self._comparison_operator(other, operator.gt)
    
    def __ge__(self, other):
        return self._comparison_operator(other, operator.ge)
    
[docs]    def expand_dims(self, *args, **kwargs):
        """
        Same as h.axes.expand_dims().
        """
        return self._axes.expand_dims(*args, **kwargs)
    
[docs]    def broadcast(self, *args, **kwargs):
        """
        Same as h.axes.broadcast().
        """
        return self._axes.broadcast(*args, **kwargs)
    
[docs]    def expand_dict(self, *args, **kwargs):
        """
        Same as h.axes.expand_dict().
        """
        return self._axes.expand_dict(*args, **kwargs)

[docs]    def rebin(self, *ngroup):

        """
        Rebin the histogram by grouping multiple bins into a single one.

        Args:
            ngroup (int or array): Number of bins that will be combined for each 
                bin of the output. Can be a single number or a different number
                per axis. A number <0 indicates that the bins will start to be
                combined starting from the last one.

        Return:
            Histogram
        """

        # === Contents ===

        # New number of bins.
        ngroup = np.squeeze(ngroup)
        ngroup = np.broadcast_to(np.abs(ngroup), (self.ndim))
        ngroup_sign = np.sign(ngroup)
        
        new_nbins = np.floor(self.axes.nbins / ngroup).astype(int)

        if any(ngroup == 0):
            raise ValueError("ngroup cannot be 0")
        
        # Add padding do under/overflow bins match ngroup including "leftover bins"
        padding =  [(ngroup_i - 1,
                    ngroup_i - (1 + nbins_i - new_nbins_i * ngroup_i))
                    for ngroup_i, nbins_i, new_nbins_i
                    in zip(ngroup, self.axes.nbins, new_nbins)]

        # Rever direction if needed
        padding = [pad[::direction]
                   for pad,direction
                   in zip(padding,ngroup_sign)]
        
        new_contents = np.pad(self.full_contents, padding)

        # Sum every ngroup elements by reshaping first
        new_shape = np.empty(2*self.ndim, dtype = int)
        new_shape[0::2] = new_nbins+2
        new_shape[1::2] = ngroup
        new_shape = tuple(new_shape)
        sum_axis = tuple(np.arange(1,2*self.ndim,2))
        new_contents = np.sum(new_contents.reshape(new_shape), axis = sum_axis)

        # === Adjust edges ===
        new_axes = []
        for axis,ngroup_i,ngroup_sign_i in zip(self._axes, ngroup, ngroup_sign):

            # Very ngroup-th edge
            new_edges = axis.edges[::ngroup_i * ngroup_sign_i][::ngroup_sign_i]

            # New axis keeping properties
            new_axes += [Axis(new_edges,
                              label = axis.label,
                              scale = axis.axis_scale)]
            
        # === Sum weights square ===
        new_sumw2 = None
        
        if self._sumw2 is not None:
            new_sumw2 = self._sumw2.rebin(ngroup * ngroup_sign)

        # === New histogram ===
        return Histogram(new_axes, new_contents, new_sumw2, unit = self.unit)

[docs]    def plot(self, ax = None,
             errorbars = None,
             colorbar = True,
             label_axes = True,
             **kwargs):
        """
        Quick plot of the histogram contents. 

        Under/overflow bins are not included. Only 1D and 2D histograms 
        are supported.

        Histogram with a HealpixAxis will automatically be plotted 
        as a map, passing all kwargs to mhealpy's HealpixMap.plot()

        Args:
            ax (matplotlib.axes): Axes on where to draw the histogram. A new 
                one will be created by default.
            errorbars (bool or None): Include errorbar for 1D histograms. The default is to
                plot them if sumw2 is available
            colorbar (bool): Draw colorbar in 2D plots
            label_axes (bool): Label plots axes. Histogram axes must be labeled.
            **kwargs: Passed to `matplotlib.errorbar()` (1D) or 
                `matplotlib.pcolormesh` (2D)
        """

        # Matplotlib errorbar and pcolormesh need regular array
        contents = self.contents
        if self.is_sparse:
            contents = self.contents.todense()

        if self._getitem_units:
            contents = self._strip_units(contents)

        # Handle the special case of a healpix axis
        if self.ndim == 1 and isinstance(self.axis, HealpixAxis):

            # Pad in case it is partial map
            contents = np.pad(contents,
                              (self.axis.lo_lim,
                               self.axis.npix - self.axis.hi_lim))

            m = HealpixMap(data = contents, base = self.axis)

            args = ()
            if ax is not None:
                args = (ax,)
                
            plot, ax = m.plot(*args,
                              cbar = colorbar,
                              **kwargs)

            return ax, plot            
            
        # Default errorbars
        if errorbars is None:
            if self.sumw2 is None:
                errorbars = False
            else:
                errorbars = True
                
        # Create axes if needed (with labels)
        if ax is None:
            fig,ax = plt.subplots()

        # Plot, depending on number of dimensions
        if self.ndim == 1:

            # We have two points per bin (lower edge+center), and 2 extra
            # point for under/overflow (these currently don't have errorbar,
            # they looked bad)
            if self.axis.unit is None:
                xdata = np.empty(2*self.nbins + 2)
            else:
                xdata = np.empty(2*self.nbins + 2)*self.axis.unit
                
            xdata[0] = self.axis.edges[0] # For underflow, first edge
            xdata[1::2] = self.axis.edges # In between edges. Last edge for overflow
            xdata[2::2] = self.axis.centers # For markers

            underflow = self[-1]
            overflow = self[self.nbins]

            if self._getitem_units:
                underflow = self._strip_units(underflow)
                overflow = self._strip_units(overflow)
            
            ydata = np.concatenate(([underflow],
                                    np.repeat(contents, 2),
                                    [overflow]))

            # Style
            drawstyle = kwargs.pop('drawstyle', 'steps-post')

            # Error bars
            yerr = None

            if errorbars:

                errors = self.bin_error.contents
                if self.is_sparse:
                    # No auto densify
                    errors = errors.todense()

                yerr = np.empty(2*self.nbins + 2)
                yerr[2::2] = errors
                yerr[0] = None # No underflow errorbar, looked bad
                yerr[1::2] = None # No overflow errorbar, looked bad
                
            # Plot
            plot = ax.errorbar(u.Quantity(xdata).value,
                               ydata,
                               yerr = yerr,
                               drawstyle = drawstyle,
                               **kwargs)

            # Label axes
            if label_axes:
                ax.set_xlabel(self.axis.label_with_unit)                

                if self.unit not in [None, u.dimensionless_unscaled]:
                    ax.set_ylabel(f"[{self.unit}]")
                
        elif self.ndim == 2:

            # No under/overflow
            plot = ax.pcolormesh(u.Quantity(self.axes[0].edges).value,
                                 u.Quantity(self.axes[1].edges).value,
                                 np.transpose(contents),
                                 **kwargs)

            if label_axes:
                ax.set_xlabel(self.axes[0].label_with_unit)
                ax.set_ylabel(self.axes[1].label_with_unit)

            if colorbar:

                cax = ax.get_figure().colorbar(plot, ax = ax)

                if self.unit not in [None, u.dimensionless_unscaled]:
                    cax.set_label(f"[{self.unit}]")
                    
        else:

            raise ValueError("Plotting only available for 1D and 2D histograms")
            
        if self.axes[0].axis_scale == 'log':
                ax.set_xscale('log')

        if self.ndim > 1:
            if self.axes[1].axis_scale == 'log':
                ax.set_yscale('log')
                
        return ax,plot

    draw = plot
    
[docs]    def fit(self, f, lo_lim = None, hi_lim = None, **kwargs):
        """
        Fit histogram data using least squares.

        This is a convenient call to scipy.optimize.curve_fit. Sigma corresponds
        to the output of `h.bin_error`. Empty bins (e.g. error equals 0) are 
        ignored

        Args:
            f (callable): Function f(x),... that takes the independent variable 
                x as first argument, and followed by the parameters to be fitted.
                For a k-dimensional histogram is should handle arrays of shape 
                (k,) or (k,N).
            lo_lim (float or array): Low axis limit to fit. One value per axis.
            lo_lim (float or array): High axis limit to fit. One value per axis.
            **kwargs: Passed to scipy.optimize.curve_fit
        """
        
        # Sanity checks
        for axis,lo,hi in  zip(self.axes,
                          np.broadcast_to(lo_lim, self.ndim),
                          np.broadcast_to(hi_lim, self.ndim)):

            if ((lo is not None and lo < axis.lo_lim) or
                (hi is not None and hi >= axis.hi_lim)):
                raise ValueError("Fit limits out of bounds")

        # Get bins that correspond to the fit limits
        lim_bins = tuple([slice(None if lo is None else axis.find_bin(lo),
                                None if hi is None else axis.find_bin(hi))
                          for axis,lo,hi
                          in zip(self.axes,
                                 np.broadcast_to(lo_lim, self.ndim),
                                 np.broadcast_to(hi_lim, self.ndim))])

        # Get data to fit
        x = [axis.centers[bins] for axis,bins in zip(self.axes,lim_bins)]
        x = np.meshgrid(*x, indexing='ij') # For multi-dimensional histograms
        y = self[lim_bins]
        sigma = self.bin_error[lim_bins]

        if self._getitem_units:
            y = y.value
        
        if self.is_sparse:
            y = y.todense()
            sigma = sigma.todense()
        
        # Ignore empty bins
        non_empty = sigma != 0 
        x = [centers[non_empty] for centers in x]
        y = y[non_empty]
        sigma = sigma[non_empty]

        # Flat matrices
        if self.ndim > 1:
            x = [centers.flatten() for centers in x]
            y = y.flatten()
            sigma = sigma.flatten()
        else:
            x = x[0]

        # Sanity checks
        if len(x) < len(signature(f).parameters)-1:
            raise RuntimeError("Less bins within limits than parameters to fit.")
            
        # Actual fit with scipy
        return curve_fit(f, x, y, sigma = sigma, **kwargs)
    
[docs]    def write(self, filename, name = "hist", overwrite = False):
        """
        Write histogram to disk.

        It will be save as a group in a HDF5 file. Appended if the file already 
        exists.

        Args:
            filename (str): Path to file
            name (str): Name of group to save histogram (can be any HDF5 path)
            overwrite (str): Delete and overwrite group if already exists.
        """

        with h5.File(filename, 'a') as f:

            # Will fail on existing group by default
            if name in f:
                if overwrite:
                    del f[name]
                else:
                    raise ValueError("Unable to write histogram. Another group "
                                     "with the same name already exists. Choose "
                                     "a different name or use overwrite")

            # Contents
            hist_group = f.create_group(name)

            if self.unit is not None:
                hist_group.attrs['unit'] = str(self.unit)
                
            if self.is_sparse:

                hist_group.attrs['format'] = 'coo'

                contents_group = hist_group.create_group('contents')

                contents = self._contents.asformat('coo')
                
                contents_group.create_dataset('coords',
                                              data = contents.coords,
                                              compression = "gzip")
                contents_group.create_dataset('data',
                                              data = contents.data,
                                              compression = "gzip")
                contents_group.create_dataset('shape', data = contents.shape)
                contents_group.create_dataset('fill_value', data = contents.fill_value)

                if self._sumw2 is not None:

                    sumw2_group = hist_group.create_group('sumw2')

                    sumw2_contents = self._sumw2._contents.asformat('coo')
                    
                    sumw2_group.create_dataset('coords',
                                               data = sumw2_contents.coords,
                                               compression = "gzip")
                    sumw2_group.create_dataset('data',
                                               data = sumw2_contents.data,
                                               compression ="gzip")
                    sumw2_group.create_dataset('shape',
                                              data = sumw2_contents.shape)
                    sumw2_group.create_dataset('fill_value',
                                              data = sumw2_contents.fill_value)

            else:
                    
                hist_group.attrs['format'] = 'dense'
                
                hist_group.create_dataset('contents', data = self._contents)

                if self._sumw2 is not None:
                    hist_group.create_dataset('sumw2', data = self._sumw2._contents)

            # Axes. Each one is a data set with attributes
            axes_group = hist_group.create_group('axes', track_order = True)

            for i,axis in enumerate(self.axes):

                axis._write(axes_group, str(i))
                
[docs]    @classmethod
    def open(cls, filename, name = 'hist'):
        """
        Read histogram from disk.

        Args:
            filename (str): Path to file
            name (str): Name of group where the histogram was saved.
        """

        with h5.File(filename, 'r') as f:

            hist_group = f[name]

            unit = None
            if 'unit' in hist_group.attrs:
                unit = u.Unit(hist_group.attrs['unit'])
                
            # Contents
            # Backwards compatible before sparse was supported
            if ('format' not in hist_group.attrs or
                hist_group.attrs['format'] == 'dense'):
            
                contents = np.array(hist_group['contents'])

                sumw2 = None
                if 'sumw2' in hist_group:
                    sumw2 = np.array(hist_group['sumw2'])

            elif hist_group.attrs['format'] == 'gcxs':

                contents_group = hist_group['contents']

                compressed_axes = None
                if 'compressed_axes' in contents_group:
                    compressed_axes = np.array(contents_group['compressed_axes'])
                
                contents = GCXS((np.array(contents_group['data']),
                                 np.array(contents_group['indices']),
                                 np.array(contents_group['indptr'])),
                                compressed_axes = compressed_axes,
                                shape = tuple(contents_group['shape']),
                                fill_value = np.array(contents_group['fill_value']).item())

                sumw2 = None
                if 'sumw2' in hist_group:
                    sumw2_group = hist_group['sumw2']
                
                    compressed_axes = None
                    if 'compressed_axes' in sumw2_group:
                        compressed_axes = np.array(sumw2_group['compressed_axes'])
                        
                    sumw2 = GCXS((np.array(sumw2_group['data']),
                                  np.array(sumw2_group['indices']),
                                  np.array(sumw2_group['indptr'])),
                                 compressed_axes = compressed_axes,
                                 shape = tuple(sumw2_group['shape']),
                                 fill_value = np.array(sumw2_group['fill_value']).item())
                    
            elif hist_group.attrs['format'] == 'coo':

                contents_group = hist_group['contents']
                
                contents = COO(coords = np.array(contents_group['coords']),
                               data = np.array(contents_group['data']),
                               shape = tuple(contents_group['shape']),
                               fill_value = np.array(contents_group['fill_value']).item())

                sumw2 = None
                if 'sumw2' in hist_group:
                    sumw2_group = hist_group['sumw2']
                
                    sumw2 = COO(coords = np.array(sumw2_group['coords']),
                                data = np.array(sumw2_group['data']),
                                shape = tuple(sumw2_group['shape']),
                                fill_value = np.array(sumw2_group['fill_value']).item())

            else:
                raise IOError(f"Format {hist_group.attrs['format']} unknown.")
                    
            # Axes
            axes_group = hist_group['axes']

            axes = []
            for axis in axes_group.values():

                # Get class. Backwards compatible with version
                # with only Axis
                axis_cls = Axis

                if '__class__' in axis.attrs:
                    class_module, class_name = axis.attrs['__class__']
                    axis_cls = getattr(sys.modules[class_module], class_name)

                axes += [axis_cls._open(axis)]
                
        return Histogram(axes, contents = contents, sumw2 = sumw2, unit = unit)