Source code for plothist.histogramming

from __future__ import annotations

import warnings
from collections.abc import Callable, Sequence

import boost_histogram as bh
import numpy as np


# Define a custom warning for range issues
class RangeWarning(Warning):
    """Warning category for data that falls outside of a histogram binning range."""
# Always show the range warnings, instead of letting Python's default filter
# report a repeated warning only once per call site.
warnings.filterwarnings("always", category=RangeWarning)
def create_axis(
    bins: int | list[float] | tuple[float, ...] | np.ndarray,
    range: tuple[float | str, float | str] | None = None,
    data: list[float] | np.ndarray | None = None,
    overflow: bool = False,
    underflow: bool = False,
) -> bh.axis.Regular | bh.axis.Variable:
    """
    Create an axis object for histogram binning based on the input data and parameters.

    Parameters
    ----------
    bins : int or list[float] or tuple[float, ...] or np.ndarray
        The number of bins (Python or NumPy integer) or the explicit bin edges
        (list, tuple or array) for the axis.
    range : None or tuple[float | str, float | str], optional
        The range of the axis. Each bound is either a number or the string "min"
        (lower bound) / "max" (upper bound), in which case it is taken from the data.
        If None, the range is determined from the data, or set to [0, 1] when the
        data is empty. Ignored (with a warning) when bin edges are supplied.
        Default is None.
    data : list[float] or np.ndarray, optional
        The input data for determining the axis range. Default is None.
    overflow : bool, optional
        Whether to include an overflow bin. If False, the upper edge of the last bin is inclusive.
        Default is False.
    underflow : bool, optional
        Whether to include an underflow bin. Default is False.

    Returns
    -------
    Axis object
        A regular axis if bins is an integer, a variable axis if bins is a
        sequence of bin edges.

    Raises
    ------
    ValueError
        If the range parameter is invalid (min larger than max) or not finite.
    ValueError
        If the number of bins is not positive.
    ValueError
        If the range parameter contains "min" or "max" but the data is empty.
    """
    if data is None:
        data = np.array([])

    # Explicit bin edges: tuples are accepted alongside lists and arrays so that
    # every sequence of edges ends up on a variable axis.
    if isinstance(bins, (list, tuple, np.ndarray)):
        if range is not None:
            warnings.warn(
                f"Custom binning -> ignore supplied range ({range}).", stacklevel=2
            )
        return bh.axis.Variable(bins, underflow=underflow, overflow=overflow)

    # NumPy integers are not instances of the builtin int, so check both to
    # make sure a non-positive count is always rejected here.
    if isinstance(bins, (int, np.integer)) and bins <= 0:
        raise ValueError(f"Number of bins must be positive, but got {bins}.")

    # Inspired from np.histograms
    if range is not None:
        if len(data) == 0 and (range[0] == "min" or range[1] == "max"):
            raise ValueError(
                "Cannot use 'min'/'max' range values with empty data. "
                "Please supply a range or provide data."
            )
        x_min = min(data) if range[0] == "min" else float(range[0])
        x_max = max(data) if range[1] == "max" else float(range[1])
        if x_min > x_max:
            raise ValueError(
                f"Range of [{x_min}, {x_max}] is not valid. Max must be larger than min."
            )
        if not (np.isfinite(x_min) and np.isfinite(x_max)):
            raise ValueError(f"Range of [{x_min}, {x_max}] is not finite.")
    elif len(data) == 0:
        # handle empty arrays. Can't determine range, so use 0-1.
        x_min, x_max = 0.0, 1.0
    else:
        x_min = float(min(data))
        x_max = float(max(data))
        if not (np.isfinite(x_min) and np.isfinite(x_max)):
            raise ValueError(f"Autodetected range of [{x_min}, {x_max}] is not finite.")

    # expand empty range to avoid divide by zero
    if x_min == x_max:
        x_min = x_min - 0.5
        x_max = x_max + 0.5

    return bh.axis.Regular(bins, x_min, x_max, underflow=underflow, overflow=overflow)
def make_hist(
    data: list[float] | np.ndarray | None = None,
    bins: int | list[float] | np.ndarray = 50,
    range: tuple[float | str, float | str] | None = None,
    weights: float | list[float] | np.ndarray = 1,
    mute_warning: bool = False,
) -> bh.Histogram:
    """
    Create a histogram object and fill it with the provided data.

    Parameters
    ----------
    data : list[float] or np.ndarray, optional
        1D array-like data used to fill the histogram (default is None).
        If None is provided, an empty histogram is returned.
    bins : int or list[float], optional
        Binning specification for the histogram (default is 50).
        If an integer, it represents the number of bins.
        If a list, it should be the explicit list of all bin edges.
    range : tuple[float | str, float | str], optional
        The range of values to consider for the histogram bins (default is None).
        If None, the range is determined from the data.
    weights : float or list[float] or np.ndarray, optional
        Weight(s) to apply to the data points (default is 1).
        If a scalar (Python or NumPy), a single weight is applied to all data points.
        If an array-like, weights are applied element-wise.
    mute_warning : bool, optional
        Whether to mute warnings about data outside the binning range (default is False).

    Returns
    -------
    histogram : bh.Histogram
        The filled histogram object.

    Warns
    -----
    RangeWarning
        If more than 1% of the (weighted) data is outside of the binning range.
        No check is made when the total weight is zero, since the fraction is
        undefined in that case.
    """
    if data is None:
        data = np.array([])

    axis = create_axis(bins, range, data)
    h = bh.Histogram(axis, storage=bh.storage.Weight())

    if len(data) == 0:
        return h

    h.fill(data, weight=weights, threads=0)

    if mute_warning:
        return h

    # Total weight of the input data. A scalar weight is detected through its
    # dimensionality rather than isinstance(int, float), so that NumPy scalars
    # such as np.float32 are scaled by the number of entries as well.
    weights_array = np.asarray(weights)
    if weights_array.ndim == 0:
        total_weight = len(data) * float(weights_array)
    else:
        total_weight = float(weights_array.sum())

    # The coverage is undefined for a vanishing total weight; skip the check
    # instead of dividing by zero.
    if total_weight == 0:
        return h

    # Proportion of the data that ended up inside of the binning range
    range_coverage = h.sum().value / total_weight

    # Issue a warning if more than 1% of the data is outside of the binning range
    if range_coverage < 0.99:
        warnings.warn(
            f"Only {100 * range_coverage:.2f}% of data contained in the binning range [{axis.edges[0]}, {axis.edges[-1]}].",
            category=RangeWarning,
            stacklevel=2,
        )

    return h
def make_2d_hist(
    data: list[np.ndarray] | np.ndarray | None = None,
    bins: Sequence[int | Sequence[float]] | None = None,
    range: tuple[
        tuple[float | str, float | str] | None, tuple[float | str, float | str] | None
    ] = (None, None),
    weights: float | list[float] | np.ndarray = 1,
    mute_warning: bool = False,
) -> bh.Histogram:
    """
    Create a 2D histogram object and fill it with the provided data.

    Parameters
    ----------
    data : list[np.ndarray] or np.ndarray, optional
        2D array-like data used to fill the histogram (default is None).
        The first component holds the x values, the second the y values.
        If None is provided, an empty histogram is returned.
    bins : Sequence[int | Sequence[float]], optional
        Binning specification for each dimension of the histogram (if None, it will be set to [50, 50]).
        Each element is either the number of bins for the corresponding dimension
        or the explicit bin edges (for non-constant bin size).
    range : tuple[tuple[float | str, float | str] | None, tuple[float | str, float | str] | None], optional
        The range of values to consider for each dimension of the histogram (default is (None, None)).
        If an element is None, the range is determined from the data for that dimension.
        Passing None for the whole argument is equivalent to (None, None).
    weights : float or list[float] or np.ndarray, optional
        Weight(s) to apply to the data points (default is 1).
        If a scalar (Python or NumPy), a single weight is applied to all data points.
        If an array-like, weights are applied element-wise.
    mute_warning : bool, optional
        Whether to mute warnings about data outside the binning range (default is False).

    Returns
    -------
    histogram : bh.Histogram
        The filled 2D histogram object.

    Raises
    ------
    ValueError
        If the data does not have two components or if the lengths of x and y are not equal.

    Warns
    -----
    RangeWarning
        If more than 1% of the (weighted) data is outside of the binning range.
        No check is made when the total weight is zero, since the fraction is
        undefined in that case.
    """
    if data is None:
        data = np.array([[], []])

    if len(data) != 2:
        raise ValueError("data should have two components, x and y")
    if len(data[0]) != len(data[1]):
        raise ValueError("x and y must have the same length.")

    if bins is None:
        bins = [50, 50]
    # Accept a bare None as "autodetect both ranges" rather than failing on range[0]
    if range is None:
        range = (None, None)

    x_axis = create_axis(bins[0], range[0], data[0])
    y_axis = create_axis(bins[1], range[1], data[1])

    h = bh.Histogram(
        x_axis,
        y_axis,
        storage=bh.storage.Weight(),
    )

    if len(data[0]) == 0:
        return h

    h.fill(*data, weight=weights, threads=0)

    if mute_warning:
        return h

    # Total weight of the input data. A scalar weight is detected through its
    # dimensionality rather than isinstance(int, float), so that NumPy scalars
    # such as np.float32 are scaled by the number of entries as well.
    weights_array = np.asarray(weights)
    if weights_array.ndim == 0:
        total_weight = len(data[0]) * float(weights_array)
    else:
        total_weight = float(weights_array.sum())

    # The coverage is undefined for a vanishing total weight; skip the check
    # instead of dividing by zero.
    if total_weight == 0:
        return h

    # Proportion of the data that ended up inside of the binning range
    range_coverage = h.sum().value / total_weight

    # Issue a warning if more than 1% of the data is outside of the binning range
    if range_coverage < 0.99:
        warnings.warn(
            f"Only {100 * range_coverage:.2f}% of data contained in the binning range ([{x_axis.edges[0]}, {x_axis.edges[-1]}], [{y_axis.edges[0]}, {y_axis.edges[-1]}]).",
            category=RangeWarning,
            stacklevel=2,
        )

    return h
def _check_counting_histogram(hist: bh.Histogram) -> None:
    """
    Ensure that the given histogram is a counting histogram.

    Parameters
    ----------
    hist : bh.Histogram
        The histogram to check.

    Raises
    ------
    ValueError
        If the kind of the histogram is not bh.Kind.COUNT.
    """
    if hist.kind == bh.Kind.COUNT:
        return
    raise ValueError(
        f"The histogram must be a counting histogram, but the input histogram has kind {hist.kind}."
    )


def _make_hist_from_function(
    func: Callable[[np.ndarray], np.ndarray], ref_hist: bh.Histogram
) -> bh.Histogram:
    """
    Build a histogram by evaluating a function on the binning of a reference histogram.

    The result reuses the axis of the reference histogram. Each bin is set to the
    value of the function at the center of that bin, paired with a column of zeros.

    Parameters
    ----------
    func : Callable[[np.ndarray], np.ndarray]
        1D function. It is called once with the array of bin centers, so it
        should support vectorization (i.e. accept a numpy array as input).
    ref_hist : bh.Histogram
        The reference 1D histogram providing the binning.

    Returns
    -------
    hist : bh.Histogram
        The histogram filled with the function.

    Raises
    ------
    ValueError
        If the reference histogram is not 1D.
    """
    if len(ref_hist.axes) != 1:
        raise ValueError("The reference histogram must be 1D.")

    axis = ref_hist.axes[0]
    bin_centers = axis.centers

    hist = bh.Histogram(axis, storage=bh.storage.Weight())
    # Two columns per bin: the function value and a zero
    # (presumably read by the Weight storage as value and variance).
    hist[:] = np.c_[func(bin_centers), np.zeros_like(bin_centers)]
    return hist
def flatten_2d_hist(hist: bh.Histogram) -> bh.Histogram:
    """
    Unroll a 2D counting histogram into a 1D histogram.

    The output has one bin per cell of the input, on a regular axis spanning
    [0, n_bins]. Values and variances are copied in the order produced by
    numpy's ``flatten``.

    Parameters
    ----------
    hist : bh.Histogram
        The 2D histogram to be flattened.

    Returns
    -------
    bh.Histogram
        The flattened 1D histogram.

    Raises
    ------
    ValueError
        If the input histogram is not a counting histogram.
    ValueError
        If the input histogram is not 2D.
    """
    _check_counting_histogram(hist)

    if len(hist.axes) != 2:
        raise ValueError("The input histogram must be 2D.")

    n_bins = hist.axes[0].size * hist.axes[1].size
    flat_axis = bh.axis.Regular(n_bins, 0, n_bins)
    flatten_hist = bh.Histogram(flat_axis, storage=bh.storage.Weight())

    flat_values = hist.values().flatten()
    flat_variances = hist.variances().flatten()
    # One (value, variance) pair per flattened bin
    flatten_hist[:] = np.c_[flat_values, flat_variances]

    return flatten_hist