From e0583dd2b959928ed89766d25ca6fc684b757cae Mon Sep 17 00:00:00 2001
From: Tomas Pereira de Vasconcelos <tomasvasconcelos1@gmail.com>
Date: Fri, 26 Dec 2025 18:42:27 +0800
Subject: [PATCH] Parametrize and extend bin_trace_samples tests in test_hist.py

Signed-off-by: Tomas Pereira de Vasconcelos <tomasvasconcelos1@gmail.com>
---
 tests/unit/test_hist.py | 188 +++++++++++++++++++++++++++-------------
 1 file changed, 130 insertions(+), 58 deletions(-)

diff --git a/tests/unit/test_hist.py b/tests/unit/test_hist.py
index 0008bbbe..2f5fbd60 100644
--- a/tests/unit/test_hist.py
+++ b/tests/unit/test_hist.py
@@ -8,89 +8,161 @@
     bin_trace_samples,
 )
 
-# Example data
+# ==============================================================
+# ---  bin_trace_samples()
+# ==============================================================
 
-SAMPLES_IN = [1, 2, 2, 3, 4]
-NBINS = 4
-# NOTE: The x values in DENSITIES_OUT correspond to the centers of
-#       equally spaced bins over the range [1, 4].
-#       This can be counterintuitive for count data, as the bins
-#       do not align with the integer sample values.
-DENSITIES_OUT = [(1.375, 1), (2.125, 2), (2.875, 1), (3.625, 1)]
-X_OUT, Y_OUT = zip(*DENSITIES_OUT, strict=True)
 
-WEIGHTS = [1, 1, 1, 1, 9]
+# --- Basic functionality ---
+
+
+@pytest.mark.parametrize(
+    ("samples", "nbins", "expected"),
+    [
+        # Repeated values; x values are the centers of 4 equal-width bins over [1, 4]
+        ([1, 2, 2, 3, 4], 4, [(1.375, 1), (2.125, 2), (2.875, 1), (3.625, 1)]),
+        # Single bin aggregates all samples
+        ([1, 2, 3], 1, [(2.0, 3)]),
+        # Evenly spaced samples: exactly one sample per bin
+        ([0, 1, 2, 3], 4, [(0.375, 1), (1.125, 1), (1.875, 1), (2.625, 1)]),
+        # Identical samples: bins span [2.5, 3.5] and all samples land in the rightmost bin
+        ([3, 3, 3], 2, [(2.75, 0), (3.25, 3)]),
+        # Negative values
+        ([-2, -1, 0, 1], 2, [(-1.25, 2), (0.25, 2)]),
+    ],
+    ids=["basic", "single_bin", "uniform", "identical", "negative"],
+)
+def test_basic_binning(
+    samples: list[float], nbins: int, expected: list[tuple[float, float]]
+) -> None:
+    result = bin_trace_samples(samples, nbins=nbins)
+    assert result == expected
 
-# ==============================================================
-# ---  estimate_density_trace()
-# ==============================================================
+
+def test_float_samples_binning() -> None:
+    result = bin_trace_samples([0.1, 0.5, 0.9], nbins=3)
+    x_vals, y_vals = zip(*result, strict=True)
+    assert x_vals == pytest.approx((0.233, 0.5, 0.767), rel=1e-2)
+    assert y_vals == (1.0, 1.0, 1.0)
 
 
-def test_bin_trace_samples_simple() -> None:
-    density_trace = bin_trace_samples(trace_samples=SAMPLES_IN, nbins=NBINS)
-    x, y = zip(*density_trace, strict=True)
-    assert x == X_OUT
-    assert y == Y_OUT
+@pytest.mark.parametrize("nbins", [1, 2, 5, 10, 50])
+def test_output_length_matches_nbins(nbins: int) -> None:
+    result = bin_trace_samples([1, 2, 3, 4, 5], nbins=nbins)
+    assert len(result) == nbins
 
 
-@pytest.mark.parametrize("nbins", [2, 5, 8, 11])
-def test_bin_trace_samples_nbins(nbins: int) -> None:
-    density_trace = bin_trace_samples(trace_samples=SAMPLES_IN, nbins=nbins)
-    assert len(density_trace) == nbins
+@pytest.mark.parametrize(
+    "input_type",
+    [list, tuple, np.array],
+    ids=["list", "tuple", "ndarray"],
+)
+def test_accepts_various_input_types(input_type: type) -> None:
+    samples = input_type([1, 2, 3])
+    result = bin_trace_samples(samples, nbins=2)
+    assert len(result) == 2
+    assert all(isinstance(x, float) and isinstance(y, float) for x, y in result)
 
 
-@pytest.mark.parametrize("non_finite_value", [np.inf, np.nan, float("inf"), float("nan")])
-def test_bin_trace_samples_fails_for_non_finite_values(non_finite_value: float) -> None:
-    err_msg = "The samples array should not contain any infs or NaNs."
-    with pytest.raises(ValueError, match=err_msg):
-        bin_trace_samples(trace_samples=[*SAMPLES_IN[:-1], non_finite_value], nbins=NBINS)
+def test_counts_sum_to_sample_size() -> None:
+    samples = list(range(100))
+    result = bin_trace_samples(samples, nbins=7)
+    total_count = sum(y for _, y in result)
+    assert total_count == len(samples)
 
 
-def test_bin_trace_samples_weights() -> None:
-    density_trace = bin_trace_samples(
-        trace_samples=SAMPLES_IN,
-        nbins=NBINS,
-        weights=WEIGHTS,
-    )
-    x, y = zip(*density_trace, strict=True)
-    assert x == X_OUT
-    assert np.argmax(y) == len(y) - 1
+def test_bin_centers_within_data_range() -> None:
+    samples = [10, 20, 30, 40, 50]
+    result = bin_trace_samples(samples, nbins=5)
+    centers = [x for x, _ in result]
+    assert all(min(samples) <= c <= max(samples) for c in centers)
 
 
-def test_bin_trace_samples_weights_not_same_length() -> None:
-    with pytest.raises(
-        ValueError, match="The weights array should have the same length as the samples array"
-    ):
-        bin_trace_samples(trace_samples=SAMPLES_IN, nbins=NBINS, weights=[1, 1, 1])
+# --- Weights ---
 
 
-@pytest.mark.parametrize("non_finite_value", [np.inf, np.nan, float("inf"), float("nan")])
-def test_bin_trace_samples_weights_fails_for_non_finite_values(
-    non_finite_value: float,
+@pytest.mark.parametrize(
+    ("samples", "weights", "nbins", "expected_counts"),
+    [
+        # Weights shift distribution
+        ([1, 2, 3], [10, 1, 1], 3, [10, 1, 1]),
+        # Zero weights effectively exclude samples
+        ([1, 2, 3], [1, 0, 1], 3, [1, 0, 1]),
+        # Fractional weights
+        ([1, 2], [0.5, 1.5], 2, [0.5, 1.5]),
+    ],
+    ids=["heavy_first", "zero_weight", "fractional"],
+)
+def test_weights_affect_counts(
+    samples: list[float],
+    weights: list[float],
+    nbins: int,
+    expected_counts: list[float],
 ) -> None:
-    err_msg = "The weights array should not contain any infs or NaNs."
-    with pytest.raises(ValueError, match=err_msg):
-        bin_trace_samples(
-            trace_samples=SAMPLES_IN,
-            nbins=NBINS,
-            weights=[*WEIGHTS[:-1], non_finite_value],
-        )
+    result = bin_trace_samples(samples, nbins=nbins, weights=weights)
+    counts = [y for _, y in result]
+    assert counts == pytest.approx(expected_counts)
+
+
+def test_weighted_counts_sum_to_weight_sum() -> None:
+    samples = [1, 2, 3, 4, 5]
+    weights = [2.0, 3.0, 1.5, 0.5, 4.0]
+    result = bin_trace_samples(samples, nbins=3, weights=weights)
+    assert sum(y for _, y in result) == pytest.approx(sum(weights))
+
+
+# --- Error handling ---
+
+
+@pytest.mark.parametrize(
+    "non_finite",
+    [np.inf, -np.inf, np.nan, float("inf"), float("nan")],
+    ids=["inf", "neg_inf", "nan", "float_inf", "float_nan"],
+)
+def test_rejects_non_finite_samples(non_finite: float) -> None:
+    with pytest.raises(ValueError, match="samples array should not contain any infs or NaNs"):
+        bin_trace_samples([1, 2, non_finite], nbins=2)
+
+
+@pytest.mark.parametrize(
+    "non_finite",
+    [np.inf, -np.inf, np.nan, float("inf"), float("nan")],
+    ids=["inf", "neg_inf", "nan", "float_inf", "float_nan"],
+)
+def test_rejects_non_finite_weights(non_finite: float) -> None:
+    with pytest.raises(ValueError, match="weights array should not contain any infs or NaNs"):
+        bin_trace_samples([1, 2, 3], nbins=2, weights=[1, non_finite, 1])
+
+
+@pytest.mark.parametrize(
+    ("samples", "weights"),
+    [
+        ([1, 2, 3], [1, 2]),
+        ([1, 2], [1, 2, 3]),
+        ([1], []),
+    ],
+    ids=["weights_short", "weights_long", "empty_weights"],
+)
+def test_rejects_mismatched_weights_length(samples: list[float], weights: list[float]) -> None:
+    with pytest.raises(ValueError, match="weights array should have the same length"):
+        bin_trace_samples(samples, nbins=2, weights=weights)
 
 
 # ==============================================================
-# ---  estimate_densities()
+# ---  bin_samples()
 # ==============================================================
 
 
 def test_bin_samples() -> None:
-    densities = bin_samples(
-        samples=[[SAMPLES_IN], [SAMPLES_IN]],
-        nbins=NBINS,
-    )
+    samples = [1, 2, 2, 3, 4]
+    nbins = 4
+    expected = [(1.375, 1), (2.125, 2), (2.875, 1), (3.625, 1)]
+    x_out, y_out = zip(*expected, strict=True)
+    densities = bin_samples(samples=[[samples], [samples]], nbins=nbins)
     assert len(densities) == 2
     for densities_row in densities:
         assert len(densities_row) == 1
         density_trace = next(iter(densities_row))
         x, y = zip(*density_trace, strict=True)
-        assert x == X_OUT
-        assert y == Y_OUT
+        assert x == x_out
+        assert y == y_out