Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions scoringrules/_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def es_ensemble(
ens_w: "Array" = None,
estimator: str = "nrg",
backend: "Backend" = None,
**kwargs,
) -> "Array":
Comment on lines 16 to 26
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

es_ensemble now accepts arbitrary **kwargs, but the docstring doesn't document any additional keyword arguments and (for non-akr_kband estimators) extra kwargs are silently ignored. This makes it easy for user typos to go unnoticed. Prefer adding an explicit keyword-only k: int = 1 parameter (documented in the Parameters section) and rejecting unexpected kwargs (or, if you keep **kwargs, validate that kwargs is empty when the estimator doesn't consume them).

Copilot uses AI. Check for mistakes.
r"""Compute the Energy Score for a finite multivariate ensemble.

Expand Down Expand Up @@ -71,18 +72,39 @@ def es_ensemble(
Some theoretical background on scoring rules for multivariate forecasts.
"""
obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend)

if estimator == "akr_kband":
k = kwargs.get("k", 1)

if ens_w is None:
if backend == "numba":
estimator_check(estimator, energy.estimator_gufuncs)
return energy.estimator_gufuncs[estimator](obs, fct)
if estimator == "akr_kband":
return energy.estimator_gufuncs[estimator](obs, fct, k)
else:
return energy.estimator_gufuncs[estimator](obs, fct)
Comment on lines +76 to +85
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

k is pulled from kwargs and then passed into the numba gufunc path without any validation. If a caller supplies k=0 (or a non-int), the gufunc implementations will hit division-by-zero (1/(M*k) or 1/k) or type errors rather than a clean ValueError like the non-numba path. Add validation here (e.g., ensure k is an int >= 1, and optionally k <= M-1) before dispatching to energy.estimator_gufuncs[...].

Copilot uses AI. Check for mistakes.
else:
if estimator == "akr_kband":
return energy.es(obs, fct, estimator=estimator, backend=backend, k=k)
return energy.es(obs, fct, estimator=estimator, backend=backend)
else:
ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend)
if backend == "numba":
estimator_check(estimator, energy.estimator_gufuncs_w)
return energy.estimator_gufuncs_w[estimator](obs, fct, ens_w)
if estimator == "akr_kband":
return energy.estimator_gufuncs_w[estimator](obs, fct, k, ens_w)
else:
return energy.estimator_gufuncs_w[estimator](obs, fct, ens_w)
else:
if estimator == "akr_kband":
return energy.es_w(
obs,
fct,
ens_w,
estimator=estimator,
backend=backend,
k=k,
)
return energy.es_w(obs, fct, ens_w, estimator=estimator, backend=backend)


Expand Down
18 changes: 18 additions & 0 deletions scoringrules/core/energy/_gufuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,22 @@ def _energy_score_akr_circperm_gufunc(
out[0] = e_1 / M - 0.5 * 1 / M * e_2


@guvectorize("(d),(m,d),()->()")
def _energy_score_akr_kband_gufunc(
obs: np.ndarray, fct: np.ndarray, k: int, out: np.ndarray
):
"""Compute the Energy Score for a finite ensemble using the AKR with k-band approximation."""
M = fct.shape[0]
e_1 = 0.0
e_2 = 0.0
for i in range(M):
e_1 += float(np.linalg.norm(fct[i] - obs))
for j in range(1, k + 1):
e_2 += 2 * float(np.linalg.norm(fct[i] - fct[(i + j) % M]))

out[0] = e_1 / M - 0.5 * 1 / (M * k) * e_2
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new numba gufunc divides by k (1/(M*k)), but there is no guard for k <= 0. Because scoringrules/_energy.py currently forwards k without validation on the numba path, k=0 will trigger division-by-zero / invalid output instead of a ValueError. Ensure k is validated in the Python wrapper before calling this gufunc (or add a safe guard if possible).

Suggested change
out[0] = e_1 / M - 0.5 * 1 / (M * k) * e_2
if k <= 0:
# Avoid division by zero / invalid k; propagate a sentinel value.
out[0] = np.nan
else:
out[0] = e_1 / M - 0.5 * 1 / (M * k) * e_2

Copilot uses AI. Check for mistakes.


@guvectorize("(d),(m,d),(),(m)->()")
def _owenergy_score_gufunc(
obs: np.ndarray,
Expand Down Expand Up @@ -133,6 +149,7 @@ def _vrenergy_score_gufunc(

estimator_gufuncs = {
"akr_circperm": lazy_gufunc_wrapper_mv(_energy_score_akr_circperm_gufunc),
"akr_kband": lazy_gufunc_wrapper_mv(_energy_score_akr_kband_gufunc),
"akr": lazy_gufunc_wrapper_mv(_energy_score_akr_gufunc),
"fair": lazy_gufunc_wrapper_mv(_energy_score_fair_gufunc),
"nrg": lazy_gufunc_wrapper_mv(_energy_score_nrg_gufunc),
Expand All @@ -142,6 +159,7 @@ def _vrenergy_score_gufunc(

__all__ = [
"_energy_score_akr_circperm_gufunc",
"_energy_score_akr_kband_gufunc",
"_energy_score_akr_gufunc",
"_energy_score_fair_gufunc",
"_energy_score_nrg_gufunc",
Expand Down
24 changes: 24 additions & 0 deletions scoringrules/core/energy/_gufuncs_w.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,28 @@ def _energy_score_akr_circperm_gufunc_w(
out[0] = e_1 - 0.5 * e_2


@guvectorize("(d),(m,d),(),(m)->()")
def _energy_score_akr_kband_gufunc_w(
obs: np.ndarray, fct: np.ndarray, k: int, ens_w: np.ndarray, out: np.ndarray
):
"""Compute the Energy Score for a finite ensemble using the AKR with k-band approximation."""
M = fct.shape[0]

e_1 = 0.0
e_2 = 0.0
for i in range(M):
e_1 += float(np.linalg.norm(fct[i] - obs)) * ens_w[i]
for j in range(1, k + 1):
e_2 += (
2
* float(np.linalg.norm(fct[i] - fct[(i + j) % M]))
* ens_w[i]
* ens_w[(i + j) % M]
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new _energy_score_akr_kband_gufunc_w multiplies by ens_w[i] * ens_w[(i+j)%M], but the existing weighted AKR/circperm estimators in this file weight only by ens_w[i]. With ens_w uniform, this changes the scale by ~1/M compared to the unweighted akr_kband, and will fail the test that uniform weights reproduce the unweighted score. Either adjust the weighting (e.g., weight only by ens_w[i] like the other AKR variants) or adjust the unweighted implementation/normalization so the two agree for uniform weights.

Suggested change
* ens_w[(i + j) % M]

Copilot uses AI. Check for mistakes.
)

out[0] = e_1 - 0.5 * 1 / k * e_2

Comment on lines +82 to +101
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new numba gufunc divides by k (1/k), but there is no guard for k <= 0. Since the public wrapper currently accepts k via **kwargs and doesn't validate it on the numba path, k=0 will trigger division-by-zero / invalid output. Validate k in the Python wrapper before calling this gufunc (or add a safe guard here if feasible).

Copilot uses AI. Check for mistakes.

@guvectorize("(d),(m,d),(),(m),(m)->()")
def _owenergy_score_gufunc_w(
obs: np.ndarray,
Expand Down Expand Up @@ -144,6 +166,7 @@ def _vrenergy_score_gufunc_w(

estimator_gufuncs_w = {
"akr_circperm": lazy_gufunc_wrapper_mv(_energy_score_akr_circperm_gufunc_w),
"akr_kband": lazy_gufunc_wrapper_mv(_energy_score_akr_kband_gufunc_w),
"akr": lazy_gufunc_wrapper_mv(_energy_score_akr_gufunc_w),
"fair": lazy_gufunc_wrapper_mv(_energy_score_fair_gufunc_w),
"nrg": lazy_gufunc_wrapper_mv(_energy_score_nrg_gufunc_w),
Expand All @@ -153,6 +176,7 @@ def _vrenergy_score_gufunc_w(

__all__ = [
"_energy_score_akr_circperm_gufunc_w",
"_energy_score_akr_kband_gufunc_w",
"_energy_score_akr_gufunc_w",
"_energy_score_fair_gufunc_w",
"_energy_score_nrg_gufunc_w",
Expand Down
32 changes: 30 additions & 2 deletions scoringrules/core/energy/_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@


def es_ensemble(
obs: "Array", fct: "Array", estimator: str = "nrg", backend=None
obs: "Array",
fct: "Array",
estimator: str = "nrg",
backend=None,
k: int = 1,
) -> "Array":
"""
Compute the energy score based on a finite ensemble.
Expand All @@ -22,9 +26,11 @@ def es_ensemble(
out = _es_ensemble_akr(obs, fct, backend=backend)
elif estimator == "akr_circperm":
out = _es_ensemble_akr_circperm(obs, fct, backend=backend)
elif estimator == "akr_kband":
out = _es_ensemble_akr_kband(obs, fct, k=k, backend=backend)
else:
raise ValueError(
f"For the energy score, {estimator} must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'."
f"For the energy score, {estimator} must be one of 'nrg', 'fair', 'akr', 'akr_circperm', and 'akr_kband'."
)

return out
Expand Down Expand Up @@ -89,6 +95,28 @@ def _es_ensemble_akr_circperm(
return E_1 - 0.5 * E_2


def _es_ensemble_akr_kband(
obs: "Array", fct: "Array", k: int = 1, backend: "Backend" = None
) -> "Array":
"""Compute the Energy Score for a finite ensemble using the AKR with k-band approximation."""
B = backends.active if backend is None else backends[backend]
M: int = fct.shape[-2]

if k < 1:
raise ValueError("For estimator='akr_kband', k must be >= 1.")

err_norm = B.norm(fct - B.expand_dims(obs, -2), -1)
E_1 = B.sum(err_norm, -1) / M

E_2 = 0.0
for j in range(1, k + 1):
spread_norm = B.norm(fct - B.roll(fct, shift=-j, axis=-2), -1)
E_2 += 2 * B.sum(spread_norm, -1)
E_2 = E_2 / (M * k)
Comment on lines +108 to +115
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

k is only validated for k >= 1, but values k >= M (ensemble size) will cause repeated offsets (and shifts that effectively compare members with themselves when j is a multiple of M), while still dividing by k. That changes the estimator in a hard-to-interpret way and adds unnecessary work. Consider validating/clamping k to 1 <= k <= M-1 (or to the maximum unique band width you intend) before the loop.

Suggested change
err_norm = B.norm(fct - B.expand_dims(obs, -2), -1)
E_1 = B.sum(err_norm, -1) / M
E_2 = 0.0
for j in range(1, k + 1):
spread_norm = B.norm(fct - B.roll(fct, shift=-j, axis=-2), -1)
E_2 += 2 * B.sum(spread_norm, -1)
E_2 = E_2 / (M * k)
# Clamp k to the maximum meaningful band width (number of unique nontrivial offsets).
# For M == 1, this keeps k_eff at least 1 to avoid division by zero; spread terms are zero anyway.
max_bandwidth = max(1, M - 1)
k_eff = min(k, max_bandwidth)
err_norm = B.norm(fct - B.expand_dims(obs, -2), -1)
E_1 = B.sum(err_norm, -1) / M
E_2 = 0.0
for j in range(1, k_eff + 1):
spread_norm = B.norm(fct - B.roll(fct, shift=-j, axis=-2), -1)
E_2 += 2 * B.sum(spread_norm, -1)
E_2 = E_2 / (M * k_eff)

Copilot uses AI. Check for mistakes.

return E_1 - 0.5 * E_2


def owes_ensemble(
obs: "Array", # (... D)
fct: "Array", # (... M D)
Expand Down
37 changes: 35 additions & 2 deletions scoringrules/core/energy/_score_w.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@


def es_ensemble_w(
obs: "Array", fct: "Array", ens_w: "Array", estimator: str = "nrg", backend=None
obs: "Array",
fct: "Array",
ens_w: "Array",
estimator: str = "nrg",
backend=None,
k: int = 1,
) -> "Array":
"""
Compute the energy score based on a finite ensemble.
Expand All @@ -22,9 +27,11 @@ def es_ensemble_w(
out = _es_ensemble_akr_w(obs, fct, ens_w, backend=backend)
elif estimator == "akr_circperm":
out = _es_ensemble_akr_circperm_w(obs, fct, ens_w, backend=backend)
elif estimator == "akr_kband":
out = _es_ensemble_akr_kband_w(obs, fct, ens_w, k=k, backend=backend)
else:
raise ValueError(
f"For the energy score, {estimator} must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'."
f"For the energy score, {estimator} must be one of 'nrg', 'fair', 'akr', 'akr_circperm', and 'akr_kband'."
)

return out
Expand Down Expand Up @@ -102,6 +109,32 @@ def _es_ensemble_akr_circperm_w(
return E_1 - 0.5 * E_2


def _es_ensemble_akr_kband_w(
obs: "Array",
fct: "Array",
ens_w: "Array",
k: int = 1,
backend: "Backend" = None,
) -> "Array":
"""Compute the weighted Energy Score using the AKR with k-band approximation."""
B = backends.active if backend is None else backends[backend]

if k < 1:
raise ValueError("For estimator='akr_kband', k must be >= 1.")

err_norm = B.norm(fct - B.expand_dims(obs, -2), -1)
E_1 = B.sum(err_norm * ens_w, -1)

E_2 = 0.0
for j in range(1, k + 1):
fct_shift = B.roll(fct, shift=-j, axis=-2)
ens_w_shift = B.roll(ens_w, shift=-j, axis=-1)
spread_norm = B.norm(fct - fct_shift, -1)
E_2 += 2 * B.sum(spread_norm * ens_w * ens_w_shift, -1)
Comment on lines +131 to +133
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_es_ensemble_akr_kband_w weights the spread term by ens_w * ens_w_shift, which makes the weighted score disagree with the unweighted score even for uniform weights (unlike the other estimators in this module). This will break the existing invariant tested in tests/test_energy.py that ens_w=np.ones(...) matches the unweighted result. Consider aligning with _es_ensemble_akr_w / _es_ensemble_akr_circperm_w by weighting the k-band spread term with ens_w only (or otherwise adjust the normalization so uniform weights reproduce the unweighted akr_kband result).

Suggested change
ens_w_shift = B.roll(ens_w, shift=-j, axis=-1)
spread_norm = B.norm(fct - fct_shift, -1)
E_2 += 2 * B.sum(spread_norm * ens_w * ens_w_shift, -1)
spread_norm = B.norm(fct - fct_shift, -1)
# Weight the spread term with ens_w only, consistent with other AKR estimators.
E_2 += 2 * B.sum(spread_norm * ens_w, -1)

Copilot uses AI. Check for mistakes.

return E_1 - 0.5 * E_2 / k
Comment on lines +121 to +135
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like the unweighted implementation, this only checks k >= 1. For k >= M (ensemble size), the rolled offsets repeat and (when j is a multiple of M) compare members with themselves, while still dividing by k. Consider validating/clamping k to 1 <= k <= M-1 (or the maximum unique band width you intend) to avoid duplicated work and hard-to-interpret scaling.

Suggested change
if k < 1:
raise ValueError("For estimator='akr_kband', k must be >= 1.")
err_norm = B.norm(fct - B.expand_dims(obs, -2), -1)
E_1 = B.sum(err_norm * ens_w, -1)
E_2 = 0.0
for j in range(1, k + 1):
fct_shift = B.roll(fct, shift=-j, axis=-2)
ens_w_shift = B.roll(ens_w, shift=-j, axis=-1)
spread_norm = B.norm(fct - fct_shift, -1)
E_2 += 2 * B.sum(spread_norm * ens_w * ens_w_shift, -1)
return E_1 - 0.5 * E_2 / k
M: int = fct.shape[-2]
if M < 2:
raise ValueError(
"For estimator='akr_kband', ensemble size M must be >= 2."
)
if k < 1:
raise ValueError("For estimator='akr_kband', k must be >= 1.")
# Clamp k to the maximum unique band width (M - 1) to avoid
# repeated cyclic permutations and self-comparisons when k >= M.
k_eff = min(k, M - 1)
err_norm = B.norm(fct - B.expand_dims(obs, -2), -1)
E_1 = B.sum(err_norm * ens_w, -1)
E_2 = 0.0
for j in range(1, k_eff + 1):
fct_shift = B.roll(fct, shift=-j, axis=-2)
ens_w_shift = B.roll(ens_w, shift=-j, axis=-1)
spread_norm = B.norm(fct - fct_shift, -1)
E_2 += 2 * B.sum(spread_norm * ens_w * ens_w_shift, -1)
return E_1 - 0.5 * E_2 / k_eff

Copilot uses AI. Check for mistakes.


def owes_ensemble_w(
obs: "Array", # (... D)
fct: "Array", # (... M D)
Expand Down
12 changes: 9 additions & 3 deletions tests/test_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
N = 20
N_VARS = 3

ESTIMATORS = ["nrg", "fair", "akr", "akr_circperm"]
ESTIMATORS = ["nrg", "fair", "akr", "akr_circperm", "akr_kband"]


@pytest.mark.parametrize("estimator", ESTIMATORS)
Expand All @@ -18,13 +18,19 @@ def test_energy_score(estimator, backend):
with pytest.raises(ValueError):
obs = np.random.randn(N, N_VARS)
fct = np.random.randn(N, ENSEMBLE_SIZE, N_VARS - 1)
sr.es_ensemble(obs, fct, estimator=estimator, backend=backend)
if estimator == "akr_kband":
sr.es_ensemble(obs, fct, estimator=estimator, backend=backend, k=2)
else:
sr.es_ensemble(obs, fct, estimator=estimator, backend=backend)

# undefined estimator
with pytest.raises(ValueError):
fct = np.random.randn(N, ENSEMBLE_SIZE, N_VARS)
est = "undefined_estimator"
sr.es_ensemble(obs, fct, estimator=est, backend=backend)
if estimator == "akr_kband":
sr.es_ensemble(obs, fct, estimator=est, backend=backend, k=2)
else:
sr.es_ensemble(obs, fct, estimator=est, backend=backend)

# test output

Expand Down
Loading