fix: address PR review comments — percent_change prior_count, negative guard, usage docs

sullivanj91 · claude · sullivanj91 · commit 64655e785f23 · 2026-04-01T17:09:19.000-07:00
- _math.py: add prior_count param to percent_change(); formula becomes
  (x - y) / (y + prior_count), dampening explosion when ref_mean ≈ 0
- __init__.py: add ValueError guard for negative prior_count; thread
  prior_count through all three percent_change call sites (_pdex_ref,
  _pdex_all, _pdex_on_target); expand prior_count docstring with
  recommended usage (start with 0.5, combine with min_mean_expression
  for full suppression)
- CLAUDE.md: update percent_change schema formula to show prior_count
- tests/test_math.py: add TestPercentChangeWithPriorCount (4 unit tests)
- tests/test_pdex.py: add test_negative_prior_count_raises to validation suite

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -80,7 +80,7 @@ The returned Polars DataFrame (or pandas DataFrame when `as_pandas=True`) has co
 | `target_membership` | int   | Number of cells in the target group                                   |
 | `ref_membership`    | int   | Number of cells in the reference                                      |
 | `fold_change`       | float | log2((target_mean + prior_count) / (ref_mean + prior_count)) — computed from pseudobulk means |
-| `percent_change`    | float | (target_mean - ref_mean) / ref_mean — computed from pseudobulk means  |
+| `percent_change`    | float | (target_mean - ref_mean) / (ref_mean + prior_count) — computed from pseudobulk means |
 | `p_value`           | float | Mann-Whitney U p-value (per-cell vectors)                             |
 | `statistic`         | float | Mann-Whitney U statistic                                              |
 | `fdr`               | float | FDR-corrected p-value, applied per-group across genes. For `on_target` mode, applied across all groups.                 |
diff --git a/src/pdex/__init__.py b/src/pdex/__init__.py
@@ -204,10 +204,20 @@ def pdex(
         :class:`polars.DataFrame`. Requires ``pyarrow``.
     prior_count:
         Pseudocount added to both ``target_mean`` and ``ref_mean`` before computing
-        ``fold_change``. When ``prior_count > 0``, extreme fold changes from near-zero
-        reference means (scRNA-seq sparsity artifact) are dampened toward zero.
-        Has no effect on the Mann-Whitney U p-value or FDR.
+        ``fold_change`` and ``percent_change``. When ``prior_count > 0``, extreme
+        values from near-zero reference means (scRNA-seq sparsity artifact) are
+        dampened toward zero. Has no effect on the Mann-Whitney U p-value or FDR.
         Default ``0.0`` preserves existing behaviour.
+
+        **Recommended usage:** For scRNA-seq CRISPRi/CRISPRa screens where many
+        genes are unexpressed in the reference group, start with ``prior_count=0.5``.
+        This provides modest dampening without substantially compressing fold changes
+        for well-expressed genes. For complete suppression of the sparsity artifact,
+        combine with a ``min_mean_expression`` pre-filter on the reference group —
+        ``prior_count`` alone cannot eliminate low p-values arising from per-cell
+        distributional shifts in near-zero genes.
+
+        Must be non-negative. Raises :class:`ValueError` if negative.
     **kwargs:
         Mode-specific keyword arguments:
 
@@ -246,6 +256,9 @@ def pdex(
         adata.n_vars,
     )
 
+    if prior_count < 0:
+        raise ValueError(f"prior_count must be non-negative, got {prior_count}")
+
     # Set the global threadpool for numba
     set_numba_threadpool(threads)
 
@@ -365,7 +378,7 @@ def _pdex_ref(
         )
 
         fc = fold_change(group_bulk, ref_bulk, prior_count)
-        pc = percent_change(group_bulk, ref_bulk)
+        pc = percent_change(group_bulk, ref_bulk, prior_count)
         mwu_result = mwu(group_matrix, ref_data)
 
         mwu_statistic = mwu_result.statistic
@@ -427,7 +440,7 @@ def _pdex_all(
         )
 
         fc = fold_change(group_bulk, rest_bulk, prior_count)
-        pc = percent_change(group_bulk, rest_bulk)
+        pc = percent_change(group_bulk, rest_bulk, prior_count)
         mwu_result = mwu(group_matrix, rest_matrix)
 
         mwu_statistic = mwu_result.statistic
@@ -517,7 +530,7 @@ def _pdex_on_target(
         fc = float(
             fold_change(np.array([target_mean]), np.array([ref_mean]), prior_count)[0]
         )
-        pc = float(percent_change(np.array([target_mean]), np.array([ref_mean]))[0])
+        pc = float(percent_change(np.array([target_mean]), np.array([ref_mean]), prior_count)[0])
 
         mwu_result = mwu(group_col, ref_col)
         p_value = float(np.clip(np.asarray(mwu_result.pvalue).ravel()[0], 0, 1))
diff --git a/src/pdex/_math.py b/src/pdex/_math.py
@@ -117,9 +117,14 @@ def fold_change(x: np.ndarray, y: np.ndarray, prior_count: float = 0.0) -> np.nd
 
 
 @nb.njit(parallel=True)
-def percent_change(x: np.ndarray, y: np.ndarray) -> np.ndarray:
-    """Calculates the change between two arrays."""
-    return (x - y) / y
+def percent_change(x: np.ndarray, y: np.ndarray, prior_count: float = 0.0) -> np.ndarray:
+    """Calculates the percent change between two arrays.
+
+    When ``prior_count > 0``, adds a pseudocount to the denominator before
+    computing the ratio, dampening extreme values when the reference mean is
+    near zero (scRNA-seq sparsity artifact).
+    """
+    return (x - y) / (y + prior_count)
 
 
 def mwu(
diff --git a/tests/test_math.py b/tests/test_math.py
@@ -86,6 +86,37 @@ def test_equal_means_still_zero(self):
         np.testing.assert_allclose(result, [0.0, 0.0])
 
 
+class TestPercentChangeWithPriorCount:
+    def test_zero_prior_count_matches_baseline(self):
+        """prior_count=0.0 must be identical to calling without it."""
+        x = np.array([4.0, 8.0, 0.1])
+        y = np.array([2.0, 4.0, 0.001])
+        np.testing.assert_array_equal(percent_change(x, y), percent_change(x, y, 0.0))
+
+    def test_dampens_extreme_pc_from_near_zero_denominator(self):
+        """prior_count=0.5 pulls extreme percent change toward zero."""
+        x = np.array([0.1])
+        y = np.array([0.001])
+        pc_raw = percent_change(x, y)[0]
+        pc_dampened = percent_change(x, y, 0.5)[0]
+        assert abs(pc_dampened) < abs(pc_raw)
+        np.testing.assert_allclose(pc_dampened, (0.1 - 0.001) / (0.001 + 0.5), rtol=1e-5)
+
+    def test_preserves_direction(self):
+        """prior_count should not flip the sign of percent change."""
+        x = np.array([2.0, 0.5])
+        y = np.array([1.0, 1.0])
+        result = percent_change(x, y, 0.5)
+        assert result[0] > 0
+        assert result[1] < 0
+
+    def test_equal_means_still_zero(self):
+        """When target_mean == ref_mean, percent_change should be 0 regardless of prior_count."""
+        x = np.array([0.5, 2.0])
+        result = percent_change(x, x, 0.5)
+        np.testing.assert_allclose(result, [0.0, 0.0])
+
+
 class TestBulkMatrixGeometric:
     """Tests for bulk_matrix_geometric."""
 
diff --git a/tests/test_pdex.py b/tests/test_pdex.py
@@ -478,6 +478,10 @@ def test_unknown_gene_name_warns_and_skips(self, on_target_adata):
 
 
 class TestPdexValidation:
+    def test_negative_prior_count_raises(self, small_adata):
+        with pytest.raises(ValueError, match="prior_count must be non-negative"):
+            pdex(small_adata, groupby="guide", is_log1p=False, prior_count=-0.1)
+
     def test_invalid_mode(self, small_adata):
         with pytest.raises(ValueError, match="Invalid mode"):
             pdex(