ArcInstitute · sullivanj91 · May 4, 2026 · May 2, 2026 · May 2, 2026 · May 2, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -79,7 +79,8 @@ The returned Polars DataFrame (or pandas DataFrame when `as_pandas=True`) has co
 | `ref_mean`          | float | Pseudobulk mean for the reference, always in natural (count) space    |
 | `target_membership` | int   | Number of cells in the target group                                   |
 | `ref_membership`    | int   | Number of cells in the reference                                      |
-| `fold_change`       | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means |
+| `fold_change`       | float | **Deprecated** alias for `log2_fold_change` (identical values). Retained for one release; emits a `DeprecationWarning` on every `pdex(...)` call and will be removed in pdex 0.3.0. |
+| `log2_fold_change`  | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means |
 | `percent_change`    | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means |
 | `p_value`           | float | Mann-Whitney U p-value (per-cell vectors)                             |
 | `statistic`         | float | Mann-Whitney U statistic                                              |

diff --git a/README.md b/README.md
@@ -95,7 +95,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
 | `ref_mean`          | Pseudobulk mean for the reference (count space)    |
 | `target_membership` | Number of cells in the target group                |
 | `ref_membership`    | Number of cells in the reference                   |
-| `fold_change`       | log2(target_mean / ref_mean)                       |
+| `fold_change`       | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
+| `log2_fold_change`  | log2(target_mean / ref_mean)                       |
 | `percent_change`    | (target_mean - ref_mean) / ref_mean                |
 | `p_value`           | Mann-Whitney U p-value                             |
 | `statistic`         | Mann-Whitney U statistic                           |

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pdex"
-version = "0.2.1"
+version = "0.2.2"
 description = "Parallel differential expression for single-cell perturbation sequencing"
 readme = "README.md"
 authors = [{ name = "noam teyssier", email = "noam.teyssier@arcinstitute.org" }]

diff --git a/src/pdex/__init__.py b/src/pdex/__init__.py
@@ -12,7 +12,7 @@
 from scipy.stats import false_discovery_control
 from tqdm import tqdm
 
-from pdex._math import fold_change, mwu, percent_change, pseudobulk
+from pdex._math import log2_fold_change, mwu, percent_change, pseudobulk
 
 from ._utils import _detect_is_log1p, set_numba_threadpool
 
@@ -233,14 +233,21 @@ def pdex(
     pl.DataFrame | pd.DataFrame
         One row per (group, feature) pair with columns: ``target``, ``feature``,
         ``target_mean``, ``ref_mean``, ``target_membership``, ``ref_membership``,
-        ``fold_change``, ``percent_change``, ``p_value``, ``statistic``, ``fdr``.
+        ``fold_change``, ``log2_fold_change``, ``percent_change``, ``p_value``,
+        ``statistic``, ``fdr``.
 
         ``target_mean`` and ``ref_mean`` are always in **natural (count) space**.
 
-        ``fold_change`` and ``percent_change`` are derived from the pseudobulk
-        means (not from the per-cell MWU test inputs): ``fold_change`` is
+        ``log2_fold_change`` and ``percent_change`` are derived from the pseudobulk
+        means (not from the per-cell MWU test inputs): ``log2_fold_change`` is
         ``log2(target_mean / ref_mean)`` and ``percent_change`` is
-        ``(target_mean - ref_mean) / ref_mean``.  The MWU ``p_value`` and
+        ``(target_mean - ref_mean) / ref_mean``.
-        ``log2(target_mean / ref_mean)`` and ``percent_change`` is
-        ``(target_mean - ref_mean) / ref_mean``.  The MWU ``p_value`` and
-        ``(target_mean - ref_mean) / ref_mean``.
+        ``log2((target_mean + epsilon) / (ref_mean + epsilon))`` and ``percent_change`` is
+        ``(target_mean - ref_mean) / (ref_mean + epsilon)``.
-        ``log2(target_mean / ref_mean)`` and ``percent_change`` is
-        ``(target_mean - ref_mean) / ref_mean``.  The MWU ``p_value`` and
-        ``(target_mean - ref_mean) / ref_mean``.
+        ``log2((target_mean + epsilon) / (ref_mean + epsilon))`` and ``percent_change`` is
+        ``(target_mean - ref_mean) / (ref_mean + epsilon)``.
+
+        ``fold_change`` is a **deprecated** alias for ``log2_fold_change``
+        (identical values). It is retained for one release to ease migration
+        and will be removed in pdex 0.3.0. New code should read
+        ``log2_fold_change`` directly. A :class:`DeprecationWarning` is emitted
-        ``log2_fold_change`` directly. A :class:`DeprecationWarning` is emitted
+        ``log2_fold_change`` directly. A :class:`FutureWarning` is emitted
-        ``log2_fold_change`` directly. A :class:`DeprecationWarning` is emitted
+        ``log2_fold_change`` directly. A :class:`FutureWarning` is emitted
+        on every ``pdex(...)`` call.  The MWU ``p_value`` and
         ``statistic`` are computed directly on the per-cell expression vectors.
 
         For ``mode="ref"``, the reference group itself is excluded from the output.
@@ -259,6 +266,14 @@ def pdex(
     if epsilon < 0:
         raise ValueError(f"epsilon must be non-negative, got {epsilon}")
 
+    warnings.warn(
+        "The `fold_change` column in pdex output is deprecated and will be "
+        "removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
+        "the same values (`log2(target_mean / ref_mean)`).",
+        DeprecationWarning,
-        "The `fold_change` column in pdex output is deprecated and will be "
-        "removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
-        "the same values (`log2(target_mean / ref_mean)`).",
-        DeprecationWarning,
+        "The `fold_change` column in pdex output is deprecated and will be "
+        "removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
+        "the same values.",
+        FutureWarning,
-        "The `fold_change` column in pdex output is deprecated and will be "
-        "removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
-        "the same values (`log2(target_mean / ref_mean)`).",
-        DeprecationWarning,
+        "The `fold_change` column in pdex output is deprecated and will be "
+        "removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
+        "the same values.",
+        FutureWarning,
+        stacklevel=2,
+    )
+
     # Set the global threadpool for numba
     set_numba_threadpool(threads)
 
@@ -377,7 +392,7 @@ def _pdex_ref(
             group_matrix, geometric_mean=geometric_mean, is_log1p=is_log1p
         )
 
-        fc = fold_change(group_bulk, ref_bulk, epsilon)
+        lfc = log2_fold_change(group_bulk, ref_bulk, epsilon)
         pc = percent_change(group_bulk, ref_bulk, epsilon)
         mwu_result = mwu(group_matrix, ref_data)
 
@@ -394,7 +409,8 @@ def _pdex_ref(
                     "ref_mean": np.asarray(ref_bulk).ravel(),
                     "target_membership": group_mask.size,
                     "ref_membership": ref_membership,
-                    "fold_change": fc,
+                    "fold_change": lfc,
+                    "log2_fold_change": lfc,
                     "percent_change": pc,
                     "p_value": mwu_pvalue,
                     "statistic": mwu_statistic,
@@ -439,7 +455,7 @@ def _pdex_all(
             rest_matrix, geometric_mean=geometric_mean, is_log1p=is_log1p
         )
 
-        fc = fold_change(group_bulk, rest_bulk, epsilon)
+        lfc = log2_fold_change(group_bulk, rest_bulk, epsilon)
         pc = percent_change(group_bulk, rest_bulk, epsilon)
         mwu_result = mwu(group_matrix, rest_matrix)
 
@@ -456,7 +472,8 @@ def _pdex_all(
                     "ref_mean": np.asarray(rest_bulk).ravel(),
                     "target_membership": group_mask.size,
                     "ref_membership": rest_mask.size,
-                    "fold_change": fc,
+                    "fold_change": lfc,
+                    "log2_fold_change": lfc,
                     "percent_change": pc,
                     "p_value": mwu_pvalue,
                     "statistic": mwu_statistic,
@@ -527,8 +544,8 @@ def _pdex_on_target(
             pseudobulk(ref_col, geometric_mean=geometric_mean, is_log1p=is_log1p)[0]
         )
 
-        fc = float(
-            fold_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
+        lfc = float(
+            log2_fold_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
         )
         pc = float(
             percent_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
@@ -546,7 +563,8 @@ def _pdex_on_target(
                 "ref_mean": ref_mean,
                 "target_membership": group_mask.size,
                 "ref_membership": ref_membership,
-                "fold_change": fc,
+                "fold_change": lfc,
+                "log2_fold_change": lfc,
                 "percent_change": pc,
                 "p_value": p_value,
                 "statistic": statistic,

diff --git a/src/pdex/_math.py b/src/pdex/_math.py
@@ -106,7 +106,7 @@ def bulk_matrix_geometric(
 
 
 @nb.njit(parallel=True)
-def fold_change(x: np.ndarray, y: np.ndarray, epsilon: float = 0.0) -> np.ndarray:
+def log2_fold_change(x: np.ndarray, y: np.ndarray, epsilon: float = 0.0) -> np.ndarray:
     """Calculates the log2-fold change between two arrays.
 
     When ``epsilon > 0``, adds a small pseudocount to both numerator and

diff --git a/tests/test_math.py b/tests/test_math.py
@@ -1,32 +1,32 @@
-"""Tests for pdex._math (fold_change, percent_change, bulk_matrix_geometric)."""
+"""Tests for pdex._math (log2_fold_change, percent_change, bulk_matrix_geometric)."""
 
 import numpy as np
 
-from pdex._math import bulk_matrix_geometric, fold_change, percent_change
+from pdex._math import bulk_matrix_geometric, log2_fold_change, percent_change
 
 
 class TestFoldChange:
     def test_ratio_of_two(self):
         x = np.array([4.0, 8.0])
         y = np.array([2.0, 4.0])
-        result = fold_change(x, y)
+        result = log2_fold_change(x, y)
         np.testing.assert_allclose(result, [1.0, 1.0])
 
     def test_equal_values(self):
         x = np.array([3.0, 5.0])
-        result = fold_change(x, x)
+        result = log2_fold_change(x, x)
         np.testing.assert_allclose(result, [0.0, 0.0])
 
     def test_half(self):
         x = np.array([1.0])
         y = np.array([2.0])
-        result = fold_change(x, y)
+        result = log2_fold_change(x, y)
         np.testing.assert_allclose(result, [-1.0])
 
     def test_known_values(self):
         x = np.array([1.0, 2.0, 4.0, 8.0])
         y = np.array([1.0, 1.0, 1.0, 1.0])
-        result = fold_change(x, y)
+        result = log2_fold_change(x, y)
         np.testing.assert_allclose(result, [0.0, 1.0, 2.0, 3.0])
 
 
@@ -60,29 +60,31 @@ def test_zero_epsilon_matches_baseline(self):
         """epsilon=0.0 must be identical to calling without it."""
         x = np.array([4.0, 8.0, 0.1])
         y = np.array([2.0, 4.0, 0.001])
-        np.testing.assert_array_equal(fold_change(x, y), fold_change(x, y, 0.0))
+        np.testing.assert_array_equal(
+            log2_fold_change(x, y), log2_fold_change(x, y, 0.0)
+        )
 
     def test_dampens_extreme_fc_from_near_zero_denominator(self):
         """epsilon=0.5 pulls extreme FC toward zero."""
         x = np.array([0.1])
         y = np.array([0.001])
-        fc_raw = fold_change(x, y)[0]
-        fc_dampened = fold_change(x, y, 0.5)[0]
+        fc_raw = log2_fold_change(x, y)[0]
+        fc_dampened = log2_fold_change(x, y, 0.5)[0]
         assert abs(fc_dampened) < abs(fc_raw)
         np.testing.assert_allclose(fc_dampened, np.log2(0.6 / 0.501), rtol=1e-5)
 
     def test_preserves_direction(self):
         """epsilon should not flip the sign of fold change."""
         x = np.array([2.0, 0.5])
         y = np.array([1.0, 1.0])
-        result = fold_change(x, y, 0.5)
+        result = log2_fold_change(x, y, 0.5)
         assert result[0] > 0
         assert result[1] < 0
 
     def test_equal_means_still_zero(self):
         """When target_mean == ref_mean, FC should be 0 regardless of epsilon."""
         x = np.array([0.5, 2.0])
-        result = fold_change(x, x, 0.5)
+        result = log2_fold_change(x, x, 0.5)
         np.testing.assert_allclose(result, [0.0, 0.0])
 
 

diff --git a/tests/test_pdex.py b/tests/test_pdex.py
@@ -15,6 +15,7 @@
     "target_membership",
     "ref_membership",
     "fold_change",
+    "log2_fold_change",
     "percent_change",
     "p_value",
     "statistic",
@@ -664,3 +665,20 @@ def test_all_mode_backed_matches_inmemory(self, small_adata, small_adata_backed)
                 rtol=1e-6,
                 err_msg=f"Mismatch in column {col}",
             )
+
+
+class TestLog2FoldChangeColumn:
+    """Regression test for the `log2_fold_change` column semantics."""
+
+    @pytest.mark.parametrize("mode", ["ref", "all"])
+    def test_log2_fold_change_equals_log2_ratio(self, small_adata, mode):
+        """log2_fold_change == log2(target_mean / ref_mean) on finite entries."""
+        result = pdex(small_adata, groupby="guide", mode=mode, is_log1p=False)
+        target = result["target_mean"].to_numpy()
+        ref = result["ref_mean"].to_numpy()
+        actual = result["log2_fold_change"].to_numpy()
+        with np.errstate(divide="ignore", invalid="ignore"):
+            expected = np.log2(target / ref)
+        finite = np.isfinite(expected) & np.isfinite(actual)
+        assert finite.any()
+        np.testing.assert_allclose(actual[finite], expected[finite], rtol=1e-6)