Merge pull request #36 from ArcInstitute/31-set-default-values-for-unstable-fold-changes

noamteyssier · web-flow · commit 87d485c45f2b · 2025-06-12T14:05:12.000-07:00
31 set default values for unstable fold changes
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pdex"
-version = "0.1.17"
+version = "0.1.18"
 description = "Parallel differential expression for single-cell perturbation sequencing"
 readme = "README.md"
 authors = [{ name = "noam teyssier", email = "noam.teyssier@arcinstitute.org" }]
diff --git a/src/pdex/_single_cell.py b/src/pdex/_single_cell.py
@@ -93,11 +93,43 @@ def _process_target_batch_shm(
     tie_correct: bool = False,
     is_log1p: bool = False,
     exp_post_agg: bool = True,
+    clip_value: float | int | None = 20,
     **kwargs,
 ) -> list[dict[str, float]]:
     """Process a batch of target gene and feature combinations.
 
     This is the function that is parallelized across multiple workers.
+
+    Arguments
+    =========
+    batch_tasks: list[tuple]
+        List of tuples containing target mask, reference mask, variable index,
+        target name, reference name, and variable name.
+    shm_name: str
+        Name of the shared memory object.
+    shape: tuple[int, int]
+        Shape of the matrix.
+    dtype: np.dtype
+        Data type of the matrix.
+    metric: str
+        Metric to use for processing.
+    tie_correct: bool = False
+        Whether to correct for ties.
+    is_log1p: bool = False
+        Whether to apply log1p transformation.
+    exp_post_agg: bool = True
+        Whether to apply exponential post-aggregation.
+    clip_value: float | int | None
+        Default clip value used when log-fold-changes would be NaN or Inf.
+        Ignore clipping if set to None.
+        fold_change = (
+            1/default_clip_value
+            if fold_change == inf
+            else default_clip_value
+            if fold_change == 0
+            else fold_change
+        )
+    **kwargs: Additional keyword arguments.
     """
     # Open shared memory once for the batch
     existing_shm = SharedMemory(name=shm_name)
@@ -121,7 +153,7 @@ def _process_target_batch_shm(
         μ_tgt = _sample_mean(x_tgt, is_log1p=is_log1p, exp_post_agg=exp_post_agg)
         μ_ref = _sample_mean(x_ref, is_log1p=is_log1p, exp_post_agg=exp_post_agg)
 
-        fc = _fold_change(μ_tgt, μ_ref)
+        fc = _fold_change(μ_tgt, μ_ref, clip_value=clip_value)
         pcc = _percent_change(μ_tgt, μ_ref)
 
         (pval, stat) = (1.0, np.nan)  # default output in case of failure
@@ -217,10 +249,18 @@ def _sample_mean(
 def _fold_change(
     μ_tgt: float,
     μ_ref: float,
+    clip_value: float | int | None = 20,
 ) -> float:
     """Calculate the fold change between two means."""
+    # The fold change is infinite so clip to default value
     if μ_ref == 0:
-        return np.nan
+        return np.nan if clip_value is None else clip_value
+
+    # The fold change is zero so clip to 1 / default value
+    if μ_tgt == 0:
+        return 0 if clip_value is None else 1 / clip_value
+
+    # Return the fold change
     return μ_tgt / μ_ref
 
 
@@ -245,6 +285,7 @@ def parallel_differential_expression(
     tie_correct: bool = True,
     is_log1p: bool | None = None,
     exp_post_agg: bool = True,
+    clip_value: float | int | None = 20.0,
     as_polars: bool = False,
     **kwargs,
 ) -> pd.DataFrame | pl.DataFrame:
@@ -274,6 +315,8 @@ def parallel_differential_expression(
     exp_post_agg: bool
         Whether to perform exponential post-aggregation for calculating fold change
         (default: perform exponential post-aggregation)
+    clip_value: float | int | None
+        Value to clip fold change to if it is infinite or NaN (default: 20.0). Set to None to disable clipping.
     as_polars: bool
         return the output dataframe as a polars dataframe
     **kwargs:
@@ -349,6 +392,7 @@ def parallel_differential_expression(
         tie_correct=tie_correct,
         is_log1p=is_log1p,
         exp_post_agg=exp_post_agg,
+        clip_value=clip_value,
         **kwargs,
     )