Skip to content

Commit 87d485c

Browse files
authored
Merge pull request #36 from ArcInstitute/31-set-default-values-for-unstable-fold-changes
31 set default values for unstable fold changes
2 parents cfba1cc + 8c40668 commit 87d485c

2 files changed

Lines changed: 47 additions & 3 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "pdex"
3-
version = "0.1.17"
3+
version = "0.1.18"
44
description = "Parallel differential expression for single-cell perturbation sequencing"
55
readme = "README.md"
66
authors = [{ name = "noam teyssier", email = "noam.teyssier@arcinstitute.org" }]

src/pdex/_single_cell.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,43 @@ def _process_target_batch_shm(
9393
tie_correct: bool = False,
9494
is_log1p: bool = False,
9595
exp_post_agg: bool = True,
96+
clip_value: float | int | None = 20,
9697
**kwargs,
9798
) -> list[dict[str, float]]:
9899
"""Process a batch of target gene and feature combinations.
99100
100101
This is the function that is parallelized across multiple workers.
102+
103+
Arguments
104+
=========
105+
batch_tasks: list[tuple]
106+
List of tuples containing target mask, reference mask, variable index,
107+
target name, reference name, and variable name.
108+
shm_name: str
109+
Name of the shared memory object.
110+
shape: tuple[int, int]
111+
Shape of the matrix.
112+
dtype: np.dtype
113+
Data type of the matrix.
114+
metric: str
115+
Metric to use for processing.
116+
tie_correct: bool = False
117+
Whether to correct for ties.
118+
is_log1p: bool = False
119+
Whether to apply log1p transformation.
120+
exp_post_agg: bool = True
121+
Whether to apply exponential post-aggregation.
122+
clip_value: float | int | None
123+
Default clip value used when log-fold-changes would be NaN or Inf.
124+
Ignore clipping if set to None.
125+
fold_change = (
126+
clip_value
127+
if fold_change == inf
128+
else 1 / clip_value
129+
if fold_change == 0
130+
else fold_change
131+
)
132+
**kwargs: Additional keyword arguments.
101133
"""
102134
# Open shared memory once for the batch
103135
existing_shm = SharedMemory(name=shm_name)
@@ -121,7 +153,7 @@ def _process_target_batch_shm(
121153
μ_tgt = _sample_mean(x_tgt, is_log1p=is_log1p, exp_post_agg=exp_post_agg)
122154
μ_ref = _sample_mean(x_ref, is_log1p=is_log1p, exp_post_agg=exp_post_agg)
123155

124-
fc = _fold_change(μ_tgt, μ_ref)
156+
fc = _fold_change(μ_tgt, μ_ref, clip_value=clip_value)
125157
pcc = _percent_change(μ_tgt, μ_ref)
126158

127159
(pval, stat) = (1.0, np.nan) # default output in case of failure
@@ -217,10 +249,18 @@ def _sample_mean(
217249
def _fold_change(
218250
μ_tgt: float,
219251
μ_ref: float,
252+
clip_value: float | int | None = 20,
220253
) -> float:
221254
"""Calculate the fold change between two means."""
255+
# The fold change is infinite so clip to default value
222256
if μ_ref == 0:
223-
return np.nan
257+
return np.nan if clip_value is None else clip_value
258+
259+
# The fold change is zero so clip to 1 / default value
260+
if μ_tgt == 0:
261+
return 0 if clip_value is None else 1 / clip_value
262+
263+
# Return the fold change
224264
return μ_tgt / μ_ref
225265

226266

@@ -245,6 +285,7 @@ def parallel_differential_expression(
245285
tie_correct: bool = True,
246286
is_log1p: bool | None = None,
247287
exp_post_agg: bool = True,
288+
clip_value: float | int | None = 20.0,
248289
as_polars: bool = False,
249290
**kwargs,
250291
) -> pd.DataFrame | pl.DataFrame:
@@ -274,6 +315,8 @@ def parallel_differential_expression(
274315
exp_post_agg: bool
275316
Whether to perform exponential post-aggregation for calculating fold change
276317
(default: perform exponential post-aggregation)
318+
clip_value: float | int | None
319+
Value to clip fold change to if it is infinite or NaN (default: 20.0); a fold change of zero is clipped to 1 / clip_value. Set to None to disable clipping.
277320
as_polars: bool
278321
return the output dataframe as a polars dataframe
279322
**kwargs:
@@ -349,6 +392,7 @@ def parallel_differential_expression(
349392
tie_correct=tie_correct,
350393
is_log1p=is_log1p,
351394
exp_post_agg=exp_post_agg,
395+
clip_value=clip_value,
352396
**kwargs,
353397
)
354398

0 commit comments

Comments
 (0)