@@ -93,11 +93,43 @@ def _process_target_batch_shm(
9393 tie_correct : bool = False ,
9494 is_log1p : bool = False ,
9595 exp_post_agg : bool = True ,
96+ clip_value : float | int | None = 20 ,
9697 ** kwargs ,
9798) -> list [dict [str , float ]]:
9899 """Process a batch of target gene and feature combinations.
99100
100101 This is the function that is parallelized across multiple workers.
102+
103+ Arguments
104+ =========
105+ batch_tasks: list[tuple]
106+ List of tuples containing target mask, reference mask, variable index,
107+ target name, reference name, and variable name.
108+ shm_name: str
109+ Name of the shared memory object.
110+ shape: tuple[int, int]
111+ Shape of the matrix.
112+ dtype: np.dtype
113+ Data type of the matrix.
114+ metric: str
115+ Metric to use for processing.
116+ tie_correct: bool = False
117+ Whether to correct for ties.
118+ is_log1p: bool = False
119+ Whether to apply log1p transformation.
120+ exp_post_agg: bool = True
121+ Whether to apply exponential post-aggregation.
122+ clip_value: float | int | None
123+ Substitute value used when the fold change would be infinite, undefined, or zero.
124+ Ignore clipping if set to None.
125+ fold_change = (
126+ clip_value
127+ if reference mean == 0 (fold change is infinite or undefined)
128+ else 1 / clip_value
129+ if target mean == 0 (fold change is zero)
130+ else fold_change
131+ )
132+ **kwargs: Additional keyword arguments.
101133 """
102134 # Open shared memory once for the batch
103135 existing_shm = SharedMemory (name = shm_name )
@@ -121,7 +153,7 @@ def _process_target_batch_shm(
121153 μ_tgt = _sample_mean (x_tgt , is_log1p = is_log1p , exp_post_agg = exp_post_agg )
122154 μ_ref = _sample_mean (x_ref , is_log1p = is_log1p , exp_post_agg = exp_post_agg )
123155
124- fc = _fold_change (μ_tgt , μ_ref )
156+ fc = _fold_change (μ_tgt , μ_ref , clip_value = clip_value )
125157 pcc = _percent_change (μ_tgt , μ_ref )
126158
127159 (pval , stat ) = (1.0 , np .nan ) # default output in case of failure
@@ -217,10 +249,18 @@ def _sample_mean(
217249def _fold_change (
218250 μ_tgt : float ,
219251 μ_ref : float ,
252+ clip_value : float | int | None = 20 ,
220253) -> float :
221254 """Calculate the fold change between two means."""
255+ # The fold change is infinite so clip to default value
222256 if μ_ref == 0 :
223- return np .nan
257+ return np .nan if clip_value is None else clip_value
258+
259+ # The fold change is zero so clip to 1 / default value
260+ if μ_tgt == 0 :
261+ return 0 if clip_value is None else 1 / clip_value
262+
263+ # Return the fold change
224264 return μ_tgt / μ_ref
225265
226266
@@ -245,6 +285,7 @@ def parallel_differential_expression(
245285 tie_correct : bool = True ,
246286 is_log1p : bool | None = None ,
247287 exp_post_agg : bool = True ,
288+ clip_value : float | int | None = 20.0 ,
248289 as_polars : bool = False ,
249290 ** kwargs ,
250291) -> pd .DataFrame | pl .DataFrame :
@@ -274,6 +315,8 @@ def parallel_differential_expression(
274315 exp_post_agg: bool
275316 Whether to perform exponential post-aggregation for calculating fold change
276317 (default: perform exponential post-aggregation)
318+ clip_value: float | int | None
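319+ Value substituted for the fold change when the reference mean is zero (infinite or undefined fold change); a fold change of zero is replaced by 1 / clip_value (default: 20.0). Set to None to disable clipping.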
277320 as_polars: bool
278321 return the output dataframe as a polars dataframe
279322 **kwargs:
@@ -349,6 +392,7 @@ def parallel_differential_expression(
349392 tie_correct = tie_correct ,
350393 is_log1p = is_log1p ,
351394 exp_post_agg = exp_post_agg ,
395+ clip_value = clip_value ,
352396 ** kwargs ,
353397 )
354398
0 commit comments