Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pdex"
version = "0.1.15"
version = "0.1.16"
description = "Parallel differential expression for single-cell perturbation sequencing"
readme = "README.md"
authors = [{ name = "noam teyssier", email = "noam.teyssier@arcinstitute.org" }]
Expand Down
26 changes: 15 additions & 11 deletions src/pdex/_single_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import numpy as np
import pandas as pd
import polars as pl
from adjustpy import adjust
from scipy.sparse import csr_matrix
from adjustpy import adjust # type: ignore
from scipy.sparse import csc_matrix, csr_matrix
from scipy.stats import anderson_ksamp, mannwhitneyu, ttest_ind
from tqdm import tqdm

Expand All @@ -24,13 +24,17 @@


def _build_shared_matrix(
data: np.ndarray | np.matrix | csr_matrix,
data: np.ndarray | np.matrix | csr_matrix | csc_matrix,
) -> tuple[str, tuple[int, int], np.dtype]:
"""Create a shared memory matrix from a numpy array."""
if isinstance(data, np.matrix):
data = np.asarray(data)
elif isinstance(data, csr_matrix):
elif isinstance(data, csr_matrix) or isinstance(data, csc_matrix):
data = data.toarray()

# data should be a numpy array at this point
assert isinstance(data, np.ndarray)

shared_matrix = SharedMemory(create=True, size=data.nbytes)
matrix = np.ndarray(data.shape, dtype=data.dtype, buffer=shared_matrix.buf)
matrix[:] = data
Expand All @@ -48,8 +52,8 @@ def _combinations_generator(
target_masks: dict[str, np.ndarray],
var_indices: dict[str, int],
reference: str,
target_list: list[str],
feature_list: list[str],
target_list: list[str] | np.ndarray,
feature_list: list[str] | np.ndarray,
) -> Iterator[tuple]:
"""Generate all combinations of target genes and features."""
for target in target_list:
Expand Down Expand Up @@ -137,10 +141,10 @@ def _process_target_batch_shm(
pval, stat = (de_result.pvalue, de_result.statistic)
case "anderson":
de_result = anderson_ksamp([x_tgt, x_ref], **kwargs)
pval, stat = (de_result.pvalue, de_result.statistic)
pval, stat = (de_result.pvalue, de_result.statistic) # type: ignore (has attributes pvalue and statistic)
case "t-test":
de_result = ttest_ind(x_tgt, x_ref, **kwargs)
pval, stat = (de_result.pvalue, de_result.statistic)
pval, stat = (de_result.pvalue, de_result.statistic) # type: ignore (has attributes pvalue and statistic)
case _:
raise KeyError(f"Unknown Metric: {metric}")
except ValueError:
Expand Down Expand Up @@ -282,14 +286,14 @@ def parallel_differential_expression(
if metric not in KNOWN_METRICS:
raise ValueError(f"Unknown metric: {metric} :: Expecting: {KNOWN_METRICS}")

unique_targets = adata.obs[groupby_key].unique()
unique_targets = np.array(adata.obs[groupby_key].unique())
if groups is not None:
unique_targets = [
target
for target in unique_targets
if target in groups or target == reference
]
unique_features = adata.var.index
unique_features = np.array(adata.var.index)

if not is_log1p:
is_log1p = guess_is_log(adata)
Expand Down Expand Up @@ -318,7 +322,7 @@ def parallel_differential_expression(

# Isolate the data matrix from the AnnData object
logger.info("Creating shared memory memory matrix for parallel computing")
(shm_name, shape, dtype) = _build_shared_matrix(data=adata.X)
(shm_name, shape, dtype) = _build_shared_matrix(data=adata.X) # type: ignore

logger.info(f"Creating generator of all combinations: N={n_combinations}")
combinations = _combinations_generator(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_pbdex.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ def build_random_anndata(
]

return ad.AnnData(
X=np.random.randint(0, MAX_UMI, size=(n_cells, n_genes)),
X=np.random.randint(0, int(MAX_UMI), size=(n_cells, n_genes)),
obs=obs,
var=pd.DataFrame(index=[f"gene.{j}" for j in np.arange(N_GENES)]),
var=pd.DataFrame(index=np.array([f"gene.{j}" for j in np.arange(N_GENES)])),
)


Expand Down
8 changes: 4 additions & 4 deletions tests/test_pdex.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def build_random_anndata(
if random_state is not None:
np.random.seed(random_state)
return ad.AnnData(
X=np.random.randint(0, MAX_UMI, size=(n_cells, n_genes)),
X=np.random.randint(0, int(MAX_UMI), size=(n_cells, n_genes)),
obs=pd.DataFrame(
{
pert_col: np.random.choice(
Expand All @@ -55,7 +55,7 @@ def test_dex_dense_array():

def test_dex_dense_array_log():
adata = build_random_anndata()
adata.X = np.log1p(adata.X)
adata.X = np.log1p(adata.X) # type: ignore
results = parallel_differential_expression(
adata,
reference=CONTROL_VAR,
Expand All @@ -66,7 +66,7 @@ def test_dex_dense_array_log():

def test_dex_dense_array_log_post_agg():
adata = build_random_anndata()
adata.X = np.log1p(adata.X)
adata.X = np.log1p(adata.X) # type: ignore
results = parallel_differential_expression(
adata,
reference=CONTROL_VAR,
Expand All @@ -78,7 +78,7 @@ def test_dex_dense_array_log_post_agg():

def test_dex_dense_matrix():
adata = build_random_anndata()
adata.X = np.matrix(adata.X)
adata.X = np.matrix(adata.X) # type: ignore
results = parallel_differential_expression(
adata,
reference=CONTROL_VAR,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def build_anndata(log=False) -> ad.AnnData:
return ad.AnnData(
X=np.random.random(size=dim)
if log
else np.random.randint(0, MAX_COUNT, size=dim)
else np.random.randint(0, int(MAX_COUNT), size=dim)
)


Expand Down