Skip to content

include process mixture (GMM assignment) as a standalone function #34

@noamteyssier

Description

@noamteyssier
import numpy as np
import anndata as ad
import polars as pl
from glob import glob

from geomux import gaussian_mixture

def process_mixture(crispr_path: str, gex_path: str):
    # Load anndata
    cr_adata = ad.read_h5ad(crispr_path)

    # Process mixture
    experiments = cr_adata.obs.experiment.unique()
    barcodes = cr_adata.obs.bc_idx.unique()
    lanes = cr_adata.obs.lane_id.unique()

    assignments = []
    for exp in experiments:
        for bc_idx in barcodes:
            for lane_id in lanes:
                subset = cr_adata[
                    (cr_adata.obs.bc_idx == bc_idx)
                    & (cr_adata.obs.lane_id == lane_id)
                    & (cr_adata.obs.experiment == exp)
                ]
                subset_assignment = gaussian_mixture(subset).with_columns(
                    bc_idx=pl.lit(bc_idx).str.replace("CR", "BC"),
                    lane_id=pl.lit(lane_id),
                    experiment=pl.lit(exp),
                )
                assignments.append(subset_assignment)
    assignments = pl.concat(assignments)

    # load gex as backed
    gex_adata = ad.read_h5ad(gex_path, backed="r")
    gex_adata.obs["cell"] = gex_adata.obs.index.values

    # merge the two
    df = assignments.drop(["cell_id", "guide_ids_original"]).to_pandas()
    gex_adata.obs = gex_adata.obs.merge(
        df,
        how="left",
        on=["cell", "bc_idx", "lane_id", "experiment"],
        suffixes=["_geomux", "_mixture"],
    )
    gex_adata.obs["assignment"] = gex_adata.obs["assignment_mixture"]
    gex_adata.obs["moi"] = gex_adata.obs["moi_mixture"]
    gex_adata.obs["umis"] = gex_adata.obs["umis_mixture"]

    # write gex out
    gex_adata.write_h5ad(
        gex_path.replace("_gex.h5ad", "_gex-mixture.h5ad")
    )



for crispr_path in glob("./aggr/*/*_crispr.h5ad"):
    gex_path = crispr_path.replace("_crispr.h5ad", "_gex.h5ad")
    print(f"Processing: {crispr_path} and {gex_path}")
    process_mixture(crispr_path, gex_path)

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions