Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pycytominer/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def consensus(
features : list
A list of strings corresponding to feature measurement column names in the
`profiles` DataFrame. All features listed must be found in `profiles`.
Defaults to "infer". If "infer", then assume cell painting features are those
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
output_file : str, optional
If provided, will write consensus profiles to file. If not specified, will
Expand Down
15 changes: 10 additions & 5 deletions pycytominer/cyto_utils/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def infer_cp_features(
metadata=False,
image_features=False,
):
"""Given a dataframe, output features that we expect to be Cell Painting features.
"""Given a CellProfiler input dataframe, output feature column names as a list.

Parameters
----------
Expand All @@ -90,6 +90,8 @@ def infer_cp_features(
Compartments from which CellProfiler features were extracted.
metadata : bool, default False
Whether or not to infer metadata features.
If metadata is set to True, find column names that begin with the `Metadata_` prefix.
This convention is expected by CellProfiler defaults.
image_features : bool, default False
Whether or not the profiles contain image features.

Expand All @@ -115,9 +117,12 @@ def infer_cp_features(
population_df.columns.str.startswith("Metadata_")
].tolist()

assert ( # noqa: S101
len(features) > 0
), "No CP features found. Are you sure this dataframe is from CellProfiler?"
if len(features) == 0:
raise ValueError(
"No features or metadata found. Pycytominer expects CellProfiler column names by default. "
"If you're using non-CellProfiler data, please do not 'infer' features. "
"Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually."
)

return features

Expand Down Expand Up @@ -150,7 +155,7 @@ def drop_outlier_features(
population_df : pandas.core.frame.DataFrame
DataFrame that includes metadata and observation features.
features : list of str or str, default "infer"
Features present in the population dataframe. If "infer", then assume Cell Painting features are those that start with "Cells_", "Nuclei_", or "Cytoplasm_"
Features present in the population dataframe. If "infer", then assume CellProfiler feature conventions (start with "Cells_", "Nuclei_", or "Cytoplasm_")
samples : str, default "all"
List of samples to perform operation on. The function uses a pd.DataFrame.query()
function, so you should structure samples in this fashion. An example is
Expand Down
7 changes: 4 additions & 3 deletions pycytominer/cyto_utils/modz.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@ def modz(
a string or list of column(s) in the population dataframe that
indicate replicate level information
features : list, default "infer"
List of features present in the population dataframe [default: "infer"]
if "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_".
A list of strings corresponding to feature measurement column names in the
`population_df` DataFrame. All features listed must be found in `population_df`.
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
method : str, default "spearman"
indicating which correlation metric to use.
min_weight : float, default 0.01
Expand Down
2 changes: 1 addition & 1 deletion pycytominer/cyto_utils/write_gct.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def write_gct(
features : list
A list of strings corresponding to feature measurement column names in the
`profiles` DataFrame. All features listed must be found in `profiles`.
Defaults to "infer". If "infer", then assume cell painting features are those
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
meta_features : list
A list of strings corresponding to metadata column names in the `profiles`
Expand Down
4 changes: 2 additions & 2 deletions pycytominer/feature_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ def feature_select(
----------
profiles : pandas.core.frame.DataFrame or file
DataFrame or file of profiles.
features : list
features : list, default "infer"
A list of strings corresponding to feature measurement column names in the
`profiles` DataFrame. All features listed must be found in `profiles`.
Defaults to "infer". If "infer", then assume cell painting features are those
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
image_features: bool, default False
Whether the profiles contain image features.
Expand Down
7 changes: 4 additions & 3 deletions pycytominer/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@ def normalize(
features : list
A list of strings corresponding to feature measurement column names in the
`profiles` DataFrame. All features listed must be found in `profiles`.
Defaults to "infer". If "infer", then assume cell painting features are those
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
image_features: bool, default False
Whether the profiles contain image features.
meta_features : list
A list of strings corresponding to metadata column names in the `profiles`
DataFrame. All features listed must be found in `profiles`. Defaults to "infer".
If "infer", then assume metadata features are those prefixed with "Metadata"
If "infer", then assume CellProfiler metadata features, identified by
column names that begin with the `Metadata_` prefix.
samples : str
The metadata column values to use as a normalization reference. We often use
control samples. The function uses a pd.DataFrame.query() function, so you should
Expand Down Expand Up @@ -114,7 +115,7 @@ def normalize(
normalized_df = normalize(
profiles=data_df,
features=["x", "y", "z", "zz"],
meta_features="infer",
meta_features=["Metadata_plate", "Metadata_treatment"],
samples="Metadata_treatment == 'control'",
method="standardize"
)
Expand Down
7 changes: 4 additions & 3 deletions pycytominer/operations/correlation_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ def correlation_threshold(
population_df : pandas.core.frame.DataFrame
DataFrame that includes metadata and observation features.
features : list, default "infer"
List of features present in the population dataframe [default: "infer"]
if "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_".
A list of strings corresponding to feature measurement column names in the
`population_df` DataFrame. All features listed must be found in `population_df`.
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
samples : str, default "all"
List of samples to perform operation on. The function uses a pd.DataFrame.query()
function, so you should structure samples in this fashion. An example is
Expand Down
11 changes: 6 additions & 5 deletions pycytominer/operations/get_na_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ def get_na_columns(population_df, features="infer", samples="all", cutoff=0.05):
population_df : pandas.core.frame.DataFrame
DataFrame that includes metadata and observation features.
features : list, default "infer"
List of features present in the population dataframe [default: "infer"]
if "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_".
A list of strings corresponding to feature measurement column names in the
`population_df` DataFrame. All features listed must be found in `population_df`.
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
samples : str, default "all"
List of samples to perform operation on. The function uses a pd.DataFrame.query()
function, so you should structure samples in this fashion. An example is
Expand All @@ -36,8 +37,8 @@ def get_na_columns(population_df, features="infer", samples="all", cutoff=0.05):

if features == "infer":
features = infer_cp_features(population_df)
else:
population_df = population_df.loc[:, features]

population_df = population_df.loc[:, features]

num_rows = population_df.shape[0]
na_prop_df = population_df.isna().sum() / num_rows
Expand Down
7 changes: 4 additions & 3 deletions pycytominer/operations/noise_removal.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ def noise_removal(
The list of unique perturbations corresponding to the rows in population_df. For example,
perturb1_well1 and perturb1_well2 would both be "perturb1".
features : list, default "infer"
List of features present in the population dataframe [default: "infer"]
if "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_".
A list of strings corresponding to feature measurement column names in the
`population_df` DataFrame. All features listed must be found in `population_df`.
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
samples : str, default "all"
List of samples to perform operation on. The function uses a pd.DataFrame.query()
function, so you should structure samples in this fashion. An example is
Expand Down
7 changes: 4 additions & 3 deletions pycytominer/operations/variance_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ def variance_threshold(
population_df : pandas.core.frame.DataFrame
DataFrame that includes metadata and observation features.
features : list, default "infer"
List of features present in the population dataframe [default: "infer"]
if "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_".
A list of strings corresponding to feature measurement column names in the
`population_df` DataFrame. All features listed must be found in `population_df`.
Defaults to "infer". If "infer", then assume CellProfiler features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
samples : str, default "all"
List of samples to perform operation on. The function uses a pd.DataFrame.query()
function, so you should structure samples in this fashion. An example is
Expand Down
9 changes: 7 additions & 2 deletions tests/test_cyto_utils/test_feature_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,15 @@ def test_feature_infer():


def test_feature_infer_nocp():
with pytest.raises(AssertionError) as nocp:
with pytest.raises(ValueError) as nocp:
infer_cp_features(population_df=non_cp_data_df)

assert "No CP features found." in str(nocp.value)
expected_message = (
"No features or metadata found. Pycytominer expects CellProfiler column names by default. "
"If you're using non-CellProfiler data, please do not 'infer' features. "
"Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually."
)
assert expected_message in str(nocp.value)


def test_metadata_feature_infer():
Expand Down
9 changes: 7 additions & 2 deletions tests/test_operations/test_correlation_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_correlation_threshold_samples():


def test_correlation_threshold_featureinfer():
with pytest.raises(AssertionError) as nocp:
with pytest.raises(ValueError) as nocp:
correlation_threshold_result = correlation_threshold(
population_df=data_df,
features="infer",
Expand All @@ -84,7 +84,12 @@ def test_correlation_threshold_featureinfer():
method="pearson",
)

assert "No CP features found." in str(nocp.value)
expected_message = (
"No features or metadata found. Pycytominer expects CellProfiler column names by default. "
"If you're using non-CellProfiler data, please do not 'infer' features. "
"Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually."
)
assert expected_message in str(nocp.value)

data_cp_df = data_df.copy()
data_cp_df.columns = [f"Cells_{x}" for x in data_df.columns]
Expand Down
9 changes: 7 additions & 2 deletions tests/test_operations/test_get_na_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,14 @@ def test_get_na_columns_sample():


def test_get_na_columns_featureinfer():
with pytest.raises(AssertionError) as nocp:
with pytest.raises(ValueError) as nocp:
get_na_columns(
population_df=data_df, samples="all", features="infer", cutoff=0.1
)

assert "No CP features found." in str(nocp.value)
expected_message = (
"No features or metadata found. Pycytominer expects CellProfiler column names by default. "
"If you're using non-CellProfiler data, please do not 'infer' features. "
"Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually."
)
assert expected_message in str(nocp.value)
9 changes: 7 additions & 2 deletions tests/test_operations/test_variance_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,17 @@ def test_variance_threshold():

def test_variance_threshold_featureinfer():
unique_cut = 0.01
with pytest.raises(AssertionError) as nocp:
with pytest.raises(ValueError) as nocp:
excluded_features = variance_threshold(
population_df=data_unique_test_df, features="infer", unique_cut=unique_cut
)

assert "No CP features found." in str(nocp.value)
expected_message = (
"No features or metadata found. Pycytominer expects CellProfiler column names by default. "
"If you're using non-CellProfiler data, please do not 'infer' features. "
"Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually."
)
assert expected_message in str(nocp.value)

data_cp_df = data_unique_test_df.copy()
data_cp_df.columns = [f"Cells_{x}" for x in data_unique_test_df.columns]
Expand Down