neuroinformatics-unit · HenryCrosswell · Jun 3, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/oscar/breeding_scheme.py b/oscar/breeding_scheme.py
@@ -34,6 +34,11 @@ def from_string(cls, genotype_str: str) -> tuple[Self, ...]:
         tuple[Self, ...]
             Converted tuple of genotypes
         """
+        if not isinstance(genotype_str, str):
+            raise TypeError(
+                f"expected type - string, actual type - {type(genotype_str)}"
+            )
+
         genotype_strings = genotype_str.split("_")
         genotypes = [
             cls[genotype_string.upper()]

diff --git a/oscar/colony_management/pyrat/standardise.py b/oscar/colony_management/pyrat/standardise.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 
-from oscar.breeding_scheme import Genotype
+from oscar.breeding_scheme import BreedingScheme, Genotype
 
 
 class Identifier(Enum):
@@ -94,6 +94,9 @@ def standardise_pyrat_csv(
     id_offspring_col = standard_csv.pop("ID_offspring")
     standard_csv.insert(0, "ID_offspring", id_offspring_col)
 
+    to_ignore = standard_csv.apply(_remove_impossible_breeding_schemes, axis=1)
+    standard_csv = standard_csv[~to_ignore]
-    to_ignore = standard_csv.apply(_remove_impossible_breeding_schemes, axis=1)
-    standard_csv = standard_csv[~to_ignore]
+    impossible_breeding_schemes = standard_csv.apply(_is_impossible_breeding_scheme, axis=1)
+    standard_csv = standard_csv[~impossible_breeding_schemes]
-    to_ignore = standard_csv.apply(_remove_impossible_breeding_schemes, axis=1)
-    standard_csv = standard_csv[~to_ignore]
+    impossible_breeding_schemes = standard_csv.apply(_is_impossible_breeding_scheme, axis=1)
+    standard_csv = standard_csv[~impossible_breeding_schemes]
+
     return standard_csv
 
 
@@ -384,3 +387,41 @@ def _make_combined_genotype_column_for_identifier(
     line_data.loc[genotyped_rows, new_col_name] = pivoted_mutations.loc[
         genotyped_rows, unique_mutations
     ].agg("_".join, axis=1)
+
+
+def _remove_impossible_breeding_schemes(
-def _remove_impossible_breeding_schemes(
+def _is_impossible_breeding_scheme(
-def _remove_impossible_breeding_schemes(
+def _is_impossible_breeding_scheme(
+    standardised_df_row: pd.Series,
+) -> bool:
+    """Retrieves parent genotypes and pulls the mendalian ratios from
+    BreedingScheme.
+    Compares offspring to these ratios, removing those which are not possible.
+    e.g. hom x hom parents cannot produce wt x wt offspring.
-) -> bool:
-    """Retrieves parent genotypes and pulls the mendalian ratios from
-    BreedingScheme.
-    Compares offspring to these ratios, removing those which are not possible.
-    e.g. hom x hom parents cannot produce wt x wt offspring.
+) -> bool:
+    """
+    Checks whether the given row contains an impossible breeding scheme.
+    
+    Retrieves parent genotypes and pulls the mendalian ratios from
+    BreedingScheme. Compares offspring to these ratios, returning True for 
+    those which are not possible.
+    e.g. hom x hom parents cannot produce wt offspring.
-) -> bool:
-    """Retrieves parent genotypes and pulls the mendalian ratios from
-    BreedingScheme.
-    Compares offspring to these ratios, removing those which are not possible.
-    e.g. hom x hom parents cannot produce wt x wt offspring.
+) -> bool:
+    """
+    Checks whether the given row contains an impossible breeding scheme.
+    
+    Retrieves parent genotypes and pulls the mendalian ratios from
+    BreedingScheme. Compares offspring to these ratios, returning True for 
+    those which are not possible.
+    e.g. hom x hom parents cannot produce wt offspring.
+
+    Parameters
+    ----------
+    standardised_df_row : pd.Series
+        rom from standardised_dataframe (pd.DataFrame): standardised PyRAT df
-        rom from standardised_dataframe (pd.DataFrame): standardised PyRAT df
+        row from standardised_dataframe (pd.DataFrame): standardised PyRAT df
-        rom from standardised_dataframe (pd.DataFrame): standardised PyRAT df
+        row from standardised_dataframe (pd.DataFrame): standardised PyRAT df
+
+    Returns
+    -------
+    bool
+        bool of whether or not that row contains an impossible breeding scheme
+    """
+
+    pop = False
+
+    genotype_father = standardised_df_row["genotype_father"]
+    genotype_mother = standardised_df_row["genotype_mother"]
+    genotype_offspring = standardised_df_row["genotype_offspring"]
+
+    typed_offspring = Genotype.from_string(genotype_offspring)
+
+    scheme = BreedingScheme(genotype_father, genotype_mother)
+    ratio = scheme.mendelian_ratio()
+
+    if typed_offspring not in ratio:
+        pop = True
+    elif ratio[typed_offspring] == 0:
+        pop = True
+
+    return pop
diff --git a/tests/pooch_registry.txt b/tests/pooch_registry.txt
@@ -5,7 +5,9 @@ standardised-data-2-mutations.csv 7bf5409814715013806602afbfc87f9a710f3ecf7aa864
 pyrat-data-3-mutations.csv e6f95c47a3d4db6d1c7ec75e89de013f1b1e3331d8609fcd4be17c61ba21bece
 standardised-data-3-mutations.csv f598f88afe11aa6bd99cddd6b30fa2a2ba36b473daa22ad46f9171bcad3a1d71
 pyrat-data-forbidden-genotypes.csv 4fccb7fda7167d7e3253bad338e6fda03c5d8012362bbf7fc11d7ee4c0ec4487
+pyrat-data-forbidden-schemes.csv c5c14c144b036eadcbea5c3ba5ce99ea5948fd51b27f4c6e3fbdd5f13ce4a7a5
 standardised-data-forbidden-genotypes.csv 4859e1b97ec296a707f1d490464e4a8d18939ff1c26c9c9a604c7744580c791b
+standardised-data-forbidden-schemes.csv d87d28762d4d2ab1bac1390e4c11927516fa881a3cef1bfe21e03540721072b7
 pyrat-api-single-response.csv 14314c56a153d048c0c86d2253ce8c85a965fcbb2e9d2a8f37f4d5a58793b287
 pyrat-api-single-response-father.json 9bab0bd1c9810841dcf76e83f788f6ee61431211643252eb3fe4c6e0a365483e
 pyrat-api-single-response-mother.json cffb747c5b09a73b37c13e281b16c7292e5577cf02e0c0b35588f5c5640d9fd9

diff --git a/tests/pooch_test_data.py b/tests/pooch_test_data.py
@@ -4,7 +4,7 @@
 
 GIN_REPO = pooch.create(
     path=Path(__file__).parents[1] / "test_data",
-    base_url="https://gin.g-node.org/neuroinformatics/oscar-test-data/raw/master/",
+    base_url="https://gin.g-node.org/neuroinformatics/oscar-test-data/raw/hc/add-forbidden-schemes",
     registry=None,
     retry_if_failed=5,
 )

diff --git a/tests/test_unit/pyrat/test_standardise.py b/tests/test_unit/pyrat/test_standardise.py
@@ -62,6 +62,27 @@ def test_standardise_genotypes():
         pooch_data_path("standardised-data-forbidden-genotypes.csv")
     )
 
+    with pytest.raises(TypeError):
+        standard_csv = standardise_pyrat_csv(pyrat_csv)
+        pd.testing.assert_frame_equal(
+            standard_csv.reset_index(drop=True),
+            expected_csv.reset_index(drop=True),
+        )
+
+
+def test_remove_impossible_breeding_schemes():
+    """
+    Test that impossible breeding schemes are removed from raw data.
+    (e.g. hom x hom parents cannot make wt offspring)
+    """
+
+    pyrat_csv = pd.read_csv(
+        pooch_data_path("pyrat-data-forbidden-schemes.csv")
+    )
+    expected_csv = pd.read_csv(
+        pooch_data_path("standardised-data-forbidden-schemes.csv")
+    )
+
     standard_csv = standardise_pyrat_csv(pyrat_csv)
     pd.testing.assert_frame_equal(
         standard_csv.reset_index(drop=True),