Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 47 additions & 6 deletions pymrio/core/mriosystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
# warnings.warn(message, DeprecationWarning, stacklevel=2)


# Column labels commonly used in place of the expected "region" / "sector"
# names. Entries are lower case; they are matched against lower-cased column
# labels when composing hints for the duplicate-index error in characterize.
ALTERNATIVE_REGION_NAMES = [
    "country",
    "countries",
    "regions",
    "reg",
    "location",
]
ALTERNATIVE_SECTOR_NAMES = [
    "sectors",
    "industry",
    "industries",
    "sec",
    "activity",
]


# Exceptions
class ResetError(Exception):
"""Base class for errors while resetting the system."""
Expand Down Expand Up @@ -1788,12 +1793,48 @@ def characterize(
)
return ret_value(validation=validation, extension=None)

fac_calc = (
factors.set_index(index_col + [characterized_name_column])
.loc[:, characterization_factors_column]
.unstack(characterized_name_column)
.fillna(0)
)
# Check for duplicate index entries before unstacking so the caller gets an
# actionable message instead of a reshape failure.
# This can happen if region/sector columns are not named correctly.
factors_indexed = factors.set_index(index_col + [characterized_name_column])

if factors_indexed.index.duplicated().any():
    # Column labels are not guaranteed to be strings (e.g. integer labels).
    # Normalise through str() before lower-casing; calling .lower() on a
    # non-string label would raise AttributeError here and mask the
    # ValueError this branch is meant to raise.
    lowered_by_col = [(col, str(col).lower()) for col in factors.columns]

    # The message is collected piecewise and joined once at the end.
    msg_parts = [
        "Duplicate indices found in characterization factors. "
        "This typically occurs when region or sector specific characterization factors "
        "are provided but the column names don't match pymrio's expectations.\n\n"
        "Expected column names:\n"
        " - 'region' (lowercase) for region-specific factors\n"
        " - 'sector' (lowercase) for sector-specific factors\n\n"
        f"Current columns in factors dataframe: {list(factors.columns)}\n\n"
    ]

    # Columns that differ from the expected name only by letter case.
    possible_region_cols = [col for col, low in lowered_by_col if low == "region" and col != "region"]
    possible_sector_cols = [col for col, low in lowered_by_col if low == "sector" and col != "sector"]

    if possible_region_cols:
        msg_parts.append(f"Found possible region column with different case: {possible_region_cols}\n")
        msg_parts.append("Please rename it to 'region' (lowercase).\n")
    if possible_sector_cols:
        msg_parts.append(f"Found possible sector column with different case: {possible_sector_cols}\n")
        msg_parts.append("Please rename it to 'sector' (lowercase).\n")

    # Alternative names are only suggested when no case mismatch was found,
    # so the more specific hint above is not diluted.
    if not possible_region_cols and not possible_sector_cols:
        found_region_alternatives = [col for col, low in lowered_by_col if low in ALTERNATIVE_REGION_NAMES]
        found_sector_alternatives = [col for col, low in lowered_by_col if low in ALTERNATIVE_SECTOR_NAMES]

        if found_region_alternatives:
            msg_parts.append(f"Found possible alternative region column names: {found_region_alternatives}\n")
            msg_parts.append("Please rename to 'region' (lowercase) if these are region identifiers.\n")
        if found_sector_alternatives:
            msg_parts.append(f"Found possible alternative sector column names: {found_sector_alternatives}\n")
            msg_parts.append("Please rename to 'sector' (lowercase) if these are sector identifiers.\n")

    raise ValueError("".join(msg_parts))

fac_calc = factors_indexed.loc[:, characterization_factors_column].unstack(characterized_name_column).fillna(0)

new_ext = Extension(name=name)

Expand Down
50 changes: 50 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,56 @@ def test_characterize_extension_over_extensions(fix_testmrio):
pdt.assert_frame_equal(ex_reg_one.unit, ex_reg_method.unit)


def test_characterize_error_wrong_column_names(fix_testmrio):
    """Check the hints in the error raised for a misnamed region column.

    The region-specific characterization factors are loaded once; the
    ``region`` column is then renamed in several ways and the resulting
    ``ValueError`` message is inspected. Finally the unmodified factors are
    characterized to confirm the valid input is still accepted.
    """
    tmrio = fix_testmrio.testmrio

    # Region-specific factors shipped with the test MRIO
    reg_spec_file = Path(PYMRIO_PATH["test_mrio"] / Path("concordance") / "emissions_charact_reg_spec.tsv")
    factors_reg_spec = pd.read_csv(reg_spec_file, sep="\t")

    def message_for_renamed_region(new_name):
        """Rename 'region' to new_name, characterize, return the error text."""
        renamed_factors = factors_reg_spec.copy().rename(columns={"region": new_name})
        with pytest.raises(ValueError) as exc_info:
            tmrio.emissions.characterize(renamed_factors)
        return str(exc_info.value)

    # Case 1: only the letter case differs (Region instead of region)
    wrong_case_msg = message_for_renamed_region("Region")
    assert "Duplicate indices found" in wrong_case_msg
    assert "column names don't match pymrio's expectations" in wrong_case_msg
    assert "'region' (lowercase)" in wrong_case_msg
    assert "Found possible region column with different case: ['Region']" in wrong_case_msg

    # Case 2: an alternative name (country instead of region)
    country_msg = message_for_renamed_region("country")
    assert "Duplicate indices found" in country_msg
    assert "Found possible alternative region column names: ['country']" in country_msg

    # Case 3: an abbreviated name (reg instead of region)
    reg_msg = message_for_renamed_region("reg")
    assert "Duplicate indices found" in reg_msg
    assert "Found possible alternative region column names: ['reg']" in reg_msg

    # Case 4: the correctly named columns must still work
    result = tmrio.emissions.characterize(factors_reg_spec)
    assert result.extension is not None


def test_extension_convert_simple(fix_testmrio):
"""Testing the convert function within extensions object."""
tt_pre = fix_testmrio.testmrio.copy()
Expand Down