pysal · FirePheonix · Jun 6, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 20, 2026
diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -57,4 +57,16 @@ Tooling to determine the optimal bandwidths of geographically weighted models.
 .. autosummary::
    :toctree: generated/
 
-   BandwidthSearch
+   BandwidthSearch
+
+Decomposition
+-------------
+
+Geographically weighted decomposition models.
+
+.. currentmodule:: spatialml.decomposition
+.. autosummary::
+   :toctree: generated/
+
+   BaseDecomposition
+   GWPCA
diff --git a/gwpca_demo.ipynb b/gwpca_demo.ipynb
diff --git a/spatialml/__init__.py b/spatialml/__init__.py
@@ -1,7 +1,7 @@
 import contextlib
 from importlib.metadata import PackageNotFoundError, version
 
-from . import base, ensemble, linear_model, search
+from . import base, decomposition, ensemble, linear_model, search
 
 with contextlib.suppress(PackageNotFoundError):
     __version__ = version("spatialml")
diff --git a/spatialml/base.py b/spatialml/base.py
@@ -12,11 +12,18 @@
 from joblib import dump, load
 from libpysal import graph
 from scipy.spatial import KDTree
-from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
+from sklearn.base import (
+    BaseEstimator,
+    ClassifierMixin,
+    RegressorMixin,
+)
 from sklearn.model_selection import train_test_split
 from sklearn.utils.parallel import Parallel, delayed
 
-__all__ = ["BaseClassifier", "BaseRegressor"]
+__all__ = [
+    "BaseClassifier",
+    "BaseRegressor",
+]
 
 
 def _triangular(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray:
@@ -181,13 +188,18 @@ def _setup_model_storage(self):
     def _fit_models_batch(
         self,
         X: pd.DataFrame,
-        y: pd.Series,
+        y: pd.Series | None,
         weights: graph.Graph,
     ) -> list:
-        """Fit models in batches or all at once"""
+        """Fit models in batches or all at once.
+
+        When ``y is None`` (unsupervised path, e.g. :class:`BaseDecomposition`),
+        no target is injected into the per-neighbourhood frames and no
+        invariance check is performed.
+        """
         if self.batch_size:
             training_output = []
-            num_groups = len(y)
+            num_groups = len(X)
             indices = X.index
             for i in range(0, num_groups, self.batch_size):
                 if self.verbose:
@@ -217,29 +229,38 @@ def _fit_models_batch(
     def _batch_fit(
         self,
         X: pd.DataFrame,
-        y: pd.Series,
+        y: pd.Series | None,
         index: pd.MultiIndex,
         _weight: np.ndarray,
         X_focals: np.ndarray,
     ) -> list:
-        """Fit a batch of local models"""
+        """Fit a batch of local models.
+
+        When ``y`` is provided (supervised path), the per-neighbourhood frame
+        carries a ``_y`` column and an invariance check is run.  When ``y is
+        None`` (unsupervised path used by :class:`BaseDecomposition`), neither
+        the ``_y`` injection nor the invariance check is applied — local fits
+        only need ``X`` and ``_weight``.
+        """
         data = X.copy()
-        data["_y"] = y
+        if y is not None:
+            data["_y"] = y
         data = data.loc[index.get_level_values(1)]
         data["_weight"] = _weight
         grouper = data.groupby(index.get_level_values(0), sort=False)
 
-        invariant = grouper["_y"].nunique() == 1
-        if invariant.any():
-            if self.strict:
-                raise ValueError(
-                    f"y at locations {invariant.index[invariant]} is invariant."
-                )
-            elif self.strict is None:
-                warnings.warn(
-                    f"y at locations {invariant.index[invariant]} is invariant.",
-                    stacklevel=3,
-                )
+        if y is not None:
+            invariant = grouper["_y"].nunique() == 1
+            if invariant.any():
+                if self.strict:
+                    raise ValueError(
+                        f"y at locations {invariant.index[invariant]} is invariant."
+                    )
+                elif self.strict is None:
+                    warnings.warn(
+                        f"y at locations {invariant.index[invariant]} is invariant.",
+                        stacklevel=3,
+                    )
 
         return Parallel(n_jobs=self.n_jobs, temp_folder=self.temp_folder)(
             delayed(self._fit_local)(
@@ -252,8 +273,19 @@ def _batch_fit(
             for (name, group), focal_x in zip(grouper, X_focals, strict=False)
         )
 
-    def _fit_global_model(self, X: pd.DataFrame, y: pd.Series):
-        """Fit global baseline model"""
+    def _fit_global_model(self, X: pd.DataFrame, y: pd.Series | None):
+        """Fit global baseline model for supervised estimators.
+
+        This base-class implementation handles supervised models (classifiers /
+        regressors) that require both ``X`` and ``y``.  The early return when
+        ``y is None`` is a defensive guard: unsupervised subclasses (e.g. ``GWPCA``)
+        override this method entirely — ``GWPCA._fit_global_model`` fits a
+        global :class:`sklearn.decomposition.PCA` using only ``X``, and is
+        called directly from :meth:`GWPCA.fit` so this base-class path is
+        never reached for decompositions.
+        """
+        if y is None:
+            return
         if self._model_type == "random_forest":
             self._model_kwargs["oob_score"] = True
         # fit global model as a baseline
@@ -530,29 +562,40 @@ def _predict_local_ensemble(
     def _validate_fit_inputs(
         self,
         X: pd.DataFrame,
-        y: pd.Series,
+        y: pd.Series | None,
         geometry: gpd.GeoSeries | None,
     ) -> None:
         """
         Validate input data and configuration parameters before model fitting.
 
         This method performs structural and spatial consistency checks to ensure that:
-        - Feature matrix `X` and target vector `y` have matching lengths.
-        - At least one spatial structure (`geometry` or `graph`) is provided.
-        - The provided geometry, if any, matches the number of observations in `X`.
+        - For supervised estimators (classifiers / regressors): ``y`` is not ``None``
+          and has the same length as ``X``.  A ``ValueError`` is raised otherwise,
+          including when ``y=None`` is passed to a supervised model by mistake.
+        - When ``_requires_y`` is ``False`` (unsupervised decompositions, e.g.
+          ``GWPCA``): ``y`` is ignored and both ``y`` checks are skipped.
+        - At least one spatial structure (``geometry`` or ``graph``) is provided.
+        - The provided geometry, if any, matches the number of observations in ``X``.
         - Bandwidth is positive when specified.
-        - Adaptive bandwidth (`fixed=False`) is an integer.
+        - Adaptive bandwidth (``fixed=False``) is an integer.
 
         Raises
         ------
         ValueError
             If any of the validation conditions fail.
         """
-        # Length checks
-        if len(X) != len(y):
-            raise ValueError(
-                f"X and y must have the same length. Got {len(X)} and {len(y)}."
-            )
+        # For supervised estimators y is mandatory — raise if the caller forgot it.
+        # Decomposition estimators do not use y; _requires_y reflects this.
+        if self._requires_y:
+            if y is None:
+                raise ValueError(
+                    "y must be provided for supervised estimators "
+                    f"({type(self).__name__}). Got None."
+                )
+            if len(X) != len(y):
+                raise ValueError(
+                    f"X and y must have the same length. Got {len(X)} and {len(y)}."
+                )
 
         # Geometry presence
         if self.graph is None and geometry is None:
@@ -594,6 +637,18 @@ def _validate_fit_inputs(
                 "kernel must be either a valid string or a callable function."
             )
 
+    @property
+    def _requires_y(self) -> bool:
+        """Whether this estimator requires and uses ``y``.
+
+        This base implementation returns ``True`` (supervised classifiers and
+        regressors); unsupervised decompositions such as
+        :class:`BaseDecomposition` are expected to override it to return
+        ``False``.  Callers (e.g. :class:`spatialml.search.BandwidthSearch`) use
+        this flag to decide whether to pass ``strict`` and other supervised-only kwargs.
+        """
+        return True
+
     # Abstract methods that subclasses must implement
     def _fit_local(
         self,
@@ -605,7 +660,12 @@ def _fit_local(
     ) -> list[Hashable]:
         raise NotImplementedError("Subclasses must implement _fit_local")
 
-    def fit(self, X: pd.DataFrame, y: pd.Series, geometry: gpd.GeoSeries | None = None):
+    def fit(
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        geometry: gpd.GeoSeries | None = None,
+    ):
         raise NotImplementedError("Subclasses must implement fit")
 
     def _get_score_data(
@@ -870,7 +930,10 @@ def __init__(
         self._empty_feature_imp = None
 
     def fit(
-        self, X: pd.DataFrame, y: pd.Series, geometry: gpd.GeoSeries | None = None
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        geometry: gpd.GeoSeries | None = None,
     ) -> "BaseClassifier":
         """Fit geographically weighted local classification models.
 
@@ -1604,7 +1667,10 @@ def __init__(
         self._empty_score_data = (np.array([]), np.array([]))
 
     def fit(
-        self, X: pd.DataFrame, y: pd.Series, geometry: gpd.GeoSeries | None = None
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        geometry: gpd.GeoSeries | None = None,
     ) -> "BaseRegressor":
         """Fit geographically weighted local regression models.
 

diff --git a/spatialml/decomposition/__init__.py b/spatialml/decomposition/__init__.py
@@ -0,0 +1,4 @@
+from ._base import BaseDecomposition
+from .pca import GWPCA
+
+__all__ = ["BaseDecomposition", "GWPCA"]