Skip to content
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 201 additions & 3 deletions baybe/recommenders/pure/bayesian/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,57 @@
from __future__ import annotations

import gc
import warnings
from abc import ABC
from typing import TYPE_CHECKING
from collections.abc import Callable, Iterable
from typing import TYPE_CHECKING, Any

import numpy as np
import pandas as pd
from attrs import define, field
from attrs import define, field, fields
from attrs.converters import optional
from attrs.validators import ge, gt, instance_of
from typing_extensions import override

from baybe.acquisition import qLogEI, qLogNEHVI
from baybe.acquisition.base import AcquisitionFunction
from baybe.acquisition.utils import convert_acqf
from baybe.exceptions import (
IncompatibleAcquisitionFunctionError,
InfeasibilityError,
)
from baybe.objectives.base import Objective
from baybe.recommenders.pure.base import PureRecommender
from baybe.searchspace import SearchSpace
from baybe.recommenders.pure.bayesian.continuous import (
recommend_continuous_torch,
)
from baybe.recommenders.pure.bayesian.discrete import (
recommend_discrete_with_subsets,
recommend_discrete_without_subsets,
)
from baybe.recommenders.pure.bayesian.hybrid import (
recommend_hybrid_with_subsets,
recommend_hybrid_without_subsets,
)
from baybe.recommenders.pure.bayesian.botorch.optimizers.base import OptimizerProtocol
from baybe.recommenders.pure.bayesian.botorch.optimizers.basic import GradientOptimizer
from baybe.searchspace import (
SearchSpace,
SubspaceContinuous,
SubspaceDiscrete,
)
from baybe.settings import Settings
from baybe.surrogates import GaussianProcessSurrogate
from baybe.surrogates.base import (
Surrogate,
SurrogateProtocol,
)
from baybe.utils.validation import preprocess_dataframe, validate_object_names
from baybe.utils.sampling_algorithms import DiscreteSamplingMethod

if TYPE_CHECKING:
from botorch.acquisition import AcquisitionFunction as BoAcquisitionFunction
from torch import Tensor


def _autoreplicate(surrogate: SurrogateProtocol, /) -> SurrogateProtocol:
Expand All @@ -55,6 +79,39 @@ class BayesianRecommender(PureRecommender, ABC):
)
"""The acquisition function. When omitted, a default is used."""

# NOTE: A factory is used instead of ``default=GradientOptimizer()`` so that every
# recommender gets its own optimizer object. With ``default=``, a single instance
# would be created at class-definition time and shared by all recommenders.
optimizer: OptimizerProtocol = field(
    alias="optimizer",
    factory=GradientOptimizer,
)
"""The acquisition function optimizer. When omitted, a new
:class:`GradientOptimizer` with default settings is created."""

# TODO: Move the fields below to the respective optimizers

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fields were moved from BotorchRecommender to BayesianRecommender for now; the ultimate goal is to store them in the respective optimizers where they are needed. This prepares the removal of BotorchRecommender (which, as discussed, should be done in this PR once we have decided on how the BayesianRecommender should look).

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reasonable to just have them here for the moment imo

hybrid_sampler: DiscreteSamplingMethod | None = field(
converter=optional(DiscreteSamplingMethod), default=None
)
"""Strategy used for sampling the discrete subspace when performing hybrid search
space optimization."""

sampling_percentage: float = field(default=1.0)
"""Percentage of discrete search space that is sampled when performing hybrid search
space optimization. Ignored when ``hybrid_sampler=None``."""

n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10)
"""Number of times gradient-based optimization is restarted from different initial
points. **Does not affect purely discrete optimization**.
"""

n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64)
"""Number of raw samples drawn for the initialization heuristic in gradient-based
optimization. **Does not affect purely discrete optimization**.
"""

max_n_subsets: int = field(default=10, validator=[instance_of(int), ge(1)])
"""Maximum number of subsets to evaluate when subset-generating constraints are
present (e.g., continuous cardinality constraints). If the total number of
subsets exceeds this limit, a random subset of that size is sampled for
optimization instead of performing an exhaustive search."""

# TODO: The objective is currently only required for validating the recommendation
# context. Once multi-target support is complete, we might want to refactor
# the validation mechanism, e.g. by
Expand Down Expand Up @@ -196,6 +253,147 @@ def recommend(
else:
raise

@override
def _recommend_discrete(

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dispatching logic moved from BotorchRecommender to BayesianRecommender to prepare for removal of BotorchRecommender as discussed

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just one question: Do we then still need the `@override` decorators? I do not think that those internal functions specialized to the individual search space types are defined at the level of the PureRecommender; could you verify?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are methods with those names in the PureRecommender, but they only raise NotImplementedError.

self,
subspace_discrete: SubspaceDiscrete,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.Index:
"""Generate recommendations from a discrete search space.

Dispatches to the appropriate optimization routine depending on whether
subset constraints are present.

Args:
subspace_discrete: The discrete subspace from which to generate
recommendations.
candidates_exp: The experimental representation of all discrete candidate
points to be considered.
batch_size: The size of the recommendation batch.

Returns:
The dataframe indices of the recommended points in the provided
experimental representation.
"""
if subspace_discrete.n_subsets > 0:
return recommend_discrete_with_subsets(
self, subspace_discrete, candidates_exp, batch_size
)
return recommend_discrete_without_subsets(
self, subspace_discrete, candidates_exp, batch_size
)

@override
def _recommend_continuous(
    self,
    subspace_continuous: SubspaceContinuous,
    batch_size: int,
) -> pd.DataFrame:
    """Recommend a batch of points from a continuous search space.

    Args:
        subspace_continuous: The continuous subspace to draw the
            recommendations from.
        batch_size: The number of points to recommend.

    Raises:
        IncompatibleAcquisitionFunctionError: If more than one point is
            requested but the acquisition function does not support batching.

    Returns:
        A dataframe with one recommended point per row, whose columns are the
        parameter names of the given subspace.
    """
    assert self._objective is not None

    # The acquisition function only needs to be inspected for true batches.
    if batch_size > 1:
        acqf = self._get_acquisition_function(self._objective)
        if not acqf.supports_batching:
            raise IncompatibleAcquisitionFunctionError(
                f"The '{self.__class__.__name__}' only works with Monte Carlo "
                f"acquisition functions for batch sizes > 1."
            )

    # The second return value (the acquisition values) is not needed here.
    recommended, _ = recommend_continuous_torch(self, subspace_continuous, batch_size)
    column_names = subspace_continuous.parameter_names
    return pd.DataFrame(recommended, columns=column_names)

@override
def _recommend_hybrid(
    self,
    searchspace: SearchSpace,
    candidates_exp: pd.DataFrame,
    batch_size: int,
) -> pd.DataFrame:
    """Recommend a batch of points from a hybrid search space.

    The call is forwarded to one of two routines, selected by whether the
    search space reports any subsets (``searchspace.n_subsets > 0``).

    Args:
        searchspace: The search space in which the recommendations are made.
        candidates_exp: The experimental representation of the candidates of
            the discrete subspace.
        batch_size: The number of points to recommend.

    Returns:
        The recommended points.
    """
    # Pick the routine first, then forward the (identical) arguments once.
    recommend = (
        recommend_hybrid_with_subsets
        if searchspace.n_subsets > 0
        else recommend_hybrid_without_subsets
    )
    return recommend(self, searchspace, candidates_exp, batch_size)

def _optimize_over_subsets(
    self,
    subset_callables: Iterable[Callable[[], tuple[Any, Tensor]]],
) -> tuple[Any, Tensor]:
    """Run the optimization for every subset and keep the best outcome.

    Every callable is invoked without arguments and is expected to return a
    ``(result, acquisition_value)`` pair. Callables raising an infeasibility
    error (BoTorch's or BayBE's) are skipped without further notice.

    Args:
        subset_callables: Zero-argument callables, one per subset. Each runs
            the optimization for its subset and returns
            ``(result, acqf_value)``, or raises ``InfeasibilityError`` when
            the subset is infeasible.

    Raises:
        InfeasibilityError: If no subset yields a feasible solution.

    Returns:
        The result and acquisition value belonging to the subset with the
        largest acquisition value.
    """
    from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError

    feasible_results: list = []
    feasible_values: list[Tensor] = []

    for run_subset in subset_callables:
        try:
            outcome, value = run_subset()
        except (BoInfeasibilityError, InfeasibilityError):
            # Infeasible subsets are intentionally ignored.
            continue
        else:
            feasible_results.append(outcome)
            feasible_values.append(value)

    if not feasible_results:
        raise InfeasibilityError(
            "No feasible solution could be found. Potentially the specified "
            "constraints are too restrictive, i.e. there may be too many "
            "constraints or thresholds may have been set too tightly. "
            "Consider relaxing the constraints to improve the chances "
            "of finding a feasible solution."
        )

    winner = np.argmax(feasible_values)
    return feasible_results[winner], feasible_values[winner]

def acquisition_values(
self,
candidates: pd.DataFrame,
Expand Down
37 changes: 6 additions & 31 deletions baybe/recommenders/pure/bayesian/botorch/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
)
from baybe.parameters.numerical import _FixedNumericalContinuousParameter
from baybe.searchspace import SubspaceContinuous
from baybe.utils.basic import flatten
from baybe.searchspace.core import SearchSpace

if TYPE_CHECKING:
from torch import Tensor
Expand Down Expand Up @@ -147,9 +147,6 @@ def recommend_continuous_without_cardinality_constraints(
Raises:
ValueError: If the continuous search space has cardinality constraints.
"""
import torch
from botorch.optim import optimize_acqf

if subspace_continuous.n_subsets > 0:
raise ValueError(
f"'{recommend_continuous_without_cardinality_constraints.__name__}' "
Expand Down Expand Up @@ -181,32 +178,10 @@ def recommend_continuous_without_cardinality_constraints(
# because it is unclear if the corresponding presence checks for these
# arguments is correctly implemented in all invoked BoTorch subroutines.
# For details: https://github.com/pytorch/botorch/issues/2042
points, acqf_values = optimize_acqf(
acq_function=recommender._botorch_acqf,
bounds=torch.from_numpy(
subspace_continuous.comp_rep_bounds.to_numpy(copy=True)
),
q=batch_size,
num_restarts=recommender.n_restarts,
raw_samples=recommender.n_raw_samples,
fixed_features=fixed_parameters or None,
equality_constraints=flatten(
c.to_botorch(
subspace_continuous.parameters,
batch_size=batch_size if c.is_interpoint else None,
)
for c in subspace_continuous.constraints_lin_eq
)
or None,
inequality_constraints=flatten(
c.to_botorch(
subspace_continuous.parameters,
batch_size=batch_size if c.is_interpoint else None,
)
for c in subspace_continuous.constraints_lin_ineq
)
or None,
sequential=recommender.sequential_continuous,
points, acqf_values = recommender.optimizer(
batch_size=batch_size,
acquisition_function=recommender._botorch_acqf,
searchspace=SearchSpace(continuous=subspace_continuous),
fixed_parameters=fixed_parameters,
)
assert acqf_values is not None # for mypy; guaranteed by optimize_acqf defaults
return points, acqf_values
2 changes: 2 additions & 0 deletions baybe/recommenders/pure/bayesian/botorch/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
recommend_hybrid_with_subsets,
recommend_hybrid_without_subsets,
)
from baybe.recommenders.pure.bayesian.botorch.optimizers.basic import GradientOptimizer
from baybe.searchspace import (
SearchSpace,
SearchSpaceType,
Expand Down Expand Up @@ -246,6 +247,7 @@ def _recommend_hybrid(
self, searchspace, candidates_exp, batch_size
)

@override
def _optimize_over_subsets(
self,
subset_callables: Iterable[Callable[[], tuple[Any, Tensor]]],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Acquisition function optimizers."""

from baybe.recommenders.pure.bayesian.botorch.optimizers.basic import GradientOptimizer

__all__ = [
"GradientOptimizer",
]
48 changes: 48 additions & 0 deletions baybe/recommenders/pure/bayesian/botorch/optimizers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Base protocol for all optimizers."""

from __future__ import annotations

from typing import TYPE_CHECKING, Protocol, runtime_checkable, TypeAlias, ClassVar
from collections.abc import Callable

from baybe.searchspace import SearchSpace
from baybe.searchspace.core import SearchSpaceType

Optimand: TypeAlias = Callable[[Tensor], Tensor]
"Type alias for the callable to be optimized."

if TYPE_CHECKING:
from botorch.acquisition import AcquisitionFunction as BoAcquisitionFunction
from torch import Tensor


@runtime_checkable
class OptimizerProtocol(Protocol):

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Give this a Flag about the supported SearchSpaceType and add validation for this.

"""Type protocol specifying the interface optimizers need to implement."""

# Use slots so that derived classes also remain slotted
# See also: https://www.attrs.org/en/stable/glossary.html#term-slotted-classes
__slots__ = ()

compatibility: ClassVar[SearchSpaceType]
"""Class variable reflecting the search space compatibility."""

def __call__(
Comment thread
StefanPSchmid marked this conversation as resolved.
self,
batch_size: int,
acquisition_function: Optimand,

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I appreciate the type alias but I think the name Optimand is very confusing, I would not know what is meant by that. I however also struggle to find a better alternative since the natural term (Objective) is already occupied. Do you have other ideas? Because for now, I have to say that I'd prefer the raw type hint over a name that is not clear.

@StefanPSchmid StefanPSchmid Jun 26, 2026

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After some brainstorming with @fabianliebig, how about Score? OptimizableScore?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe something like CandidateScorer?

searchspace: SearchSpace,
fixed_parameters: dict[int, float] | None = None,
) -> tuple[Tensor, Tensor]:
"""Recommend a batch of points from the given search space.

Args:
batch_size: The size of the recommendation batch.
acquisition_function: The acquisition function to be optimized.
searchspace: The search space from which to generate recommendations.
fixed_parameters: A dictionary mapping parameter indices to fixed values.

Returns:
The recommendations and corresponding acquisition values.
"""
...
Loading
Loading