Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ If you want to run certain models implemented within mteb you will often need so

If a specific model requires a dependency that is not installed, it will raise an error with the recommended installation command. To see the full list of available models, you can look at the [models overview](overview/available_models/text.md).

### Automatically installing model dependencies

Instead of installing extras manually for every model, you can let mteb install the missing ones for you by setting the `MTEB_AUTO_INSTALL_EXTRAS` environment variable. When it is set to a truthy value (`1`, `true`, `yes`, `on`), loading a model whose extras are missing will first try to install them automatically (using `uv` if it is available, otherwise `pip`); an error is raised only if dependencies are still missing afterwards:

```bash
export MTEB_AUTO_INSTALL_EXTRAS=1
```

## Audio Tasks

If you want to run audio tasks, install the audio dependencies:
Expand Down
82 changes: 73 additions & 9 deletions mteb/models/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import importlib
import json
import logging
import os
import shutil
import subprocess
import sys
import warnings
from collections.abc import Callable, Mapping, Sequence
from dataclasses import field
Expand Down Expand Up @@ -71,6 +75,42 @@

logger = logging.getLogger(__name__)


def _auto_install_extras_enabled() -> bool:
    """Whether mteb should try to install missing optional dependencies automatically.

    Controlled by the ``MTEB_AUTO_INSTALL_EXTRAS`` environment variable. The value is
    compared case-insensitively and with surrounding whitespace ignored; ``1``,
    ``true``, ``yes`` and ``on`` enable auto-install. An unset variable or any other
    value leaves it disabled.

    Returns:
        True if auto-install is enabled, otherwise False.
    """
    raw_value = os.environ.get("MTEB_AUTO_INSTALL_EXTRAS", "0")
    # Strip before comparing: values such as "1 " (from `set VAR=1 && ...` in cmd.exe)
    # or "true\n" (read from a file) should still count as enabled.
    return raw_value.strip().lower() in {"1", "true", "yes", "on"}


def _install_extras(model_name: str | None, groups: Sequence[str]) -> None:
    """Install the given mteb extras groups using ``uv`` if available, else ``pip``.

    Uses ``uv pip install`` when ``uv`` is on the PATH (faster), otherwise falls back
    to ``python -m pip install``. Both variants target the interpreter that is running
    this process (``sys.executable``).

    Args:
        model_name: Name of the model the extras are installed for; only used in the
            log message.
        groups: Names of the mteb extras to install, e.g. ``["flagembedding"]``.
            Nothing is done when this is empty.

    Raises:
        subprocess.CalledProcessError: If the installer exits with a non-zero status.
        OSError: If the installer executable cannot be started.
    """
    if not groups:
        # Avoid invoking the installer with an empty `mteb[]` requirement.
        return

    target = f"mteb[{','.join(groups)}]"
    uv_path = shutil.which("uv")
    if uv_path is not None:
        command = [uv_path, "pip", "install"]
        # Without --python, uv installs into whichever environment it discovers
        # (e.g. VIRTUAL_ENV or a nearby .venv), which is not necessarily the one this
        # interpreter runs in; the re-check after install would then still fail.
        if sys.executable:
            command += ["--python", sys.executable]
        command.append(target)
    else:
        command = [sys.executable, "-m", "pip", "install", target]

    logger.info(
        "Auto-installing missing dependencies for model %s: %s",
        model_name,
        " ".join(command),
    )
    subprocess.run(command, check=True)
    # Ensure freshly installed distributions are visible to importlib.
    importlib.invalidate_caches()


FRAMEWORKS = Literal[
"Sentence Transformers",
"PyTorch",
Expand Down Expand Up @@ -156,7 +196,9 @@ class ModelMeta(BaseModel): # noqa: PLR0904
contacts: The people to contact in case of a problem in the model, preferably a GitHub handle.
experiment_kwargs: A dictionary of parameters used in the experiment that are not covered by other fields. This is used to create experiment names for ablation studies and similar experiments.
output_dtypes: Output embedding data types (e.g. int8, binary, float) natively supported by the model. If None, it is assumed that the model only returns float embeddings.
extra_requirements_groups: Name of group of extra requirements.
extra_requirements_groups: Name of group of extra requirements (mteb extras) needed to run the model, e.g. `["flagembedding"]`.
When a required group is missing, loading the model raises with install instructions. Set the environment variable
`MTEB_AUTO_INSTALL_EXTRAS=1` to let mteb install the missing extras automatically (using `uv` if available, otherwise `pip`).
"""

model_config = ConfigDict(extra="forbid")
Expand Down Expand Up @@ -414,6 +456,28 @@ def load_model(
return model

def _check_requirements(self) -> None:
    """Verify that the optional dependencies required by this model are installed.

    The required extras groups come from ``_resolve_extras_groups`` and are validated
    with ``_validate_extras_groups``. If dependencies are missing and auto-install is
    enabled (``MTEB_AUTO_INSTALL_EXTRAS``), one install attempt is made and the
    dependencies are checked again.

    Raises:
        ImportError: If required dependencies are missing, including when an
            automatic install was attempted but failed or did not resolve them.
            An installer failure is attached as the exception's cause.
    """
    groups = self._resolve_extras_groups()
    if not groups:
        return

    self._validate_extras_groups(groups)

    missing_dependencies = self._missing_dependencies(groups)
    if not missing_dependencies:
        return

    install_hint = (
        f".\nYou can install it with `pip install mteb[{','.join(groups)}]`."
    )

    if not _auto_install_extras_enabled():
        raise ImportError(
            f"Model {self.name} is missing required dependencies: "
            + ", ".join(missing_dependencies)
            + install_hint
            + "\nAlternatively, set the environment variable "
            "`MTEB_AUTO_INSTALL_EXTRAS=1` to let mteb install them automatically."
        )

    install_error: Exception | None = None
    try:
        _install_extras(self.name, groups)
    except (subprocess.CalledProcessError, OSError) as err:
        # Keep the documented contract (ImportError) instead of leaking the
        # installer's exception type to callers; the cause is chained below.
        install_error = err
        logger.warning(
            "Automatic installation of extras %s for model %s failed: %s",
            list(groups),
            self.name,
            err,
        )

    missing_dependencies = self._missing_dependencies(groups)
    if not missing_dependencies:
        return

    if install_error is not None:
        outcome = f"was attempted but failed ({install_error})"
    else:
        outcome = "was attempted but did not resolve them"
    # Do not suggest enabling auto-install here: it is already enabled.
    raise ImportError(
        f"Model {self.name} is missing required dependencies: "
        + ", ".join(missing_dependencies)
        + install_hint
        + f"\nAutomatic installation (`MTEB_AUTO_INSTALL_EXTRAS`) {outcome}."
    ) from install_error

def _resolve_extras_groups(self) -> list[str]:
"""Collect the extras groups this model requires, including modality defaults."""
groups: list[str] = list(self.extra_requirements_groups or [])

# handle modality specific dependencies inside baseline functions
Expand All @@ -423,9 +487,11 @@ def _check_requirements(self) -> None:
if "audio" in self.modalities and "audio" not in groups:
groups.append("audio")

if not groups:
return
return groups

@staticmethod
def _validate_extras_groups(groups: Sequence[str]) -> None:
"""Raise if any group is not a valid mteb extra."""
available_extras = set(
distribution("mteb").metadata.get_all("Provides-Extra") or []
)
Expand All @@ -441,6 +507,9 @@ def _norm(s: str) -> str:
f"Available: {sorted(available_extras)}"
)

@staticmethod
def _missing_dependencies(groups: Sequence[str]) -> list[str]:
"""Return the requirement strings of the given groups that are not satisfied."""
missing_dependencies = []

mteb_requires = requires("mteb")
Expand Down Expand Up @@ -469,12 +538,7 @@ def _norm(s: str) -> str:
except InvalidVersion:
missing_dependencies.append(f"{req_str} (installed: {installed})")

if missing_dependencies:
raise ImportError(
f"Model {self.name} is missing required dependencies: "
+ ", ".join(missing_dependencies)
+ f".\nYou can install it with `pip install mteb[{','.join(groups)}]`."
)
return missing_dependencies

def model_name_as_path(self) -> str:
"""Returns the model name in a format that can be used as a file path.
Expand Down
45 changes: 45 additions & 0 deletions tests/test_models/test_model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,3 +483,48 @@ def test_model_meta_dependencies_not_installed_group():
),
):
model_meta._check_requirements()


def test_model_meta_auto_install_extras(monkeypatch):
    """With MTEB_AUTO_INSTALL_EXTRAS enabled, missing deps lead to one install attempt."""
    overrides = {"extra_requirements_groups": ["torch-vggish-yamnet"]}
    model_meta = mteb.get_model_meta("google/vggish").model_copy(update=overrides)
    monkeypatch.setenv("MTEB_AUTO_INSTALL_EXTRAS", "1")

    attempts: list[tuple[str | None, list[str]]] = []

    # Stand-in installer: records its arguments instead of installing anything.
    monkeypatch.setattr(
        "mteb.models.model_meta._install_extras",
        lambda name, groups: attempts.append((name, list(groups))),
    )

    # Nothing actually gets installed, so the requirement check still fails;
    # what this test verifies is that the installer was called with the right groups.
    with pytest.raises(ImportError):
        model_meta._check_requirements()

    assert len(attempts) == 1
    requested_name, requested_groups = attempts[0]
    assert requested_name == "google/vggish"
    assert "torch-vggish-yamnet" in requested_groups


def test_model_meta_no_auto_install_by_default(monkeypatch):
    """If the env var is unset, the ImportError is raised without any install attempt."""
    base_meta = mteb.get_model_meta("google/vggish")
    model_meta = base_meta.model_copy(
        update={"extra_requirements_groups": ["torch-vggish-yamnet"]}
    )
    monkeypatch.delenv("MTEB_AUTO_INSTALL_EXTRAS", raising=False)

    # Any call to the installer is a test failure: AssertionError is not caught
    # by the `pytest.raises(ImportError)` block below.
    def forbid_install(name, groups):
        raise AssertionError("install should not be attempted")

    monkeypatch.setattr("mteb.models.model_meta._install_extras", forbid_install)

    with pytest.raises(ImportError):
        model_meta._check_requirements()