Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions mteb/leaderboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,42 @@ def _produce_benchmark_link(benchmark_name: str, request: gr.Request) -> str:
return md


def _set_benchmark_on_load(request: gr.Request):
@functools.cache
def _leaderboard_benchmark_names() -> frozenset[str]:
    """Collect the names of all benchmarks reachable from the leaderboard menus.

    Starts from ``GP_BENCHMARK_ENTRIES`` and ``R_BENCHMARK_ENTRIES`` and descends
    into every entry that is not a ``Benchmark`` through its ``benchmarks``
    attribute. The function takes no arguments and is wrapped in
    ``functools.cache``, so the walk runs once and the result is reused.

    Returns:
        A frozenset holding the ``name`` of each ``Benchmark`` encountered.
    """
    stack = list(GP_BENCHMARK_ENTRIES) + list(R_BENCHMARK_ENTRIES)
    found = set()

    while stack:
        node = stack.pop()
        if not isinstance(node, Benchmark):
            # Not a leaf: queue its children for a later iteration.
            stack.extend(node.benchmarks)
            continue
        found.add(node.name)

    return frozenset(found)


def _resolve_benchmark_name_from_query(benchmark_name: str | None) -> str:
    """Turn a ``benchmark_name`` query value into a name the leaderboard can show.

    Args:
        benchmark_name: Raw value taken from the query string, or ``None`` when
            the parameter was not supplied.

    Returns:
        The ``name`` of the benchmark returned by ``mteb.get_benchmark`` when
        that lookup succeeds and the name is contained in
        ``_leaderboard_benchmark_names()``; ``DEFAULT_BENCHMARK_NAME`` in every
        other case.
    """
    if benchmark_name is not None:
        try:
            canonical = mteb.get_benchmark(benchmark_name).name
        except KeyError:
            # The lookup raised KeyError: fall through to the default below.
            pass
        else:
            if canonical in _leaderboard_benchmark_names():
                return canonical

    return DEFAULT_BENCHMARK_NAME


def _set_benchmark_on_load(request: gr.Request) -> str:
    """Choose the benchmark to select when the page is loaded.

    Reads the ``benchmark_name`` query parameter from the request and passes it
    through ``_resolve_benchmark_name_from_query`` so the raw, user-supplied
    value is never returned directly.

    Args:
        request: The incoming Gradio request whose ``query_params`` are read.

    Returns:
        Whatever ``_resolve_benchmark_name_from_query`` returns for the
        ``benchmark_name`` query parameter (``None`` is passed when it is absent).
    """
    # Deliberately no default here: a missing parameter is passed on as None.
    requested = request.query_params.get("benchmark_name")
    return _resolve_benchmark_name_from_query(requested)


def _download_table(table: pd.DataFrame) -> str:
Expand Down
32 changes: 32 additions & 0 deletions tests/test_leaderboard/test_benchmark_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pytest

pytest.importorskip("gradio", reason="Gradio not installed")

from mteb.leaderboard.app import _resolve_benchmark_name_from_query
from mteb.leaderboard.benchmark_selector import DEFAULT_BENCHMARK_NAME


def test_resolve_benchmark_name_from_query_defaults_when_missing():
    """A missing query value (``None``) resolves to the default benchmark."""
    resolved = _resolve_benchmark_name_from_query(None)
    assert resolved == DEFAULT_BENCHMARK_NAME


def test_resolve_benchmark_name_from_query_defaults_when_invalid():
    """A name that is expected not to be a benchmark resolves to the default."""
    resolved = _resolve_benchmark_name_from_query("not-a-real-benchmark")
    assert resolved == DEFAULT_BENCHMARK_NAME


def test_resolve_benchmark_name_from_query_defaults_when_not_on_leaderboard():
    """A benchmark name the test expects to be off the leaderboard gives the default."""
    resolved = _resolve_benchmark_name_from_query("MTEB(Multilingual, v1)")
    assert resolved == DEFAULT_BENCHMARK_NAME


def test_resolve_benchmark_name_from_query_keeps_valid_name():
    """The name ``MTEB(eng, v1)`` is expected to be returned unchanged."""
    expected = "MTEB(eng, v1)"
    resolved = _resolve_benchmark_name_from_query("MTEB(eng, v1)")
    assert resolved == expected


def test_resolve_benchmark_name_from_query_normalizes_alias():
    """``MTEB(eng, classic)`` is expected to resolve to ``MTEB(eng, v1)``."""
    expected = "MTEB(eng, v1)"
    resolved = _resolve_benchmark_name_from_query("MTEB(eng, classic)")
    assert resolved == expected