Skip to content
This repository was archived by the owner on Apr 23, 2026. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions data/showdown.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"meta": {
"version": "2026.04.16",
"last_update": "2026-04-21T17:05:56Z",
"last_update": "2026-04-22T16:33:14Z",
"schema_version": "1.0"
},
"models": [
Expand Down Expand Up @@ -133,14 +133,14 @@
"average_per_1m": 10.0
},
"performance": {
"output_speed_tps": 50,
"latency_ttft_ms": 0,
"output_speed_tps": 87,
"latency_ttft_ms": 14120,
"source": "https://artificialanalysis.ai/models/claude-opus-4-7"
},
"editor_notes": "Thinking mode for Claude 4.7 Opus. Takes the #1 overall spot on LMArena.",
"benchmark_scores": {
"lmarena_en_elo": 1505,
"lmarena_coding_elo": null,
"lmarena_en_elo": 1504,
"lmarena_coding_elo": 1576,

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Use Text Arena coding score for lmarena_coding_elo

lmarena_coding_elo is defined in UPDATE.md to come from Text Arena Coding (/leaderboard/text/coding), but this update appears to use Code Arena WebDev values instead (e.g., 1576 here matches Code Arena, while Text Arena Coding is lower for the same model). The same source mix-up is repeated for the other updated lmarena_coding_elo entries (including gpt-5.4-high now set to 1457, which corresponds to the (codex-harness) row), so ranking calculations that compare coding Elo across models are now mixing two different benchmarks.

Useful? React with 👍 / 👎.

"lmarena_hard_elo": null,
"lmarena_math_elo": null,
"lmarena_creative_elo": null,
Expand All @@ -149,7 +149,7 @@
"swe_bench_pro": null,
"gpqa_diamond": null,
"humanity_last_exam": null,
"livebench": null,
"livebench": 76.91,
"math_500": null,
"aime": null,
"frontiermath": 43.8,
Expand All @@ -167,14 +167,14 @@
"mmmu": null,
"mmmu_pro": null,
"lmarena_zh_elo": null,
"lmarena_vision_elo": null,
"livebench_reasoning": null,
"livebench_coding": null,
"livebench_agentic_coding": null,
"livebench_math": null,
"livebench_data_analysis": null,
"livebench_language": null,
"livebench_if": null
"lmarena_vision_elo": 1307,
"livebench_reasoning": 87.69,
"livebench_coding": 82.09,
"livebench_agentic_coding": 60.0,
"livebench_math": 93.1,
"livebench_data_analysis": 78.26,
"livebench_language": 77.91,
"livebench_if": 59.34
}
},
{
Expand All @@ -196,8 +196,8 @@
},
"editor_notes": "Anthropic's latest flagship model. Expensive and slower than average but boasts leading intelligence scores with a 1M token context window.",
"benchmark_scores": {
"lmarena_en_elo": 1498,
"lmarena_coding_elo": null,
"lmarena_en_elo": 1497,
"lmarena_coding_elo": 1569,
"lmarena_hard_elo": null,
"lmarena_math_elo": null,
"lmarena_creative_elo": null,
Expand All @@ -224,7 +224,7 @@
"mmmu": null,
"mmmu_pro": null,
"lmarena_zh_elo": null,
"lmarena_vision_elo": null,
"lmarena_vision_elo": 1300,
"livebench_reasoning": null,
"livebench_coding": null,
"livebench_agentic_coding": null,
Expand Down Expand Up @@ -262,13 +262,13 @@
"humanity_last_exam": 40,
"live_code_bench": null,
"livebench": 61.81,
"lmarena_coding_elo": 1547,
"lmarena_coding_elo": 1544,
"lmarena_creative_elo": 1468,
"lmarena_en_elo": 1497,
"lmarena_en_elo": 1496,
"lmarena_hard_elo": 1529,
"lmarena_if_elo": 1500,
"lmarena_math_elo": 1501,
"lmarena_vision_elo": 1289,
"lmarena_vision_elo": 1293,
"lmarena_zh_elo": 1557,
"math_500": null,
"mathvista": null,
Expand Down Expand Up @@ -326,13 +326,13 @@
"humanity_last_exam": 53.1,
"live_code_bench": null,
"livebench": 76.33,
"lmarena_coding_elo": 1556,
"lmarena_coding_elo": 1549,
"lmarena_creative_elo": 1493,
"lmarena_en_elo": 1503,
"lmarena_en_elo": 1502,
"lmarena_hard_elo": 1536,
"lmarena_if_elo": 1512,
"lmarena_math_elo": 1512,
"lmarena_vision_elo": 1302,
"lmarena_vision_elo": 1304,
"lmarena_zh_elo": 1540,
"math_500": null,
"mathvista": null,
Expand Down Expand Up @@ -513,7 +513,7 @@
"humanity_last_exam": null,
"live_code_bench": null,
"livebench": null,
"lmarena_coding_elo": 1521,
"lmarena_coding_elo": 1525,
"lmarena_creative_elo": 1443,
"lmarena_en_elo": 1477,
"lmarena_hard_elo": 1498,
Expand Down Expand Up @@ -1171,7 +1171,7 @@
"humanity_last_exam": 36.24,
"live_code_bench": null,
"livebench": 80.28,
"lmarena_coding_elo": 1534,
"lmarena_coding_elo": 1457,
"lmarena_creative_elo": 1461,
"lmarena_en_elo": 1482,
"lmarena_hard_elo": 1507,
Expand Down Expand Up @@ -2009,7 +2009,7 @@
"livebench": 67.96,
"lmarena_coding_elo": null,
"lmarena_creative_elo": null,
"lmarena_en_elo": 1493,
"lmarena_en_elo": 1480,
"lmarena_hard_elo": null,
"lmarena_if_elo": null,
"lmarena_math_elo": null,
Expand Down
Loading