Skip to content

Commit d51934d

Browse files
feat(arena): show all BEIR metrics - NDCG@1/5/10, MAP@1/5/10, MRR@10
1 parent 3314e8f commit d51934d

2 files changed

Lines changed: 55 additions & 20 deletions

File tree

demo-web/src/app/arena/page.tsx

Lines changed: 41 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -220,8 +220,8 @@ export default function ArenaPage() {
220220

221221
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
222222
{/* Result Card A */}
223-
<div className="bg-white p-6 rounded-2xl border border-slate-200 shadow-sm flex flex-col gap-6 relative overflow-hidden">
224-
{results.pipeline_a.mrr_10 > results.pipeline_b.mrr_10 && <div className="absolute top-0 right-0 w-16 h-16 bg-emerald-500 transform rotate-45 translate-x-8 -translate-y-8" />}
223+
<div className="bg-white p-6 rounded-2xl border border-slate-200 shadow-sm flex flex-col gap-4 relative overflow-hidden">
224+
{results.pipeline_a.ndcg_10 > results.pipeline_b.ndcg_10 && <div className="absolute top-0 right-0 w-16 h-16 bg-emerald-500 transform rotate-45 translate-x-8 -translate-y-8" />}
225225

226226
<div className="flex items-center gap-3">
227227
<div className="w-10 h-10 rounded-xl bg-slate-100 text-slate-600 flex items-center justify-center font-bold text-lg">A</div>
@@ -230,16 +230,31 @@ export default function ArenaPage() {
230230
<div className="text-xs text-slate-500">BM25 {pipeA.method !== "none" ? `+ ${pipeA.method} (${pipeA.model})` : "(No Reranker)"}</div>
231231
</div>
232232
</div>
233-
<div className="flex flex-col gap-4 mt-2">
234-
<MetricBar label="NDCG@10 (Ranking Quality)" value={results.pipeline_a.ndcg_10} max={100} format="percent" />
233+
234+
<div className="text-[10px] font-bold text-slate-400 uppercase tracking-wider mt-1">Ranking Quality</div>
235+
<div className="flex flex-col gap-3">
236+
<MetricBar label="NDCG@1" value={results.pipeline_a.ndcg_1} max={100} format="percent" />
237+
<MetricBar label="NDCG@5" value={results.pipeline_a.ndcg_5} max={100} format="percent" />
238+
<MetricBar label="NDCG@10" value={results.pipeline_a.ndcg_10} max={100} format="percent" />
239+
</div>
240+
241+
<div className="text-[10px] font-bold text-slate-400 uppercase tracking-wider mt-1">Mean Average Precision</div>
242+
<div className="flex flex-col gap-3">
243+
<MetricBar label="MAP@1" value={results.pipeline_a.map_1} max={100} format="percent" />
244+
<MetricBar label="MAP@5" value={results.pipeline_a.map_5} max={100} format="percent" />
245+
<MetricBar label="MAP@10" value={results.pipeline_a.map_10} max={100} format="percent" />
246+
</div>
247+
248+
<div className="text-[10px] font-bold text-slate-400 uppercase tracking-wider mt-1">Relevance & Efficiency</div>
249+
<div className="flex flex-col gap-3">
235250
<MetricBar label="MRR@10 (Top Relevance)" value={results.pipeline_a.mrr_10} max={100} format="percent" />
236-
<MetricBar label="Avg End-to-End Latency" value={results.pipeline_a.latency_ms} max={3000} format="ms" />
251+
<MetricBar label="Avg Reranking Latency" value={results.pipeline_a.latency_ms} max={3000} format="ms" />
237252
</div>
238253
</div>
239254

240255
{/* Result Card B */}
241-
<div className="bg-white p-6 rounded-2xl border border-indigo-200 shadow-md flex flex-col gap-6 relative overflow-hidden ring-1 ring-indigo-500 ring-opacity-20">
242-
{results.pipeline_b.mrr_10 > results.pipeline_a.mrr_10 && <div className="absolute top-0 right-0 w-16 h-16 bg-emerald-500 transform rotate-45 translate-x-8 -translate-y-8 flex items-end justify-center pb-1"><Trophy className="w-4 h-4 text-white -rotate-45" /></div>}
256+
<div className="bg-white p-6 rounded-2xl border border-indigo-200 shadow-md flex flex-col gap-4 relative overflow-hidden ring-1 ring-indigo-500 ring-opacity-20">
257+
{results.pipeline_b.ndcg_10 > results.pipeline_a.ndcg_10 && <div className="absolute top-0 right-0 w-16 h-16 bg-emerald-500 transform rotate-45 translate-x-8 -translate-y-8 flex items-end justify-center pb-1"><Trophy className="w-4 h-4 text-white -rotate-45" /></div>}
243258

244259
<div className="flex items-center gap-3">
245260
<div className="w-10 h-10 rounded-xl bg-indigo-100 text-indigo-700 flex items-center justify-center font-bold text-lg">B</div>
@@ -248,14 +263,30 @@ export default function ArenaPage() {
248263
<div className="text-xs text-slate-500">BM25 {pipeB.method !== "none" ? `+ ${pipeB.method} (${pipeB.model})` : "(No Reranker)"}</div>
249264
</div>
250265
</div>
251-
<div className="flex flex-col gap-4 mt-2">
252-
<MetricBar label="NDCG@10 (Ranking Quality)" value={results.pipeline_b.ndcg_10} max={100} format="percent" />
266+
267+
<div className="text-[10px] font-bold text-slate-400 uppercase tracking-wider mt-1">Ranking Quality</div>
268+
<div className="flex flex-col gap-3">
269+
<MetricBar label="NDCG@1" value={results.pipeline_b.ndcg_1} max={100} format="percent" />
270+
<MetricBar label="NDCG@5" value={results.pipeline_b.ndcg_5} max={100} format="percent" />
271+
<MetricBar label="NDCG@10" value={results.pipeline_b.ndcg_10} max={100} format="percent" />
272+
</div>
273+
274+
<div className="text-[10px] font-bold text-slate-400 uppercase tracking-wider mt-1">Mean Average Precision</div>
275+
<div className="flex flex-col gap-3">
276+
<MetricBar label="MAP@1" value={results.pipeline_b.map_1} max={100} format="percent" />
277+
<MetricBar label="MAP@5" value={results.pipeline_b.map_5} max={100} format="percent" />
278+
<MetricBar label="MAP@10" value={results.pipeline_b.map_10} max={100} format="percent" />
279+
</div>
280+
281+
<div className="text-[10px] font-bold text-slate-400 uppercase tracking-wider mt-1">Relevance & Efficiency</div>
282+
<div className="flex flex-col gap-3">
253283
<MetricBar label="MRR@10 (Top Relevance)" value={results.pipeline_b.mrr_10} max={100} format="percent" />
254-
<MetricBar label="Avg End-to-End Latency" value={results.pipeline_b.latency_ms} max={3000} format="ms" />
284+
<MetricBar label="Avg Reranking Latency" value={results.pipeline_b.latency_ms} max={3000} format="ms" />
255285
</div>
256286
</div>
257287
</div>
258288
</div>
289+
259290
)}
260291

261292
</div>

demo_server.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -564,20 +564,24 @@ def evaluate_pipeline(pipeline_cfg: ArenaPipeline, docs):
564564
# (same as the Gradio demo but passing local file path to bypass Java download)
565565
metrics_obj = Metrics(docs_copy)
566566
trec = metrics_obj.calculate_trec_metrics(
567-
ndcg_cuts=[10],
568-
map_cuts=[10],
567+
ndcg_cuts=[1, 5, 10],
568+
map_cuts=[1, 5, 10],
569569
mrr_cuts=[10],
570570
qrel=qrel_path, # local file path — framework checks os.path.exists()
571571
use_reordered=use_rr,
572572
)
573-
ndcg_10 = trec.get("ndcg@10", 0.0) * 100
574-
mrr_10 = trec.get("mrr@10", 0.0) * 100
575-
576-
logger.info(
577-
f"Pipeline [{pipeline_cfg.rerankerCategory}/{pipeline_cfg.rerankerModel}]: "
578-
f"NDCG@10={ndcg_10:.2f}% MRR@10={mrr_10:.2f}%"
579-
)
580-
return {"mrr_10": mrr_10, "ndcg_10": ndcg_10, "latency_ms": rr_latency}
573+
def pct(key): return round(trec.get(key, 0.0) * 100, 2)
574+
logger.info(f"Pipeline [{pipeline_cfg.rerankerCategory}/{pipeline_cfg.rerankerModel}]: NDCG@10={pct('ndcg@10')}% MRR@10={pct('mrr@10')}%")
575+
return {
576+
"ndcg_1": pct("ndcg@1"),
577+
"ndcg_5": pct("ndcg@5"),
578+
"ndcg_10": pct("ndcg@10"),
579+
"map_1": pct("map@1"),
580+
"map_5": pct("map@5"),
581+
"map_10": pct("map@10"),
582+
"mrr_10": pct("mrr@10"),
583+
"latency_ms": rr_latency,
584+
}
581585

582586
res_a = evaluate_pipeline(req.pipeline_a, eval_docs)
583587
res_b = evaluate_pipeline(req.pipeline_b, eval_docs)

0 commit comments

Comments (0)