OmniDocBench/evaluate.py at develop · jina-ai/OmniDocBench · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Evaluation entry point: builds a temporary YAML config from CLI args and runs
the existing pdf_validation stack (same dataset, task, and metrics).
"""
from __future__ import annotations

import io
import os
import tempfile
from pathlib import Path

import click
import yaml

# Metrics computed for each element category (same layout as end2end.yaml).
# Each category maps to {"metric": [<metric names>]}.
DEFAULT_METRICS = dict(
    text_block={"metric": ["Edit_dist"]},
    display_formula={"metric": ["Edit_dist", "CDM_plain"]},
    table={"metric": ["TEDS", "Edit_dist"]},
    reading_order={"metric": ["Edit_dist"]},
)


def build_config(gt_path: str, exp_path: str, truncate_repeats: bool) -> dict:
    """Assemble the ``end2end_eval`` configuration dictionary.

    Args:
        gt_path: Path to the ground-truth data; stored as an absolute path.
        exp_path: Path to the prediction data; stored as an absolute path.
        truncate_repeats: Value stored under the dataset's
            ``truncated_repeats`` key.

    Returns:
        A dict with the single top-level key ``"end2end_eval"`` holding the
        ``metrics`` (the module-level ``DEFAULT_METRICS`` object itself, not a
        copy) and the ``dataset`` section.
    """
    prediction = {"data_path": os.path.abspath(exp_path)}
    ground_truth = {"data_path": os.path.abspath(gt_path)}
    dataset_section = {
        "dataset_name": "end2end_dataset",
        "ground_truth": ground_truth,
        "prediction": prediction,
        "match_method": "quick_match",
        "truncated_repeats": truncate_repeats,
    }
    task_section = {"metrics": DEFAULT_METRICS, "dataset": dataset_section}
    return {"end2end_eval": task_section}


def run_validation(cfg_path: str) -> None:
    """Read a YAML config file and run each evaluation task it declares.

    Every top-level key of the config is treated as a task name. For each
    non-empty task entry, the dataset class is looked up in
    ``DATASET_REGISTRY`` by ``dataset.dataset_name`` and instantiated with the
    task's config, the task callable is looked up in ``EVAL_TASK_REGISTRY`` by
    the task name, and the callable is invoked with the dataset, the metrics
    section, the ground-truth ``page_info`` (or ``data_path`` when no
    ``page_info`` is set) and a derived save name.

    Args:
        cfg_path: Path to the YAML config file (read as UTF-8).

    Raises:
        ValueError: If the parsed config is not a dict.
    """
    # These modules are not referenced by name below; presumably importing
    # them populates the registries -- TODO confirm against the packages.
    import dataset  # noqa: F401
    import task  # noqa: F401
    import metrics  # noqa: F401
    from registry.registry import DATASET_REGISTRY, EVAL_TASK_REGISTRY

    config_file = os.path.abspath(cfg_path)
    with open(config_file, "r", encoding="utf-8") as stream:
        # NOTE(review): FullLoader accepts more than plain YAML types; if
        # cfg_path can ever come from an untrusted source, consider
        # yaml.safe_load instead.
        cfg = yaml.load(stream, Loader=yaml.FullLoader)

    # A None result (empty file) is not a dict either, so one check suffices.
    if not isinstance(cfg, dict):
        raise ValueError("Invalid config")

    for task_name, task_cfg in cfg.items():
        if not task_cfg:
            click.echo(f"No config for task {task_name}", err=True)
            continue

        dataset_name = task_cfg["dataset"]["dataset_name"]
        metrics_list = task_cfg["metrics"]
        dataset_cls = DATASET_REGISTRY.get(dataset_name)
        val_dataset = dataset_cls(task_cfg)
        val_task = EVAL_TASK_REGISTRY.get(task_name)

        # Fetched after the dataset is built, matching the original lookup
        # order in case the dataset constructor touched the task config.
        dataset_cfg = task_cfg["dataset"]
        prediction_path = dataset_cfg["prediction"].get("data_path")
        if prediction_path:
            # "<prediction basename>_<match method>"
            save_name = "_".join(
                [
                    os.path.basename(prediction_path),
                    dataset_cfg.get("match_method", "quick_match"),
                ]
            )
        else:
            # Ground-truth file name up to (not including) its first dot.
            gt_file_name = os.path.basename(dataset_cfg["ground_truth"]["data_path"])
            save_name = gt_file_name.partition(".")[0]
        click.echo(f"###### Process: {save_name}")

        gt = dataset_cfg["ground_truth"]
        # Prefer page_info when it is set to a truthy value; otherwise fall
        # back to the ground-truth data path.
        gt_source = gt.get("page_info") or gt["data_path"]
        val_task(val_dataset, metrics_list, gt_source, save_name)


@click.command()
@click.option(
    "--exp-path",
    "exp_path",
    required=True,
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    help="Result folder containing the prediction .md files.",
)
@click.option(
    "--gt-path",
    "gt_path",
    default=None,
    type=click.Path(path_type=Path),
    help="Path to the ground-truth JSON file (e.g. OmniDocBench.json). Default: OmniDocBench.json next to this script.",
)
@click.option(
    "--truncate-repeats/--no-truncate-repeats",
    "truncate_repeats",
    default=True,
    help="Whether to truncate repeated content at the end before computing metrics.",
)
def main(
    exp_path: Path,
    gt_path: Path | None,
    truncate_repeats: bool,
) -> None:
    """Run end-to-end evaluation: build a temp config from CLI args and run the existing stack.
    Results are written to ./result (default of the validation stack)."""
    # NOTE: the docstring above doubles as the click --help text, so it is
    # kept unchanged; implementation notes live in comments instead.
    script_dir = Path(__file__).resolve().parent
    if gt_path is None:
        gt_path = script_dir / "OmniDocBench.json"
    if not gt_path.exists():
        raise click.UsageError(
            f"Ground-truth file not found: {gt_path}. Pass --gt-path or place OmniDocBench.json next to evaluate.py."
        )

    # Resolve BOTH user-supplied paths while the caller's working directory
    # is still in effect. click validated --exp-path against that directory;
    # resolving it after the chdir below would silently re-anchor a relative
    # path to the script directory and point at the wrong folder.
    gt_path = gt_path.resolve()
    exp_path = exp_path.resolve()

    # Run with OmniDocBench as cwd so registry imports and ./result resolve correctly
    os.chdir(script_dir)

    config = build_config(str(gt_path), str(exp_path), truncate_repeats)

    # The config is written as UTF-8 explicitly: it is dumped with
    # allow_unicode=True and read back as UTF-8 by run_validation, so relying
    # on the locale's default encoding could corrupt non-ASCII paths.
    tmp_file = tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".yaml",
        prefix="omnidocbench_eval_",
        delete=False,
        encoding="utf-8",
    )
    temp_path = tmp_file.name
    try:
        # Close (and flush) the file before the validation stack re-opens it.
        with tmp_file as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True)
        run_validation(temp_path)
    finally:
        # delete=False means cleanup is our job, including when the dump or
        # the validation run fails.
        os.unlink(temp_path)


# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()