forked from opendatalab/OmniDocBench
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate.py
More file actions
135 lines (120 loc) · 4.57 KB
/
Copy pathevaluate.py
File metadata and controls
135 lines (120 loc) · 4.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Evaluation entry point: builds a temporary YAML config from CLI args and runs
the existing pdf_validation stack (same dataset, task, and metrics).
"""
from __future__ import annotations
import io
import os
import tempfile
from pathlib import Path
import click
import yaml
# Metrics computed for each evaluated element type. Per the original note this
# layout is the same as the one in end2end.yaml. Every entry has the shape
# {"metric": [<metric name>, ...]}.
DEFAULT_METRICS = {
    element_type: {"metric": metric_names}
    for element_type, metric_names in (
        ("text_block", ["Edit_dist"]),
        ("display_formula", ["Edit_dist", "CDM_plain"]),
        ("table", ["TEDS", "Edit_dist"]),
        ("reading_order", ["Edit_dist"]),
    )
}
def build_config(gt_path: str, exp_path: str, truncate_repeats: bool) -> dict:
    """Assemble the ``end2end_eval`` configuration for one evaluation run.

    Args:
        gt_path: Location of the ground-truth data; stored as an absolute path.
        exp_path: Location of the prediction data; stored as an absolute path.
        truncate_repeats: Stored unchanged under the dataset section's
            ``truncated_repeats`` key.

    Returns:
        A dict with the single top-level key ``end2end_eval`` that holds a
        ``metrics`` section (the module-level ``DEFAULT_METRICS`` object
        itself, not a copy) and a ``dataset`` section.
    """
    ground_truth_section = {"data_path": os.path.abspath(gt_path)}
    prediction_section = {"data_path": os.path.abspath(exp_path)}

    dataset_section = {
        "dataset_name": "end2end_dataset",
        "ground_truth": ground_truth_section,
        "prediction": prediction_section,
        "match_method": "quick_match",
        "truncated_repeats": truncate_repeats,
    }

    task_section = {"metrics": DEFAULT_METRICS, "dataset": dataset_section}
    return {"end2end_eval": task_section}
def run_validation(cfg_path: str) -> None:
    """Load a YAML config and run every evaluation task it declares.

    Each top-level key of the config names a task. For every task with a
    non-empty section, the dataset class is looked up in ``DATASET_REGISTRY``
    by ``dataset.dataset_name`` and the task callable in
    ``EVAL_TASK_REGISTRY`` by the task name. The task callable is then invoked
    with the dataset, the ``metrics`` section, a ground-truth reference and a
    save name. The loop is intended to mirror the one in ``pdf_validation``.

    The save name is ``<prediction dir name>_<match_method>`` when a
    prediction ``data_path`` is configured; otherwise it is the ground-truth
    file name up to its first dot.

    Args:
        cfg_path: Path to the YAML configuration file.

    Raises:
        ValueError: If the YAML document is not a mapping (including an empty
            file).
    """
    # Imported only for their side effects (presumably they register the
    # dataset/task/metric implementations -- confirm against the packages).
    # Kept local so importing this module does not require them.
    import dataset  # noqa: F401
    import task  # noqa: F401
    import metrics  # noqa: F401
    from registry.registry import DATASET_REGISTRY, EVAL_TASK_REGISTRY

    with open(os.path.abspath(cfg_path), "r", encoding="utf-8") as f:
        # NOTE(review): FullLoader accepts more than plain data types. If
        # configs can ever come from an untrusted source, consider switching
        # to yaml.safe_load.
        cfg = yaml.load(f, Loader=yaml.FullLoader)

    # isinstance() already rejects None (what an empty YAML file loads as).
    if not isinstance(cfg, dict):
        raise ValueError("Invalid config")

    for task_name, task_cfg in cfg.items():
        if not task_cfg:
            click.echo(f"No config for task {task_name}", err=True)
            continue

        dataset_cfg = task_cfg["dataset"]
        dataset_name = dataset_cfg["dataset_name"]
        metrics_list = task_cfg["metrics"]
        val_dataset = DATASET_REGISTRY.get(dataset_name)(task_cfg)
        val_task = EVAL_TASK_REGISTRY.get(task_name)

        prediction_path = dataset_cfg["prediction"].get("data_path")
        if prediction_path:
            # Normalise first: basename() of a path with a trailing separator
            # is "", which would yield a save name of just "_<match_method>".
            prediction_name = os.path.basename(os.path.normpath(prediction_path))
            match_method = dataset_cfg.get("match_method", "quick_match")
            save_name = prediction_name + "_" + match_method
        else:
            save_name = os.path.basename(
                dataset_cfg["ground_truth"]["data_path"]
            ).split(".")[0]
        click.echo(f"###### Process: {save_name}")

        gt = dataset_cfg["ground_truth"]
        # Prefer the optional page_info entry over the ground-truth data path
        # as the third argument when it is configured.
        gt_reference = gt["page_info"] if gt.get("page_info") else gt["data_path"]
        val_task(val_dataset, metrics_list, gt_reference, save_name)
@click.command()
@click.option(
    "--exp-path",
    "exp_path",
    required=True,
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    help="Result folder containing the prediction .md files.",
)
@click.option(
    "--gt-path",
    "gt_path",
    default=None,
    type=click.Path(path_type=Path),
    help="Path to the ground-truth JSON file (e.g. OmniDocBench.json). Default: OmniDocBench.json next to this script.",
)
@click.option(
    "--truncate-repeats/--no-truncate-repeats",
    "truncate_repeats",
    default=True,
    help="Whether to truncate repeated content at the end before computing metrics.",
)
def main(
    exp_path: Path,
    gt_path: Path | None,
    truncate_repeats: bool,
) -> None:
    """Run end-to-end evaluation: build a temp config from CLI args and run the existing stack.
    Results are written to ./result (default of the validation stack)."""
    # The docstring above doubles as the command's --help text, so it is kept
    # short; implementation notes live in comments instead.
    #
    # Parameters (filled in by click from the options above):
    #   exp_path          existing directory with the predictions
    #   gt_path           ground-truth file, or None to use OmniDocBench.json
    #                     located next to this script
    #   truncate_repeats  forwarded unchanged into the generated config
    # Raises click.UsageError when the ground-truth file does not exist.

    # Resolve the user-supplied paths against the caller's working directory
    # BEFORE the chdir below. click validated --exp-path relative to that
    # directory; resolving it after the chdir would silently re-anchor a
    # relative path to the script directory.
    exp_path = exp_path.resolve()

    script_dir = Path(__file__).resolve().parent
    if gt_path is None:
        gt_path = script_dir / "OmniDocBench.json"
    if not gt_path.exists():
        raise click.UsageError(
            f"Ground-truth file not found: {gt_path}. Pass --gt-path or place OmniDocBench.json next to evaluate.py."
        )
    gt_path = gt_path.resolve()

    # Run with OmniDocBench as cwd so registry imports and ./result resolve correctly
    os.chdir(script_dir)

    config = build_config(str(gt_path), str(exp_path), truncate_repeats)

    # Write the config as UTF-8 explicitly: the dump keeps non-ASCII characters
    # (allow_unicode=True) and run_validation reads the file back as UTF-8, so
    # the platform's default encoding must not be used here.
    # delete=False because the file is reopened by name after this handle is
    # closed; it is removed in the finally block below.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".yaml",
        prefix="omnidocbench_eval_",
        delete=False,
        encoding="utf-8",
    ) as f:
        yaml.dump(config, f, default_flow_style=False, allow_unicode=True)
        temp_path = f.name
    try:
        run_validation(temp_path)
    finally:
        os.unlink(temp_path)
# Script entry point: invoke the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()