Skip to content

Commit 1c1430a

Browse files
committed
Add report generation
1 parent 6721a9a commit 1c1430a

3 files changed

Lines changed: 30 additions & 7 deletions

File tree

sklbench/report/implementation.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,10 @@
4949
],
5050
"higher is better": [
5151
"throughput[samples/ms]",
52+
# throughput mode
53+
"total_iterations",
54+
"total_throughput_iterations_per_sec",
55+
"mean_throughput_per_instance",
5256
# classification
5357
"accuracy",
5458
"balanced accuracy",
@@ -76,6 +80,11 @@
7680
"1st-mean run ratio",
7781
"time CV",
7882
"cpu load[%]",
83+
# throughput mode
84+
"std_throughput_per_instance",
85+
"min_iterations_per_instance",
86+
"max_iterations_per_instance",
87+
"measurement_wall_time_sec",
7988
],
8089
}
8190
MEMORY_TYPES = ["RAM", "VRAM"]
@@ -274,13 +283,18 @@ def get_result_tables_as_df(
274283
for bench_case in results["bench_cases"]
275284
]
276285
)
286+
# Drop columns that contain non-scalar data (e.g. per-instance details)
287+
for col in bench_cases.columns:
288+
if bench_cases[col].apply(lambda x: isinstance(x, (list, dict))).any():
289+
bench_cases.drop(columns=[col], inplace=True)
277290

278291
if compatibility_mode:
279292
bench_cases = transform_results_to_compatible(bench_cases)
280293

281294
for column in diffby_columns.copy():
282-
if bench_cases[column].nunique() == 1:
283-
bench_cases.drop(columns=[column], inplace=True)
295+
if column not in bench_cases.columns or bench_cases[column].nunique() == 1:
296+
if column in bench_cases.columns:
297+
bench_cases.drop(columns=[column], inplace=True)
284298
diffby_columns.remove(column)
285299

286300
return split_df_by_columns(bench_cases, splitby_columns)

sklbench/runner/implementation.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -128,8 +128,8 @@ def run_benchmarks(args: argparse.Namespace) -> int:
128128
with open(args.result_file, "w") as fp:
129129
json.dump(result, fp, indent=4)
130130

131-
# output as pandas dataframe (skip for throughput mode which has nested results)
132-
if len(result["bench_cases"]) != 0 and not args.throughput_mode:
131+
# output as pandas dataframe
132+
if len(result["bench_cases"]) != 0:
133133
for key, df in get_result_tables_as_df(result).items():
134134
logger.info(f'{custom_format(key, bcolor="HEADER")}\n{df}')
135135

sklbench/runner/throughput.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,6 @@ def run_single_throughput_case(
254254
results = []
255255
estimator_name = get_bench_case_value(bench_case, "algorithm:estimator")
256256
library_name = get_bench_case_value(bench_case, "algorithm:library")
257-
from .commands_helper import generate_benchmark_command
258257

259258
from ..benchmarks.sklearn_estimator import estimator_to_task
260259

@@ -264,6 +263,11 @@ def run_single_throughput_case(
264263
quality_metrics = instance_outputs[0].get("quality_metrics", {})
265264
final_estimator_params = instance_outputs[0].get("estimator_params", {})
266265

266+
# Dataset info from bench_case
267+
from ..utils.bench_case import get_data_name
268+
269+
dataset_name = get_data_name(bench_case, shortened=True)
270+
267271
for stage in stages:
268272
stage_result = aggregate_stage_results(
269273
instance_outputs, stage, measurement_duration, core_assignments
@@ -279,21 +283,26 @@ def run_single_throughput_case(
279283
method = stage_data.get("method", "unknown")
280284
break
281285

286+
# Flatten aggregate metrics to top-level for report compatibility
287+
aggregate = stage_result.pop("aggregate")
288+
instances_detail = stage_result.pop("instances")
289+
282290
result_entry = {
283291
"mode": "throughput",
284292
"stage": stage,
285293
"method": method,
286294
"task": task,
287295
"estimator": estimator_name,
296+
"dataset": dataset_name,
288297
"library": library_name,
289298
"device": get_bench_case_value(bench_case, "algorithm:device"),
290299
"num_instances": num_instances,
291300
"cores_per_instance": cores_per_instance,
292301
"measurement_duration_seconds": measurement_duration,
293302
}
303+
result_entry.update(aggregate)
294304
result_entry.update(quality_metrics)
295-
result_entry.update(final_estimator_params)
296-
result_entry.update(stage_result)
305+
result_entry["instances"] = instances_detail
297306
results.append(result_entry)
298307

299308
return return_code, results

0 commit comments

Comments
 (0)