Skip to content

Commit bc32f89

Browse files
Merge branch 'main' into chao/fixqnngpu
2 parents (442a700 + 3e627e5), commit bc32f89

15 files changed

Lines changed: 425 additions & 91 deletions

File tree

src/winml/modelkit/commands/build.py

Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -436,22 +436,23 @@ def _validate_loader_tasks_for_model(
436436
help="Output directory for all build artifacts",
437437
)
438438
@click.option(
439-
"--use-cache",
440-
is_flag=True,
439+
"--use-cache/--no-use-cache",
441440
default=False,
441+
show_default=True,
442442
help="Use WinML CLI global cache (~/.cache/winml/). Mutually exclusive with -o.",
443443
)
444444
@click.option(
445-
"--rebuild",
446-
is_flag=True,
445+
"--rebuild/--no-rebuild",
447446
default=False,
447+
show_default=True,
448448
help="Overwrite existing artifacts and rebuild",
449449
)
450450
@click.option(
451-
"--no-quant",
452-
is_flag=True,
453-
default=False,
454-
help="Skip quantization (overrides config)",
451+
"--quant/--no-quant",
452+
"quant",
453+
default=True,
454+
show_default=True,
455+
help="Enable quantization (use --no-quant to skip, overrides config)",
455456
)
456457
@click.option(
457458
"--no-compile/--compile",
@@ -472,16 +473,18 @@ def _validate_loader_tasks_for_model(
472473
optional_message="Default: auto-detect.",
473474
)
474475
@click.option(
475-
"--no-analyze",
476-
is_flag=True,
477-
default=False,
478-
help="Skip analyzer loop during build",
476+
"--analyze/--no-analyze",
477+
"analyze",
478+
default=True,
479+
show_default=True,
480+
help="Run analyzer loop during build (use --no-analyze to skip)",
479481
)
480482
@click.option(
481-
"--no-optimize",
482-
is_flag=True,
483-
default=False,
484-
help="Skip optimization (for pre-quantized ONNX models)",
483+
"--optimize/--no-optimize",
484+
"optimize",
485+
default=True,
486+
show_default=True,
487+
help="Run optimization (use --no-optimize to skip for pre-quantized ONNX models)",
485488
)
486489
@click.option(
487490
"--max-optim-iterations",
@@ -503,12 +506,12 @@ def build(
503506
output_dir: str | None,
504507
use_cache: bool,
505508
rebuild: bool,
506-
no_quant: bool,
509+
quant: bool,
507510
no_compile: bool | None,
508-
no_optimize: bool,
511+
optimize: bool,
509512
ep: EPNameOrAlias | None,
510513
device: str,
511-
no_analyze: bool,
514+
analyze: bool,
512515
max_optim_iterations: int | None,
513516
allow_unsupported_nodes: bool,
514517
trust_remote_code: bool,
@@ -579,7 +582,7 @@ def build(
579582
if config_file is not None:
580583
config_or_configs = _load_config(
581584
config_file,
582-
no_quant=no_quant,
585+
no_quant=not quant,
583586
no_compile=no_compile,
584587
)
585588
else:
@@ -592,7 +595,7 @@ def build(
592595
trust_remote_code=trust_remote_code,
593596
device=device,
594597
)
595-
if no_quant:
598+
if not quant:
596599
config_or_configs.quant = None
597600
# Auto-generated configs: compile disabled by default unless
598601
# --compile was explicitly passed (no_compile=False).
@@ -610,7 +613,7 @@ def _patch_device(cfg: WinMLBuildConfig) -> None:
610613
from ..config import resolve_quant_compile_config
611614

612615
resolved_quant, _ = resolve_quant_compile_config(device=device, ep=ep)
613-
if no_quant or resolved_quant is None:
616+
if not quant or resolved_quant is None:
614617
cfg.quant = None
615618
elif cfg.quant is None:
616619
# Populate calibration identifiers from the loader/model
@@ -643,9 +646,7 @@ def _patch_device(cfg: WinMLBuildConfig) -> None:
643646
# scratch state when the user passes the wrong file or a
644647
# hand-edited config (#P1 UX).
645648
_configs_to_validate: list[WinMLBuildConfig] = (
646-
config_or_configs
647-
if isinstance(config_or_configs, list)
648-
else [config_or_configs]
649+
config_or_configs if isinstance(config_or_configs, list) else [config_or_configs]
649650
)
650651
try:
651652
for _cfg in _configs_to_validate:
@@ -661,9 +662,9 @@ def _patch_device(cfg: WinMLBuildConfig) -> None:
661662

662663
# Build extra kwargs for pipeline control
663664
extra_kwargs: dict[str, Any] = {}
664-
if no_optimize:
665+
if not optimize:
665666
extra_kwargs["skip_optimize"] = True
666-
if no_analyze:
667+
if not analyze:
667668
extra_kwargs["hack_max_optim_iterations"] = 0
668669
elif max_optim_iterations is not None:
669670
extra_kwargs["hack_max_optim_iterations"] = max_optim_iterations

src/winml/modelkit/commands/compile.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,9 +85,9 @@
8585
help="Path to QAIRT SDK root",
8686
)
8787
@click.option(
88-
"--embed",
89-
is_flag=True,
88+
"--embed/--no-embed",
9089
default=False,
90+
show_default=True,
9191
help="Embed EP context in ONNX file (default: external .bin file)",
9292
)
9393
@click.option(

src/winml/modelkit/commands/config.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -126,10 +126,11 @@ def _apply_stage_overrides(cfg: Any, *, no_quant: bool, no_compile: bool) -> Non
126126
help="Source library for TasksManager (default: transformers)",
127127
)
128128
@click.option(
129-
"--no-quant",
130-
is_flag=True,
131-
default=False,
132-
help="Exclude quantization from generated config (sets quant=None)",
129+
"--quant/--no-quant",
130+
"quant",
131+
default=True,
132+
show_default=True,
133+
help="Include quantization in generated config (use --no-quant to exclude, sets quant=None)",
133134
)
134135
@click.option(
135136
"--no-compile/--compile",
@@ -156,7 +157,7 @@ def config(
156157
library_name: str,
157158
verbose: int,
158159
quiet: bool,
159-
no_quant: bool,
160+
quant: bool,
160161
no_compile: bool,
161162
trust_remote_code: bool,
162163
) -> None:
@@ -289,7 +290,7 @@ def config(
289290
)
290291

291292
# Apply --no-quant / --no-compile overrides
292-
_apply_stage_overrides(config_obj, no_quant=no_quant, no_compile=no_compile)
293+
_apply_stage_overrides(config_obj, no_quant=not quant, no_compile=no_compile)
293294

294295
output_data = config_obj.to_dict()
295296
_is_onnx_mode = True
@@ -319,7 +320,7 @@ def config(
319320
precision=precision,
320321
trust_remote_code=trust_remote_code,
321322
ep=ep,
322-
no_quant=no_quant,
323+
no_quant=not quant,
323324
no_compile=no_compile,
324325
output=output,
325326
console=console,
@@ -347,15 +348,15 @@ def config(
347348
if module:
348349
configs = generate_hf_build_config(module=module, **_shared_kwargs)
349350
for cfg in configs:
350-
_apply_stage_overrides(cfg, no_quant=no_quant, no_compile=no_compile)
351+
_apply_stage_overrides(cfg, no_quant=not quant, no_compile=no_compile)
351352
output_data = [cfg.to_dict() for cfg in configs]
352353
_n_modules = len(configs)
353354
# Use first config for display metadata
354355
config_obj = configs[0] if configs else None
355356
else:
356357
config_obj = generate_hf_build_config(**_shared_kwargs)
357358
configs = []
358-
_apply_stage_overrides(config_obj, no_quant=no_quant, no_compile=no_compile)
359+
_apply_stage_overrides(config_obj, no_quant=not quant, no_compile=no_compile)
359360
output_data = config_obj.to_dict()
360361
_n_modules = 0
361362

src/winml/modelkit/commands/eval.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,9 @@
112112
help="Shuffle dataset before sampling.",
113113
)
114114
@click.option(
115-
"--streaming",
116-
is_flag=True,
115+
"--streaming/--no-streaming",
117116
default=False,
117+
show_default=True,
118118
help="Stream dataset instead of downloading fully.",
119119
)
120120
@click.option(

src/winml/modelkit/commands/export.py

Lines changed: 32 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -71,24 +71,31 @@ def _delete_onnx_with_external_data(onnx_path: Path) -> None:
7171
)
7272
@cli_utils.output_option("Output ONNX file path (e.g., model.onnx)", required=True)
7373
@click.option(
74-
"--with-report",
75-
is_flag=True,
74+
"--with-report/--no-with-report",
7675
default=False,
76+
show_default=True,
7777
help="Generate full export reports (markdown, JSON, console tree)",
7878
)
79+
@click.option(
80+
"--hierarchy/--no-hierarchy",
81+
"hierarchy",
82+
default=True,
83+
show_default=True,
84+
help="Embed hierarchy_tag metadata in ONNX output",
85+
)
7986
@click.option(
8087
"--clean-onnx",
81-
"--no-hierarchy",
82-
"no_hierarchy",
88+
"clean_onnx",
8389
is_flag=True,
8490
default=False,
85-
help="Skip embedding hierarchy_tag metadata in ONNX (clean ONNX output)",
91+
hidden=True,
92+
help="Deprecated alias for --no-hierarchy.",
8693
)
8794
@click.option(
88-
"--dynamo",
95+
"--dynamo/--no-dynamo",
8996
"dynamo",
90-
is_flag=True,
9197
default=False,
98+
show_default=True,
9299
help="Enable PyTorch 2.9+ dynamo export for rich node metadata",
93100
)
94101
@click.option(
@@ -132,7 +139,8 @@ def export(
132139
verbose: int,
133140
quiet: bool,
134141
with_report: bool,
135-
no_hierarchy: bool,
142+
hierarchy: bool,
143+
clean_onnx: bool,
136144
dynamo: bool,
137145
torch_module: str | None,
138146
task: str | None,
@@ -195,15 +203,27 @@ def export(
195203
_build_export_dict = ec
196204
if not cli_utils.is_cli_provided(ctx, "task") and "task" in lc:
197205
task = lc["task"]
198-
if not cli_utils.is_cli_provided(ctx, "no_hierarchy") and "enable_hierarchy_tags" in ec:
199-
no_hierarchy = not ec["enable_hierarchy_tags"]
206+
if (
207+
not cli_utils.is_cli_provided(ctx, "hierarchy")
208+
and not cli_utils.is_cli_provided(ctx, "clean_onnx")
209+
and "enable_hierarchy_tags" in ec
210+
):
211+
hierarchy = ec["enable_hierarchy_tags"]
200212
if not cli_utils.is_cli_provided(ctx, "dynamo") and "dynamo" in ec:
201213
dynamo = ec["dynamo"]
202214

203215
from ..export import InputTensorSpec, OutputTensorSpec, WinMLExportConfig
204216
from ..export import export_pytorch as export_onnx
205217
from ..loader import load_hf_model
206218

219+
if clean_onnx:
220+
click.echo(
221+
"warning: --clean-onnx is deprecated; use --no-hierarchy instead.",
222+
err=True,
223+
)
224+
if not cli_utils.is_cli_provided(ctx, "hierarchy"):
225+
hierarchy = False
226+
207227
# Configure logging — stderr only, shared format with the rest of the CLI.
208228
configure_logging(verbosity=verbose, quiet=quiet)
209229

@@ -332,8 +352,8 @@ def export(
332352
# Layer 2: --export-config file overrides
333353
config_kwargs.update(export_config_dict)
334354
# Layer 3: explicit CLI options (highest precedence)
335-
if cli_utils.is_cli_provided(ctx, "no_hierarchy"):
336-
config_kwargs["enable_hierarchy_tags"] = not no_hierarchy
355+
if cli_utils.is_cli_provided(ctx, "hierarchy") or cli_utils.is_cli_provided(ctx, "clean_onnx"):
356+
config_kwargs["enable_hierarchy_tags"] = hierarchy
337357
if cli_utils.is_cli_provided(ctx, "verbose"):
338358
config_kwargs["verbose"] = bool(verbose)
339359
if cli_utils.is_cli_provided(ctx, "dynamo"):

src/winml/modelkit/commands/inspect.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,10 @@ def _looks_like_local_path(model_id: str) -> bool:
105105
help="Override auto-detected task (e.g., image-classification, feature-extraction)",
106106
)
107107
@click.option(
108-
"-H",
109-
"--hierarchy",
110-
is_flag=True,
108+
"-H/-N",
109+
"--hierarchy/--no-hierarchy",
111110
default=False,
111+
show_default=True,
112112
help="Show HF module hierarchy (uses random weights, no weight download)",
113113
)
114114
@click.option(

src/winml/modelkit/commands/perf.py

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1067,21 +1067,22 @@ def _run_simple_loop(
10671067
help='JSON file with shape overrides (e.g., {"height": 480, "width": 480}).',
10681068
)
10691069
@click.option(
1070-
"--no-quantize",
1071-
is_flag=True,
1072-
default=False,
1073-
help="Skip quantization during model build",
1070+
"--quantize/--no-quantize",
1071+
"quantize",
1072+
default=True,
1073+
show_default=True,
1074+
help="Include quantization during model build (use --no-quantize to skip)",
10741075
)
10751076
@click.option(
1076-
"--rebuild",
1077-
is_flag=True,
1077+
"--rebuild/--no-rebuild",
10781078
default=False,
1079+
show_default=True,
10791080
help="Force rebuild even if cached artifacts exist",
10801081
)
10811082
@click.option(
1082-
"--ignore-cache",
1083-
is_flag=True,
1083+
"--ignore-cache/--no-ignore-cache",
10841084
default=False,
1085+
show_default=True,
10851086
help="Build from scratch in a temp folder (discard after benchmarking)",
10861087
)
10871088
@cli_utils.skip_build_option()
@@ -1097,9 +1098,9 @@ def _run_simple_loop(
10971098
"not '--module encoder.layer.0.attention' (a path, will not match).",
10981099
)
10991100
@click.option(
1100-
"--monitor",
1101-
is_flag=True,
1101+
"--monitor/--no-monitor",
11021102
default=False,
1103+
show_default=True,
11031104
help="Show live hardware utilization chart for the benchmarked device (NPU, GPU, or CPU)",
11041105
)
11051106
@click.option(
@@ -1125,7 +1126,7 @@ def perf(
11251126
output: Path | None,
11261127
batch_size: int,
11271128
shape_config_path: Path | None,
1128-
no_quantize: bool,
1129+
quantize: bool,
11291130
rebuild: bool,
11301131
ignore_cache: bool,
11311132
skip_build: bool,
@@ -1216,7 +1217,7 @@ def perf(
12161217
iterations=iterations,
12171218
warmup=warmup,
12181219
batch_size=batch_size,
1219-
no_quantize=no_quantize,
1220+
no_quantize=not quantize,
12201221
output=output,
12211222
verbose=bool(verbose),
12221223
console=console,
@@ -1262,7 +1263,7 @@ def perf(
12621263
warmup=warmup,
12631264
batch_size=batch_size,
12641265
output_path=output,
1265-
no_quantize=no_quantize,
1266+
no_quantize=not quantize,
12661267
rebuild=rebuild,
12671268
ignore_cache=ignore_cache,
12681269
skip_build=skip_build,

0 commit comments

Comments
 (0)