Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions olive/cli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,21 @@ def _run_workflow(self):
print(f"Model is saved at {self.args.output_path}")
return workflow_output

@staticmethod
def _parse_extra_options(kv_items):
    """Turn ``key=value`` items into extra options for the model builder.

    The parsing itself is delegated to
    ``onnxruntime_genai.models.builder.parse_extra_options``; whatever that
    function returns is handed back unchanged.

    Args:
        kv_items: Items forwarded as-is to ``parse_extra_options``.

    Returns:
        The result of ``parse_extra_options(kv_items)``.

    Raises:
        ValueError: If the installed onnxruntime-genai version is 0.9.0 or older.
    """
    # Function-scope imports: these packages are only loaded when this helper is called.
    from onnxruntime_genai import __version__ as installed_genai_version
    from packaging.version import parse as parse_version

    installed = parse_version(installed_genai_version)
    newest_unsupported = parse_version("0.9.0")

    if installed <= newest_unsupported:
        message = (
            "onnxruntime-genai version <= 0.9.0 is not supported for extra_options in CLI. "
            "Please either upgrade to onnxruntime-genai version > 0.9.0 or use the model builder pass directly in the config file."
        )
        raise ValueError(message)

    # Imported only after the version gate has passed.
    # NOTE(review): presumably older releases do not expose this helper — confirm.
    from onnxruntime_genai.models import builder as genai_builder

    return genai_builder.parse_extra_options(kv_items)

@staticmethod
def _save_config_file(config: dict):
"""Save the config file."""
Expand Down
13 changes: 13 additions & 0 deletions olive/cli/capture_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@ def register_subcommand(parser: ArgumentParser):
"for the CUDA graph to be used correctly."
),
)
mb_group.add_argument(
"--extra_mb_options",
type=str,
required=False,
help="Extra key-value pairs options to pass to the model builder. e.g., 'int4_is_symmetric=true,int4_op_types_to_quantize=MatMul/Gemm'.",
)

sub_parser.add_argument(
"--use_ort_genai", action="store_true", help="Use OnnxRuntime generate() API to run the model"
Expand Down Expand Up @@ -194,6 +200,13 @@ def _get_run_config(self, tempdir: str) -> dict:
(("passes", "m", "enable_cuda_graph"), self.args.enable_cuda_graph),
]
)
if self.args.extra_mb_options:
to_replace.append(
(
("passes", "m", "extra_options"),
BaseOliveCLICommand._parse_extra_options(self.args.extra_mb_options.split(",")),
)
)
if self.args.int4_block_size is not None:
to_replace.append((("passes", "m", "int4_block_size"), self.args.int4_block_size))
if self.args.int4_accuracy_level is not None:
Expand Down
14 changes: 14 additions & 0 deletions olive/cli/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,14 @@ def register_subcommand(parser: ArgumentParser):
help="Path to QNN environment directory (required when using AOT with QNN).",
)

# Extra options for model builder
sub_parser.add_argument(
"--extra_mb_options",
type=str,
required=False,
help="Extra key-value pairs options to pass to the model builder. e.g., 'int4_is_symmetric=true,int4_op_types_to_quantize=MatMul/Gemm'.",
)

add_logging_options(sub_parser)
add_save_config_file_options(sub_parser)
sub_parser.set_defaults(func=OptimizeCommand)
Expand Down Expand Up @@ -471,6 +479,12 @@ def _get_model_builder_pass_config(self) -> dict[str, Any]:
config["int4_block_size"] = block_size_value
config["int4_accuracy_level"] = 4
config["int4_op_types_to_quantize"] = ["MatMul", "Gather"]

extra_options = {}
if self.args.extra_mb_options:
extra_options = BaseOliveCLICommand._parse_extra_options(self.args.extra_mb_options.split(","))
config["extra_options"] = extra_options

return config

def _enable_onnx_conversion_pass(self) -> bool:
Expand Down
11 changes: 10 additions & 1 deletion olive/passes/onnx/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
"for the CUDA graph to be used correctly."
),
),
"extra_options": PassConfigParam(
type_=dict[str, Any],
required=False,
description="Extra key-value pairs options to pass to the model builder.",
),
}

@classmethod
Expand Down Expand Up @@ -233,10 +238,14 @@ def _run_for_config(
{
key: value.value if isinstance(value, IntEnumBase) else value
for key, value in config.dict().items()
if value is not None and key not in {"precision", "metadata_only", "search"}
if value is not None and key not in {"precision", "metadata_only", "search", "extra_options"}
}
)

# User-provided entries in the extra_options parameter take precedence over the values already in extra_args
if config.extra_options:
extra_args.update(config.extra_options)

model_attributes = copy.deepcopy(model.model_attributes or {})

try:
Expand Down
6 changes: 5 additions & 1 deletion test/passes/onnx/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
def test_model_builder(tmp_path, metadata_only):
input_model = make_local_tiny_llama(tmp_path / "input_model", "onnx" if metadata_only else "hf")

p = create_pass_from_dict(ModelBuilder, {"precision": "fp32", "metadata_only": metadata_only}, disable_search=True)
p = create_pass_from_dict(
ModelBuilder,
{"precision": "fp32", "metadata_only": metadata_only, "extra_options": {"int4_is_symmetric": True}},
disable_search=True,
)
output_folder = tmp_path / "output_model"

# execute the pass
Expand Down