Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ examples/csharp/ModelChat/models
!test/models/multimodal-decoder-with-input-ids/*
!test/models/phi3-v/*
!test/models/pipeline-model/*.json
!test/models/pipeline-model-tiny/
!test/models/pipeline-model-tiny/*
!test/models/speculative-tiny/
!test/models/speculative-tiny/**
!test/models/pipeline-v2-decoder/
!test/models/pipeline-v2-decoder/*
!test/models/pipeline-v2-vlm-override/
!test/models/pipeline-v2-vlm-override/*
!test/models/qwen2-5-vl/*
!test/models/qwen2-5-vl-pipeline/
!test/models/qwen2-5-vl-pipeline/*
!test/models/qwen3-5/*
!test/models/qwen3-vl/*
!test/models/whisper/*
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ include(cmake/cxx_standard.cmake)
add_compile_definitions(BUILDING_ORT_GENAI_C)

add_compile_definitions(USE_GUIDANCE=$<BOOL:${USE_GUIDANCE}>)
add_compile_definitions(USE_GENAI_PLUGINS=$<BOOL:${USE_GENAI_PLUGINS}>)

# Suggested by https://gitlab.kitware.com/cmake/cmake/-/issues/20132
# MacCatalyst is not well supported in CMake
Expand Down
4 changes: 4 additions & 0 deletions cmake/options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ option(USE_DML "Build with DML support" OFF)
option(USE_WINML "Build with WinML support" OFF)
option(USE_GUIDANCE "Build with guidance support" OFF)

# Dynamic pipeline plugin loading (issue #2114 §4.5). OFF by default: when off the plugin loader is
# still compiled but throws "plugin support is not enabled", so the build stays green with no new deps.
option(USE_GENAI_PLUGINS "Enable dynamic pipeline plugin loading" OFF)

# bindings
option(ENABLE_JAVA "Build the Java API." OFF)
cmake_dependent_option(PUBLISH_JAVA_MAVEN_LOCAL "Publish Java artifacts to local Maven repo" OFF "ENABLE_JAVA" ON)
Expand Down
436 changes: 436 additions & 0 deletions docs/pipeline-config-v2.1-design.md

Large diffs are not rendered by default.

307 changes: 307 additions & 0 deletions docs/pipeline-config-v2.1-rfc.md

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions examples/pipeline-config/01-preset-decoder/genai_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": 2,
"pipeline": {
"extends": "autoregressive-decoder",
"sessions": {
"decoder": {"file": "model.onnx"}
}
},
"tokens": {"eos": [151645], "pad": 0},
"generation": {"max_length": 4096, "sampling": {"temperature": 0.7}},
"metadata": {"model_type": "qwen2"}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"version": 2,
"pipeline": {
"sessions": {
"encoder": {"file": "encoder.onnx"},
"decoder": {"file": "decoder.onnx"}
},
"flow": [
{"run": "encoder", "when": "init"},
{"run": "decoder", "when": "step", "cross_attention_from": "encoder"}
],
"dataflow": [
{"from": "encoder.encoder_hidden_states", "to": "decoder.encoder_hidden_states"}
],
"state": {
"kv_cache": {"format": "separate"},
"cross_cache": {"source": "encoder", "frozen": true},
"position_ids": {"strategy": "default"}
}
},
"tokens": {"eos": [2], "pad": 1, "bos": 0, "decoder_start": 2},
"generation": {"max_length": 448, "num_beams": 1},
"metadata": {"model_type": "whisper"}
}
26 changes: 26 additions & 0 deletions examples/pipeline-config/03-vlm-per-image/genai_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"version": 2,
"pipeline": {
"extends": "vision-language",
"sessions": {
"vision": {"file": "vision.onnx"},
"embedding": {"file": "embedding.onnx"},
"decoder": {"file": "decoder.onnx"}
},
"flow": [
{"run": "vision", "when": "init", "loop": "per_image"},
{"run": "embedding", "when": "init"},
{"run": "decoder", "when": "step"}
],
"dataflow": [
{"from": "vision.image_features", "to": "embedding.image_features"},
{"from": "embedding.inputs_embeds", "to": "decoder.inputs_embeds"}
],
"state": {
"position_ids": {"strategy": "mrope_3d", "grid_source": "vision.image_grid_thw"}
}
},
"tokens": {"eos": [151645], "pad": 0, "image_token": 151655},
"generation": {"max_length": 4096, "sampling": {"temperature": 0.7}},
"metadata": {"model_type": "qwen2_5_vl"}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"version": 2,
"pipeline": {
"plugin": {
"library": "libmy_custom_pipeline.so",
"entry_point": "OgaCreateMyPipeline"
},
"sessions": {
"decoder": {"file": "model.onnx"}
}
},
"tokens": {"eos": [2], "pad": 0},
"generation": {"max_length": 4096},
"metadata": {"model_type": "my-custom-arch"}
}
26 changes: 26 additions & 0 deletions examples/pipeline-config/05-v1-to-v2/v1/genai_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"model": {
"type": "gpt2",
"pad_token_id": 98,
"bos_token_id": 98,
"eos_token_id": 98,
"vocab_size": 1000,
"context_length": 512,
"decoder": {
"session_options": {
"session.use_device_allocator_for_initializers": "0",
"provider_options": []
},
"filename": "model.onnx",
"num_key_value_heads": 4,
"head_size": 8,
"num_hidden_layers": 5,
"inputs": {
"past_names": "past_%d"
},
"outputs": {
"present_names": "present_%d"
}
}
}
}
20 changes: 20 additions & 0 deletions examples/pipeline-config/05-v1-to-v2/v2/genai_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"version": 2,
"pipeline": {
"extends": "autoregressive-decoder",
"sessions": {
"decoder": {"file": "model.onnx"}
},
"state": {
"kv_cache": {
"format": "combined",
"past_key_pattern": "past_%d",
"present_key_pattern": "present_%d"
},
"position_ids": {"strategy": "default"}
}
},
"tokens": {"eos": [98], "pad": 98, "bos": 98},
"generation": {"max_length": 512},
"metadata": {"model_type": "gpt2"}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"version": 2,
"pipeline": {
"extends": "vision-language",
"sessions": {
"vision": {"file": "vision.onnx"},
"speech": {"file": "speech.onnx"},
"embedding": {"file": "embedding.onnx"},
"decoder": {"file": "decoder.onnx"}
},
"flow": [
{"run": "vision", "when": "init", "loop": "batched"},
{"run": "speech", "when": "init", "loop": "batched"},
{"run": "embedding", "when": "init"},
{"run": "decoder", "when": "step"}
],
"dataflow": [
{"from": "vision.image_features", "to": "embedding.image_features"},
{"from": "speech.audio_features", "to": "embedding.audio_features"},
{"from": "embedding.inputs_embeds", "to": "decoder.inputs_embeds"}
]
},
"tokens": {"bos": 2, "eos": [1, 106], "pad": 0, "image_token": 262144},
"generation": {"max_length": 32768, "sampling": {"temperature": 1.0, "top_k": 1}},
"metadata": {"model_type": "gemma4"}
}
68 changes: 68 additions & 0 deletions examples/pipeline-config/07-prefill-decode/genai_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"model": {
"type": "decoder-pipeline",
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 0,
"vocab_size": 32000,
"context_length": 4096,
"decoder": {
"hidden_size": 2048,
"num_attention_heads": 16,
"num_key_value_heads": 16,
"num_hidden_layers": 1,
"head_size": 128,
"inputs": {
"input_ids": "input_ids",
"attention_mask": "attention_mask",
"past_key_names": "past_key_values.%d.key",
"past_value_names": "past_key_values.%d.value"
},
"outputs": {
"logits": "logits",
"present_key_names": "present.%d.key",
"present_value_names": "present.%d.value"
},
"pipeline": [
{
"prefill": {
"filename": "prefill.onnx",
"run_on_prompt": true,
"run_on_token_gen": false,
"inputs": [
"input_ids",
"attention_mask",
"past_key_values.0.key",
"past_key_values.0.value"
],
"outputs": [
"logits",
"present.0.key",
"present.0.value"
]
},
"decode": {
"filename": "decode.onnx",
"run_on_prompt": false,
"run_on_token_gen": true,
"inputs": [
"input_ids",
"attention_mask",
"past_key_values.0.key",
"past_key_values.0.value"
],
"outputs": [
"logits",
"present.0.key",
"present.0.value"
]
}
}
]
}
},
"search": {
"max_length": 4096,
"past_present_share_buffer": true
}
}
Loading
Loading