Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/perf-bench-matrix.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
"pyreq": "transformers==4.57.1 FlagEmbedding",
"pytest": "tests/benchmark/test_encoders.py::test_bge_m3"
},
{
"name": "vibevoice",
"pytest": "tests/benchmark/test_encoders.py::test_vibevoice",
"runs-on": "n150-perf"
},
{
"name": "vllm_bge_m3_encode_batch1",
"pytest": "tests/benchmark/test_vllm_benchmarks.py::test_vllm_bge_m3_batch1",
Expand Down
53 changes: 53 additions & 0 deletions tests/benchmark/test_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,59 @@ def _process_colbert_vecs(colbert_vecs: np.ndarray, attention_mask_item: list):
)


def test_vibevoice(output_file, request):
    """Run the VibeVoice-1.5B (microsoft/VibeVoice-1.5B) benchmark.

    VibeVoice is a long-form, generation-based text-to-speech model. For this
    benchmark the forward pass comes down to the Qwen2.5 LM backbone emitting
    logits (speech_tensors=None; the semantic connector is exercised but its
    result is unused), which is why the generic single-forward encoder harness
    can drive it. The loader wraps the model so that forward() hands back the
    bare logits tensor.
    """
    from third_party.tt_forge_models.vibevoice.pytorch.loader import ModelLoader

    # Benchmark configuration.
    data_format = "bfloat16"
    batch_size = 1
    seq_len = 32

    # Instantiate the loader and fetch the model in the configured dtype.
    loader = ModelLoader()
    model_info_name = loader.get_model_info().name
    print(f"\nLoading model {model_info_name}...")
    dtype = DTYPE_MAP[data_format]
    model = loader.load_model(dtype_override=dtype)

    def load_inputs_fn(batch_size):
        """Ask the loader for inputs of the requested batch size."""
        return loader.load_inputs(
            batch_size=batch_size,
            seq_len=seq_len,
            dtype_override=dtype,
        )

    def preprocess_fn(raw_inputs, device):
        """Send tensor values to device; keep non-tensors (e.g. return_dict) as-is."""
        moved = {}
        for key, value in raw_inputs.items():
            if isinstance(value, torch.Tensor):
                moved[key] = value.to(device)
            else:
                moved[key] = value
        return moved

    def output_processor_fn(out, inputs):
        """Return the model output untouched."""
        return out

    test_encoder(
        model=model,
        model_info_name=model_info_name,
        output_file=output_file,
        display_name="vibevoice",
        request=request,
        load_inputs_fn=load_inputs_fn,
        preprocess_fn=preprocess_fn,
        output_processor_fn=output_processor_fn,
        data_format=data_format,
        batch_size=batch_size,
        input_sequence_length=seq_len,
        loop_count=32,
        optimization_level=1,
        trace_enabled=False,
    )


# Trace disabled: output tensor not on device (https://github.com/tenstorrent/tt-xla/issues/3937)
def test_unet_for_conditional_generation(output_file, request):
"""Test UNet for Conditional Generation model. This is a core component of the Stable Diffusion XL pipeline (https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)"""
Expand Down
Loading