Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
384 changes: 384 additions & 0 deletions benchmarks/bench_la_decode_mtp.py

Large diffs are not rendered by default.

448 changes: 448 additions & 0 deletions benchmarks/bench_la_kvbuffer.py

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,14 @@ def set_seed(seed: int):


def benchmark_cuda_fn(fn, *, setup_fn=None, warmup=30, rep=200, aggregate="iqr_mean"):
"""Benchmark a CUDA callable with events and return milliseconds per call."""
"""Benchmark a CUDA callable with CUDA events; return milliseconds per call.

Args:
aggregate: How to summarize ``rep`` timed iterations.
``"iqr_mean"`` (default) — mean of the middle 50% after sorting
(robust to outliers; used by la_decode / MTP benchmarks).
``"mean"`` — arithmetic mean of all iterations.
"""
for _ in range(warmup):
if setup_fn is not None:
setup_fn()
Expand Down
6 changes: 6 additions & 0 deletions cula/lightning/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cula.lightning.la_decode_mtp import linear_attention_decode_mtp
from cula.lightning.la_state_update_kvbuffer import linear_attention_state_update_kvbuffer
from cula.lightning.la_verify_kvbuffer import linear_attention_verify_kvbuffer
from cula.ops.la_decode import linear_attention_decode
from cula.ops.lightning_attn_sm100 import (
LinearAttentionChunkwiseDecay,
Expand All @@ -24,4 +27,7 @@
"lightning_attn_fwd",
"lightning_attn_fwd_varlen",
"linear_attention_decode",
"linear_attention_decode_mtp",
"linear_attention_verify_kvbuffer",
"linear_attention_state_update_kvbuffer",
]
Loading