Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions runtime/src/iree/hal/drivers/amdgpu/executable.c
Original file line number Diff line number Diff line change
Expand Up @@ -2138,13 +2138,19 @@ iree_status_t iree_hal_amdgpu_executable_raw_hsaco_custom_kernarg_layout(
if (!iree_host_size_checked_add(
(iree_host_size_t)implicit_args_offset,
(iree_host_size_t)IREE_AMDGPU_KERNEL_IMPLICIT_ARGS_SIZE,
&implicit_args_end) ||
kernel_args->kernarg_size < implicit_args_end) {
&implicit_args_end)) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"AMDGPU kernel `%.*s` has truncated implicit kernarg suffix",
IREE_STATUS_OUT_OF_RANGE,
"AMDGPU kernel `%.*s` implicit kernarg suffix offset overflows",
(int)symbol_name.size, symbol_name.data);
}
// NOTE: kernel_args->kernarg_size may be smaller than implicit_args_end for
// hand-written assembly kernels (e.g. MIOpen miopenSp3AsmConv*) that
// declare a partial hidden suffix. That is valid: the custom-direct
// dispatch path reserves max(kernarg_size, explicit, implicit_offset +
// IMPLICIT_SIZE) and zero-fills the remainder, so accept it rather than
// rejecting the load.
(void)implicit_args_end;
*out_implicit_args_offset = implicit_args_offset;
}
return iree_ok_status();
Expand Down
17 changes: 10 additions & 7 deletions runtime/src/iree/hal/drivers/amdgpu/executable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,12 @@ TEST(ExecutableTest, RawHsacoCustomKernargLayoutAcceptsFullImplicitSuffix) {
EXPECT_EQ(implicit_args_offset, 16u);
}

TEST(ExecutableTest,
RawHsacoCustomKernargLayoutRejectsTruncatedImplicitSuffix) {
// Hand-written assembly kernels may declare only a partial hidden suffix
// (kernarg_size < implicit offset + IREE_AMDGPU_KERNEL_IMPLICIT_ARGS_SIZE).
// The layout must accept this: dispatch reserves
// max(kernarg_size, implicit offset + implicit size) and zero-fills, so a
// short declared size is harmless.
TEST(ExecutableTest, RawHsacoCustomKernargLayoutAcceptsPartialImplicitSuffix) {
const iree_hal_amdgpu_hsaco_metadata_arg_t args[] = {
MakeArg(IREE_HAL_AMDGPU_HSACO_METADATA_ARG_KIND_GLOBAL_BUFFER,
/*offset=*/0, /*size=*/8),
Expand All @@ -381,11 +385,10 @@ TEST(ExecutableTest,

iree_host_size_t explicit_kernarg_size = 0;
uint16_t implicit_args_offset = 0;
IREE_EXPECT_STATUS_IS(
IREE_STATUS_INVALID_ARGUMENT,
iree_hal_amdgpu_executable_raw_hsaco_custom_kernarg_layout(
&kernel, &kernel_args, &explicit_kernarg_size,
&implicit_args_offset));
IREE_ASSERT_OK(iree_hal_amdgpu_executable_raw_hsaco_custom_kernarg_layout(
&kernel, &kernel_args, &explicit_kernarg_size, &implicit_args_offset));
EXPECT_EQ(explicit_kernarg_size, 8u);
EXPECT_EQ(implicit_args_offset, 16u);
}

} // namespace
Expand Down
Loading