Skip to content

Commit e109b83

Browse files
author
Annie Ren
committed
Add mmq device table for RDNA3.5
1 parent 773dc6e commit e109b83

1 file changed

Lines changed: 12 additions & 2 deletions

File tree

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
140140
}
141141

142142
static int get_mmq_y_host(const int cc) {
143+
if (GGML_CUDA_CC_IS_RDNA3_5(cc)) {
144+
return 64;
145+
}
143146
return GGML_CUDA_CC_IS_AMD(cc) ? (GGML_CUDA_CC_IS_RDNA1(cc) ? 64 : 128) :
144147
((GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA) ? 128 : 64);
145148
}
@@ -155,7 +158,9 @@ if (type == GGML_TYPE_NVFP4 || type == GGML_TYPE_MXFP4) {
155158

156159
static constexpr __device__ int get_mmq_y_device() {
157160
#if defined(GGML_USE_HIP)
158-
#if defined(RDNA1)
161+
#if defined(RDNA3_5)
162+
return 64;
163+
#elif defined(RDNA1)
159164
return 64;
160165
#else
161166
return 128;
@@ -296,6 +301,9 @@ static constexpr __device__ int mmq_get_granularity_device(const int /*mmq_x*/)
296301

297302
#if defined(GGML_USE_HIP)
298303
static int mmq_get_nwarps_host(const int cc, const int warp_size) {
304+
if (GGML_CUDA_CC_IS_RDNA3_5(cc)) {
305+
return 4;
306+
}
299307
return amd_mfma_available(cc) ? 8 : 256/warp_size;
300308
}
301309
#else
@@ -305,7 +313,9 @@ static int mmq_get_nwarps_host(const int /*cc*/, const int warp_size) {
305313
#endif // (GGML_USE_HIP)
306314

307315
static constexpr __device__ int mmq_get_nwarps_device() {
308-
#if defined(AMD_MFMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
316+
#if defined(RDNA3_5)
317+
return 4;
318+
#elif defined(AMD_MFMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
309319
return 8;
310320
#else
311321
return 256/ggml_cuda_get_physical_warp_size();

0 commit comments

Comments
 (0)