@@ -188,6 +188,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
188188 {
189189 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0 ;
190190 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 16 == 0 && num_output % 16 == 0 ;
191+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
192+ {
193+ use_cooperative_matrix_16_8_8 = false ;
194+ use_cooperative_matrix_16_16_16 = false ;
195+ }
191196
192197 // winograd43 transform kernel
193198 if (opt.use_winograd43_convolution )
@@ -427,10 +432,12 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
427432 pipeline_convolution_3x3s1d1_winograd43_gemm = new Pipeline (vkdev);
428433 if (use_cooperative_matrix_16_8_8)
429434 {
435+ pipeline_convolution_3x3s1d1_winograd43_gemm->set_subgroup_size (32 );
430436 pipeline_convolution_3x3s1d1_winograd43_gemm->set_local_size_xyz (32 , 1 , 1 );
431437 }
432438 else if (use_cooperative_matrix_16_16_16)
433439 {
440+ pipeline_convolution_3x3s1d1_winograd43_gemm->set_subgroup_size (32 );
434441 pipeline_convolution_3x3s1d1_winograd43_gemm->set_local_size_xyz (32 , 1 , 1 );
435442 }
436443 else if (opt.use_shader_local_memory )
@@ -705,10 +712,12 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
705712 pipeline_convolution_3x3s1d1_winograd23_gemm = new Pipeline (vkdev);
706713 if (use_cooperative_matrix_16_8_8)
707714 {
715+ pipeline_convolution_3x3s1d1_winograd23_gemm->set_subgroup_size (32 );
708716 pipeline_convolution_3x3s1d1_winograd23_gemm->set_local_size_xyz (32 , 1 , 1 );
709717 }
710718 else if (use_cooperative_matrix_16_16_16)
711719 {
720+ pipeline_convolution_3x3s1d1_winograd23_gemm->set_subgroup_size (32 );
712721 pipeline_convolution_3x3s1d1_winograd23_gemm->set_local_size_xyz (32 , 1 , 1 );
713722 }
714723 else if (opt.use_shader_local_memory )
@@ -755,6 +764,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
755764 {
756765 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0 ;
757766 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 16 == 0 && num_output % 16 == 0 ;
767+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
768+ {
769+ use_cooperative_matrix_16_8_8 = false ;
770+ use_cooperative_matrix_16_16_16 = false ;
771+ }
758772
759773 if (use_cooperative_matrix_16_8_8)
760774 {
@@ -846,6 +860,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
846860 {
847861 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && is_conv1x1s1d1 && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0 ;
848862 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && is_conv1x1s1d1 && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 16 == 0 && num_output % 16 == 0 ;
863+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
864+ {
865+ use_cooperative_matrix_16_8_8 = false ;
866+ use_cooperative_matrix_16_16_16 = false ;
867+ }
849868
850869 if (use_cooperative_matrix_16_8_8)
851870 {
@@ -948,6 +967,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
948967 {
949968 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0 ;
950969 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 16 == 0 && num_output % 16 == 0 ;
970+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
971+ {
972+ use_cooperative_matrix_16_8_8 = false ;
973+ use_cooperative_matrix_16_16_16 = false ;
974+ }
951975
952976 // check blob shape
953977 if (!vkdev->shape_support_image_storage (shape_bordered_packed) || !vkdev->shape_support_image_storage (out_shape_packed))
@@ -1020,10 +1044,12 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
10201044 pipeline_convolution_gemm = new Pipeline (vkdev);
10211045 if (use_cooperative_matrix_16_8_8)
10221046 {
1047+ pipeline_convolution_gemm->set_subgroup_size (32 );
10231048 pipeline_convolution_gemm->set_local_size_xyz (32 , 1 , 1 ); // 16_8_8
10241049 }
10251050 else if (use_cooperative_matrix_16_16_16)
10261051 {
1052+ pipeline_convolution_gemm->set_subgroup_size (32 );
10271053 pipeline_convolution_gemm->set_local_size_xyz (32 , 1 , 1 ); // 16_16_16
10281054 }
10291055 else if (opt.use_shader_local_memory )
@@ -1040,6 +1066,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
10401066 {
10411067 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0 ;
10421068 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 16 == 0 && num_output % 16 == 0 ;
1069+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
1070+ {
1071+ use_cooperative_matrix_16_8_8 = false ;
1072+ use_cooperative_matrix_16_16_16 = false ;
1073+ }
10431074
10441075 std::vector<vk_specialization_type> specializations (4 + 8 );
10451076 specializations[0 ].i = bias_term;
@@ -1084,10 +1115,12 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
10841115 pipeline_convolution_1x1s1d1 = new Pipeline (vkdev);
10851116 if (use_cooperative_matrix_16_8_8)
10861117 {
1118+ pipeline_convolution_1x1s1d1->set_subgroup_size (32 );
10871119 pipeline_convolution_1x1s1d1->set_local_size_xyz (32 , 1 , 1 ); // 16_8_8
10881120 }
10891121 else if (use_cooperative_matrix_16_16_16)
10901122 {
1123+ pipeline_convolution_1x1s1d1->set_subgroup_size (32 );
10911124 pipeline_convolution_1x1s1d1->set_local_size_xyz (32 , 1 , 1 ); // 16_16_16
10921125 }
10931126 else if (opt.use_shader_local_memory )
@@ -1404,6 +1437,11 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
14041437 {
14051438 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 8 == 0 && num_output % 8 == 0 ;
14061439 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 16 == 0 && num_output % 16 == 0 ;
1440+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
1441+ {
1442+ use_cooperative_matrix_16_8_8 = false ;
1443+ use_cooperative_matrix_16_16_16 = false ;
1444+ }
14071445
14081446 bool pre_winograd43 = opt.use_winograd43_convolution ;
14091447 if (opt.use_winograd23_convolution )
@@ -1630,6 +1668,11 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
16301668 {
16311669 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 8 == 0 && num_output % 8 == 0 ;
16321670 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 16 == 0 && num_output % 16 == 0 ;
1671+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
1672+ {
1673+ use_cooperative_matrix_16_8_8 = false ;
1674+ use_cooperative_matrix_16_16_16 = false ;
1675+ }
16331676
16341677 // gemm
16351678 top_blob.create (outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator );
@@ -1678,6 +1721,11 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
16781721 {
16791722 bool use_cooperative_matrix_16_8_8 = vkdev->info .support_cooperative_matrix_16_8_8 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 8 == 0 && num_output % 8 == 0 ;
16801723 bool use_cooperative_matrix_16_16_16 = vkdev->info .support_cooperative_matrix_16_16_16 () && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 16 == 0 && num_output % 16 == 0 ;
1724+ if (vkdev->info .subgroup_size () != 32 && (!vkdev->info .support_subgroup_size_control () || vkdev->info .min_subgroup_size () > 32 || vkdev->info .max_subgroup_size () < 32 ))
1725+ {
1726+ use_cooperative_matrix_16_8_8 = false ;
1727+ use_cooperative_matrix_16_16_16 = false ;
1728+ }
16811729
16821730 top_blob.create (outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator );
16831731 if (top_blob.empty ())
0 commit comments