Skip to content

Commit f6f734f

Browse files
authored
vulkan int8 kernels (#6751)
* vulkan: add int8 kernels for core layers

  Implement Vulkan int8 paths for convolution, convolutiondepthwise, innerproduct, and gemm with scalar fallback and integer dot product shader branches.

  Add goal.md and enable existing int8 unit tests for Vulkan coverage.

* enable int8 gpu benchmark
1 parent 9c62afb commit f6f734f

94 files changed

Lines changed: 17185 additions & 714 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

benchmark/benchncnn.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,6 @@ static ncnn::VkAllocator* g_staging_vkallocator = 0;
5050

5151
void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncnn::Option& opt, const char* model_param_data = NULL)
5252
{
53-
// Skip if int8 model name and using GPU
54-
if (opt.use_vulkan_compute && strstr(comment, "int8") != NULL)
55-
{
56-
if (!model_param_data)
57-
fprintf(stderr, "%20s skipped (int8+GPU not supported)\n", comment);
58-
return;
59-
}
60-
6153
g_blob_pool_allocator.clear();
6254
g_workspace_pool_allocator.clear();
6355

benchmark/models/efficientnet_b0.param

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Split splitncnn_0 1 2 368 368_splitncnn_0 368_sp
99
Pooling GlobalAveragePool_8 1 1 368_splitncnn_1 369 -23330=4,1,32,1,1 0=1 4=1
1010
InnerProduct Conv_9 1 1 369 370 -23330=4,1,8,1,1 0=8 1=1 2=256
1111
Swish Mul_11 1 1 370 372 -23330=4,1,8,1,1
12-
Convolution Conv_12 1 1 372 374 -23330=4,1,32,1,1 0=32 1=1 5=1 6=256 9=4
12+
InnerProduct Conv_12 1 1 372 374 -23330=4,1,32,1,1 0=32 1=1 2=256 9=4
1313
BinaryOp Mul_14 2 1 368_splitncnn_0 374 375 -23330=4,3,112,112,32 0=2
1414
Convolution Conv_15 1 1 375 377 -23330=4,3,112,112,16 0=16 1=1 5=1 6=512
1515
Convolution Conv_17 1 1 377 379 -23330=4,3,112,112,96 0=96 1=1 5=1 6=1536
@@ -20,7 +20,7 @@ Split splitncnn_1 1 2 385 385_splitncnn_0 385_sp
2020
Pooling GlobalAveragePool_25 1 1 385_splitncnn_1 386 -23330=4,1,96,1,1 0=1 4=1
2121
InnerProduct Conv_26 1 1 386 387 -23330=4,1,4,1,1 0=4 1=1 2=384
2222
Swish Mul_28 1 1 387 389 -23330=4,1,4,1,1
23-
Convolution Conv_29 1 1 389 391 -23330=4,1,96,1,1 0=96 1=1 5=1 6=384 9=4
23+
InnerProduct Conv_29 1 1 389 391 -23330=4,1,96,1,1 0=96 1=1 2=384 9=4
2424
BinaryOp Mul_31 2 1 385_splitncnn_0 391 392 -23330=4,3,56,56,96 0=2
2525
Convolution Conv_32 1 1 392 394 -23330=4,3,56,56,24 0=24 1=1 5=1 6=2304
2626
Split splitncnn_2 1 2 394 394_splitncnn_0 394_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
@@ -32,7 +32,7 @@ Split splitncnn_3 1 2 402 402_splitncnn_0 402_sp
3232
Pooling GlobalAveragePool_42 1 1 402_splitncnn_1 403 -23330=4,1,144,1,1 0=1 4=1
3333
InnerProduct Conv_43 1 1 403 404 -23330=4,1,6,1,1 0=6 1=1 2=864
3434
Swish Mul_45 1 1 404 406 -23330=4,1,6,1,1
35-
Convolution Conv_46 1 1 406 408 -23330=4,1,144,1,1 0=144 1=1 5=1 6=864 9=4
35+
InnerProduct Conv_46 1 1 406 408 -23330=4,1,144,1,1 0=144 1=1 2=864 9=4
3636
BinaryOp Mul_48 2 1 402_splitncnn_0 408 409 -23330=4,3,56,56,144 0=2
3737
Convolution Conv_49 1 1 409 411 -23330=4,3,56,56,24 0=24 1=1 5=1 6=3456
3838
BinaryOp Add_51 2 1 394_splitncnn_0 411 412 -23330=4,3,56,56,24
@@ -44,7 +44,7 @@ Split splitncnn_4 1 2 420 420_splitncnn_0 420_sp
4444
Pooling GlobalAveragePool_60 1 1 420_splitncnn_1 421 -23330=4,1,144,1,1 0=1 4=1
4545
InnerProduct Conv_61 1 1 421 422 -23330=4,1,6,1,1 0=6 1=1 2=864
4646
Swish Mul_63 1 1 422 424 -23330=4,1,6,1,1
47-
Convolution Conv_64 1 1 424 426 -23330=4,1,144,1,1 0=144 1=1 5=1 6=864 9=4
47+
InnerProduct Conv_64 1 1 424 426 -23330=4,1,144,1,1 0=144 1=1 2=864 9=4
4848
BinaryOp Mul_66 2 1 420_splitncnn_0 426 427 -23330=4,3,28,28,144 0=2
4949
Convolution Conv_67 1 1 427 429 -23330=4,3,28,28,40 0=40 1=1 5=1 6=5760
5050
Split splitncnn_5 1 2 429 429_splitncnn_0 429_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
@@ -56,7 +56,7 @@ Split splitncnn_6 1 2 437 437_splitncnn_0 437_sp
5656
Pooling GlobalAveragePool_77 1 1 437_splitncnn_1 438 -23330=4,1,240,1,1 0=1 4=1
5757
InnerProduct Conv_78 1 1 438 439 -23330=4,1,10,1,1 0=10 1=1 2=2400
5858
Swish Mul_80 1 1 439 441 -23330=4,1,10,1,1
59-
Convolution Conv_81 1 1 441 443 -23330=4,1,240,1,1 0=240 1=1 5=1 6=2400 9=4
59+
InnerProduct Conv_81 1 1 441 443 -23330=4,1,240,1,1 0=240 1=1 2=2400 9=4
6060
BinaryOp Mul_83 2 1 437_splitncnn_0 443 444 -23330=4,3,28,28,240 0=2
6161
Convolution Conv_84 1 1 444 446 -23330=4,3,28,28,40 0=40 1=1 5=1 6=9600
6262
BinaryOp Add_86 2 1 429_splitncnn_0 446 447 -23330=4,3,28,28,40
@@ -68,7 +68,7 @@ Split splitncnn_7 1 2 455 455_splitncnn_0 455_sp
6868
Pooling GlobalAveragePool_95 1 1 455_splitncnn_1 456 -23330=4,1,240,1,1 0=1 4=1
6969
InnerProduct Conv_96 1 1 456 457 -23330=4,1,10,1,1 0=10 1=1 2=2400
7070
Swish Mul_98 1 1 457 459 -23330=4,1,10,1,1
71-
Convolution Conv_99 1 1 459 461 -23330=4,1,240,1,1 0=240 1=1 5=1 6=2400 9=4
71+
InnerProduct Conv_99 1 1 459 461 -23330=4,1,240,1,1 0=240 1=1 2=2400 9=4
7272
BinaryOp Mul_101 2 1 455_splitncnn_0 461 462 -23330=4,3,14,14,240 0=2
7373
Convolution Conv_102 1 1 462 464 -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
7474
Split splitncnn_8 1 2 464 464_splitncnn_0 464_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
@@ -80,7 +80,7 @@ Split splitncnn_9 1 2 472 472_splitncnn_0 472_sp
8080
Pooling GlobalAveragePool_112 1 1 472_splitncnn_1 473 -23330=4,1,480,1,1 0=1 4=1
8181
InnerProduct Conv_113 1 1 473 474 -23330=4,1,20,1,1 0=20 1=1 2=9600
8282
Swish Mul_115 1 1 474 476 -23330=4,1,20,1,1
83-
Convolution Conv_116 1 1 476 478 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
83+
InnerProduct Conv_116 1 1 476 478 -23330=4,1,480,1,1 0=480 1=1 2=9600 9=4
8484
BinaryOp Mul_118 2 1 472_splitncnn_0 478 479 -23330=4,3,14,14,480 0=2
8585
Convolution Conv_119 1 1 479 481 -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
8686
BinaryOp Add_121 2 1 464_splitncnn_0 481 482 -23330=4,3,14,14,80
@@ -93,7 +93,7 @@ Split splitncnn_11 1 2 490 490_splitncnn_0 490_sp
9393
Pooling GlobalAveragePool_130 1 1 490_splitncnn_1 491 -23330=4,1,480,1,1 0=1 4=1
9494
InnerProduct Conv_131 1 1 491 492 -23330=4,1,20,1,1 0=20 1=1 2=9600
9595
Swish Mul_133 1 1 492 494 -23330=4,1,20,1,1
96-
Convolution Conv_134 1 1 494 496 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
96+
InnerProduct Conv_134 1 1 494 496 -23330=4,1,480,1,1 0=480 1=1 2=9600 9=4
9797
BinaryOp Mul_136 2 1 490_splitncnn_0 496 497 -23330=4,3,14,14,480 0=2
9898
Convolution Conv_137 1 1 497 499 -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
9999
BinaryOp Add_139 2 1 482_splitncnn_0 499 500 -23330=4,3,14,14,80
@@ -105,7 +105,7 @@ Split splitncnn_12 1 2 508 508_splitncnn_0 508_sp
105105
Pooling GlobalAveragePool_148 1 1 508_splitncnn_1 509 -23330=4,1,480,1,1 0=1 4=1
106106
InnerProduct Conv_149 1 1 509 510 -23330=4,1,20,1,1 0=20 1=1 2=9600
107107
Swish Mul_151 1 1 510 512 -23330=4,1,20,1,1
108-
Convolution Conv_152 1 1 512 514 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
108+
InnerProduct Conv_152 1 1 512 514 -23330=4,1,480,1,1 0=480 1=1 2=9600 9=4
109109
BinaryOp Mul_154 2 1 508_splitncnn_0 514 515 -23330=4,3,14,14,480 0=2
110110
Convolution Conv_155 1 1 515 517 -23330=4,3,14,14,112 0=112 1=1 5=1 6=53760
111111
Split splitncnn_13 1 2 517 517_splitncnn_0 517_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
@@ -117,7 +117,7 @@ Split splitncnn_14 1 2 525 525_splitncnn_0 525_sp
117117
Pooling GlobalAveragePool_165 1 1 525_splitncnn_1 526 -23330=4,1,672,1,1 0=1 4=1
118118
InnerProduct Conv_166 1 1 526 527 -23330=4,1,28,1,1 0=28 1=1 2=18816
119119
Swish Mul_168 1 1 527 529 -23330=4,1,28,1,1
120-
Convolution Conv_169 1 1 529 531 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
120+
InnerProduct Conv_169 1 1 529 531 -23330=4,1,672,1,1 0=672 1=1 2=18816 9=4
121121
BinaryOp Mul_171 2 1 525_splitncnn_0 531 532 -23330=4,3,14,14,672 0=2
122122
Convolution Conv_172 1 1 532 534 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
123123
BinaryOp Add_174 2 1 517_splitncnn_0 534 535 -23330=4,3,14,14,112
@@ -130,7 +130,7 @@ Split splitncnn_16 1 2 543 543_splitncnn_0 543_sp
130130
Pooling GlobalAveragePool_183 1 1 543_splitncnn_1 544 -23330=4,1,672,1,1 0=1 4=1
131131
InnerProduct Conv_184 1 1 544 545 -23330=4,1,28,1,1 0=28 1=1 2=18816
132132
Swish Mul_186 1 1 545 547 -23330=4,1,28,1,1
133-
Convolution Conv_187 1 1 547 549 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
133+
InnerProduct Conv_187 1 1 547 549 -23330=4,1,672,1,1 0=672 1=1 2=18816 9=4
134134
BinaryOp Mul_189 2 1 543_splitncnn_0 549 550 -23330=4,3,14,14,672 0=2
135135
Convolution Conv_190 1 1 550 552 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
136136
BinaryOp Add_192 2 1 535_splitncnn_0 552 553 -23330=4,3,14,14,112
@@ -142,7 +142,7 @@ Split splitncnn_17 1 2 561 561_splitncnn_0 561_sp
142142
Pooling GlobalAveragePool_201 1 1 561_splitncnn_1 562 -23330=4,1,672,1,1 0=1 4=1
143143
InnerProduct Conv_202 1 1 562 563 -23330=4,1,28,1,1 0=28 1=1 2=18816
144144
Swish Mul_204 1 1 563 565 -23330=4,1,28,1,1
145-
Convolution Conv_205 1 1 565 567 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
145+
InnerProduct Conv_205 1 1 565 567 -23330=4,1,672,1,1 0=672 1=1 2=18816 9=4
146146
BinaryOp Mul_207 2 1 561_splitncnn_0 567 568 -23330=4,3,7,7,672 0=2
147147
Convolution Conv_208 1 1 568 570 -23330=4,3,7,7,192 0=192 1=1 5=1 6=129024
148148
Split splitncnn_18 1 2 570 570_splitncnn_0 570_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
@@ -154,7 +154,7 @@ Split splitncnn_19 1 2 578 578_splitncnn_0 578_sp
154154
Pooling GlobalAveragePool_218 1 1 578_splitncnn_1 579 -23330=4,1,1152,1,1 0=1 4=1
155155
InnerProduct Conv_219 1 1 579 580 -23330=4,1,48,1,1 0=48 1=1 2=55296
156156
Swish Mul_221 1 1 580 582 -23330=4,1,48,1,1
157-
Convolution Conv_222 1 1 582 584 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
157+
InnerProduct Conv_222 1 1 582 584 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
158158
BinaryOp Mul_224 2 1 578_splitncnn_0 584 585 -23330=4,3,7,7,1152 0=2
159159
Convolution Conv_225 1 1 585 587 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
160160
BinaryOp Add_227 2 1 570_splitncnn_0 587 588 -23330=4,3,7,7,192
@@ -167,7 +167,7 @@ Split splitncnn_21 1 2 596 596_splitncnn_0 596_sp
167167
Pooling GlobalAveragePool_236 1 1 596_splitncnn_1 597 -23330=4,1,1152,1,1 0=1 4=1
168168
InnerProduct Conv_237 1 1 597 598 -23330=4,1,48,1,1 0=48 1=1 2=55296
169169
Swish Mul_239 1 1 598 600 -23330=4,1,48,1,1
170-
Convolution Conv_240 1 1 600 602 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
170+
InnerProduct Conv_240 1 1 600 602 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
171171
BinaryOp Mul_242 2 1 596_splitncnn_0 602 603 -23330=4,3,7,7,1152 0=2
172172
Convolution Conv_243 1 1 603 605 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
173173
BinaryOp Add_245 2 1 588_splitncnn_0 605 606 -23330=4,3,7,7,192
@@ -180,7 +180,7 @@ Split splitncnn_23 1 2 614 614_splitncnn_0 614_sp
180180
Pooling GlobalAveragePool_254 1 1 614_splitncnn_1 615 -23330=4,1,1152,1,1 0=1 4=1
181181
InnerProduct Conv_255 1 1 615 616 -23330=4,1,48,1,1 0=48 1=1 2=55296
182182
Swish Mul_257 1 1 616 618 -23330=4,1,48,1,1
183-
Convolution Conv_258 1 1 618 620 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
183+
InnerProduct Conv_258 1 1 618 620 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
184184
BinaryOp Mul_260 2 1 614_splitncnn_0 620 621 -23330=4,3,7,7,1152 0=2
185185
Convolution Conv_261 1 1 621 623 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
186186
BinaryOp Add_263 2 1 606_splitncnn_0 623 624 -23330=4,3,7,7,192
@@ -192,7 +192,7 @@ Split splitncnn_24 1 2 632 632_splitncnn_0 632_sp
192192
Pooling GlobalAveragePool_272 1 1 632_splitncnn_1 633 -23330=4,1,1152,1,1 0=1 4=1
193193
InnerProduct Conv_273 1 1 633 634 -23330=4,1,48,1,1 0=48 1=1 2=55296
194194
Swish Mul_275 1 1 634 636 -23330=4,1,48,1,1
195-
Convolution Conv_276 1 1 636 638 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
195+
InnerProduct Conv_276 1 1 636 638 -23330=4,1,1152,1,1 0=1152 1=1 2=55296 9=4
196196
BinaryOp Mul_278 2 1 632_splitncnn_0 638 639 -23330=4,3,7,7,1152 0=2
197197
Convolution Conv_279 1 1 639 641 -23330=4,3,7,7,320 0=320 1=1 5=1 6=368640
198198
Convolution Conv_281 1 1 641 643 -23330=4,3,7,7,1280 0=1280 1=1 5=1 6=409600

0 commit comments

Comments
 (0)