@@ -483,18 +483,18 @@ void testRope_F32_F32() {
         });
 }

-void matmul_F32_F32_F32() {
-    #define MATMUL_N 64
-    #define MATMUL_D 96
+void testMatmul_F32_F32_F32() {
+    #define MATMUL_F32_N 64
+    #define MATMUL_F32_D 96
     execute(
         [](NnNetConfigBuilder *netBuilder, NnNodeConfigBuilder *nodeBuilder, NnSegmentConfigBuilder *segmentBuilder) {
-            NnUint xPipeIndex = netBuilder->addPipe("X", size2D(F_32, N_BATCHES, MATMUL_N));
-            NnUint yPipeIndex = netBuilder->addPipe("Y", size2D(F_32, N_BATCHES, MATMUL_D));
+            NnUint xPipeIndex = netBuilder->addPipe("X", size2D(F_32, N_BATCHES, MATMUL_F32_N));
+            NnUint yPipeIndex = netBuilder->addPipe("Y", size2D(F_32, N_BATCHES, MATMUL_F32_D));
             segmentBuilder->addOp(
                 OP_MATMUL, "matmul", 0,
                 pointerBatchConfig(SRC_PIPE, xPipeIndex),
                 pointerBatchConfig(SRC_PIPE, yPipeIndex),
-                size2D(F_32, MATMUL_N, MATMUL_D),
+                size2D(F_32, MATMUL_F32_N, MATMUL_F32_D),
                 NnMatmulOpConfig{});
         },
         [](NnExecutor *executor, NnNetExecution *execution, NnVulkanDevice *device) {
@@ -503,32 +503,88 @@ void matmul_F32_F32_F32() {
             float *xPipe = (float *)execution->pipes[0];
             float *yPipe = (float *)execution->pipes[1];

-            float weight[MATMUL_N * MATMUL_D];
-            for (NnUint i = 0; i < N_BATCHES * MATMUL_N; i++)
+            float weight[MATMUL_F32_N * MATMUL_F32_D];
+            for (NnUint i = 0; i < N_BATCHES * MATMUL_F32_N; i++)
                 xPipe[i] = i * 0.01f;
-            for (NnUint i = 0; i < MATMUL_N * MATMUL_D; i++)
+            for (NnUint i = 0; i < MATMUL_F32_N * MATMUL_F32_D; i++)
                 weight[i] = i * 0.001f;
-            executor->loadWeight("matmul", 0, MATMUL_N * MATMUL_D * sizeof(float), (NnByte *)weight);
+            executor->loadWeight("matmul", 0, MATMUL_F32_N * MATMUL_F32_D * sizeof(float), (NnByte *)weight);

             // act
             executor->forward();

             // assert
             for (NnUint b = 0; b < N_BATCHES; b++) {
-                for (NnUint d = 0; d < MATMUL_D; d++) {
+                for (NnUint d = 0; d < MATMUL_F32_D; d++) {
                     float sum = 0.0f;
-                    for (NnUint n = 0; n < MATMUL_N; n++)
-                        sum += xPipe[b * MATMUL_N + n] * weight[d * MATMUL_N + n];
+                    for (NnUint n = 0; n < MATMUL_F32_N; n++)
+                        sum += xPipe[b * MATMUL_F32_N + n] * weight[d * MATMUL_F32_N + n];

-                    const NnUint p = b * MATMUL_D + d;
+                    const NnUint p = b * MATMUL_F32_D + d;
                     assertFloat(p, yPipe[p], sum, 0.0002f);
                 }
             }
-            printOk("matmul_F32_F32_F32");
+            printOk("testMatmul_F32_F32_F32");
         });
 }

-void multiheadAtt_F32_F32() {
+void testMatmul_Q80_Q40_F32() {
+    #define MATMUL_Q80_Q40_N 64
+    #define MATMUL_Q80_Q40_D 96
+    execute(
+        [](NnNetConfigBuilder *netBuilder, NnNodeConfigBuilder *nodeBuilder, NnSegmentConfigBuilder *segmentBuilder) {
+            NnUint xPipeIndex = netBuilder->addPipe("X", size2D(F_Q80, N_BATCHES, MATMUL_Q80_Q40_N));
+            NnUint yPipeIndex = netBuilder->addPipe("Y", size2D(F_32, N_BATCHES, MATMUL_Q80_Q40_D));
+            segmentBuilder->addOp(
+                OP_MATMUL, "matmul", 0,
+                pointerBatchConfig(SRC_PIPE, xPipeIndex),
+                pointerBatchConfig(SRC_PIPE, yPipeIndex),
+                size2D(F_Q40, MATMUL_Q80_Q40_N, MATMUL_Q80_Q40_D),
+                NnMatmulOpConfig{});
+        },
+        [](NnExecutor *executor, NnNetExecution *execution, NnVulkanDevice *device) {
+            // arrange
+            execution->setBatchSize(N_BATCHES);
+            NnBlockQ80 *xPipe = (NnBlockQ80 *)execution->pipes[0];
+            float *yPipe = (float *)execution->pipes[1];
+
+            constexpr NnUint xSize = N_BATCHES * MATMUL_Q80_Q40_N;
+            constexpr NnUint weightSize = MATMUL_Q80_Q40_N * MATMUL_Q80_Q40_D;
+            constexpr NnUint weightBlocks = weightSize / Q40_BLOCK_SIZE;
+
+            float x[xSize];
+            float weight[weightSize];
+            NnBlockQ40 weightQ40[weightBlocks];
+
+            for (NnUint i = 0; i < xSize; i++)
+                x[i] = i * 0.01f;
+            for (NnUint i = 0; i < weightSize; i++)
+                weight[i] = i * 0.001f;
+
+            quantizeF32toQ80(x, xPipe, xSize, 1, 0);
+            quantizeF32toQ40(weight, weightQ40, weightSize, 1, 0);
+
+            executor->loadWeight("matmul", 0, weightBlocks * sizeof(NnBlockQ40), (NnByte *)weightQ40);
+
+            // act
+            executor->forward();
+
+            // assert
+            for (NnUint b = 0; b < N_BATCHES; b++) {
+                for (NnUint d = 0; d < MATMUL_Q80_Q40_D; d++) {
+                    float sum = 0.0f;
+                    for (NnUint n = 0; n < MATMUL_Q80_Q40_N; n++)
+                        sum += x[b * MATMUL_Q80_Q40_N + n] * weight[d * MATMUL_Q80_Q40_N + n];
+                    const NnUint p = b * MATMUL_Q80_Q40_D + d;
+                    const float change = (yPipe[p] - sum) / sum;
+                    assertFloat(p, change, 0.0, 0.04f);
+                }
+            }
+            printOk("testMatmul_Q80_Q40_F32");
+        });
+}
+
+void testMultiheadAtt_F32_F32() {
     #define MULTIHEAD_ATT_DIM 128
     execute(
         [](NnNetConfigBuilder *netBuilder, NnNodeConfigBuilder *nodeBuilder, NnSegmentConfigBuilder *segmentBuilder) {
@@ -560,7 +616,7 @@ void multiheadAtt_F32_F32() {
             // TODO: for now this is a smoke test
             execution->setBatchSize(N_BATCHES);
             executor->forward();
-            printOk("multiheadAtt_F32_F32");
+            printOk("testMultiheadAtt_F32_F32");
         });
 }

@@ -577,7 +633,8 @@ int main() {
     testCast_F32_F32();
     testCast_F32_Q80();
     testRope_F32_F32();
-    matmul_F32_F32_F32();
-    multiheadAtt_F32_F32();
+    testMatmul_F32_F32_F32();
+    testMatmul_Q80_Q40_F32();
+    testMultiheadAtt_F32_F32();
     return 0;
 }
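
A note on the tolerance in the new testMatmul_Q80_Q40_F32: because both the activations (NnBlockQ80) and the weights (NnBlockQ40) are block-quantized before the multiply, the result is checked against the float reference with a relative error bound of 4% (change = (yPipe[p] - sum) / sum), rather than the absolute 0.0002f tolerance used in the F32 test. The standalone sketch below illustrates that relative-error check in isolation; the helper name withinRelErr and the sample values are invented for the example and are not part of the library.

#include <cassert>
#include <cmath>
#include <cstdio>

// Relative-error check mirroring the assertion pattern in testMatmul_Q80_Q40_F32:
// the quantized result is accepted if it lands within maxRelErr of the float reference.
static bool withinRelErr(float actual, float expected, float maxRelErr) {
    const float change = (actual - expected) / expected; // same formula as in the test
    return std::fabs(change) <= maxRelErr;
}

int main() {
    const float reference = 123.4f; // hypothetical float dot product
    const float quantized = 125.0f; // hypothetical Q80xQ40 result, about 1.3% off
    assert(withinRelErr(quantized, reference, 0.04f));
    std::printf("within 4%% of the reference\n");
    return 0;
}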