bump env
[riscv-tests.git] / mt / ad_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
7 {
8 int i, k;
9 int j = coreid*(lda/ncores);
10 int jend = (coreid+1)*(lda/ncores);
11 for ( ; j < jend; j++ )
12 {
13 int j32 = j << 5;
14 data_t* Cj32 = C + j32;
15 for ( k = 0; k < 32; k+=2 )
16 {
17 data_t Aj32k = A[k + j32];
18 data_t Aj32k2 = A[k + 1 + j32];
19 data_t* Bk32 = B + (k << 5);
20 data_t* Bk322 = Bk32 + 32;
21 for ( i = 0; i < 32; i+=4 )
22 {
23 Cj32[i] += Aj32k * Bk32 [i];
24 Cj32[i] += Aj32k2 * Bk322 [i];
25 Cj32[i+1] += Aj32k * Bk32 [i+1];
26 Cj32[i+1] += Aj32k2 * Bk322[i+1];
27 Cj32[i+2] += Aj32k * Bk32 [i+2];
28 Cj32[i+2] += Aj32k2 * Bk322[i+2];
29 Cj32[i+3] += Aj32k * Bk32 [i+3];
30 Cj32[i+3] += Aj32k2 * Bk322[i+3];
31 }
32 barrier(ncores);
33 }
34 }
35
36
37 }