Disable TriggerDmode while spike is changed.
[riscv-tests.git] / mt / bk_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
7 {
8 int i, j, k, ii, jj, kk;
9 if(coreid > 1) return;
10 if (coreid == 0) {
11 // for ( ii = 0; ii < 32; ii+=IC )
12 for ( kk = 0; kk < 32; kk+=16 )
13 for ( j = 0; j < 16; j++ )
14 // for ( j = 0; j < 16; j++ )
15 {
16 for ( i = 0; i < 32; i+=8 )
17 // for ( i = ii; i < ii + IC && i < 32; i+=8 )
18 {
19 data_t temp0 = C[i+j*32];
20 data_t temp1 = C[i+j*32+1];
21 data_t temp2 = C[i+j*32+2];
22 data_t temp3 = C[i+j*32+3];
23 data_t temp4 = C[i+j*32+4];
24 data_t temp5 = C[i+j*32+5];
25 data_t temp6 = C[i+j*32+6];
26 data_t temp7 = C[i+j*32+7];
27 for ( k = kk; k < kk+16 && k < 32; k++ )
28 // for ( k = 0; k < 32; k++ )
29 {
30 data_t tempA = A[j*32+k];
31 temp0 += tempA * B[k*32 + i];
32 temp1 += tempA * B[k*32 + i+1];
33 temp2 += tempA * B[k*32 + i+2];
34 temp3 += tempA * B[k*32 + i+3];
35 temp4 += tempA * B[k*32 + i+4];
36 temp5 += tempA * B[k*32 + i+5];
37 temp6 += tempA * B[k*32 + i+6];
38 temp7 += tempA * B[k*32 + i+7];
39 }
40 C[i+j*32] = temp0;
41 C[i+j*32+1] = temp1;
42 C[i+j*32+2] = temp2;
43 C[i+j*32+3] = temp3;
44 C[i+j*32+4] = temp4;
45 C[i+j*32+5] = temp5;
46 C[i+j*32+6] = temp6;
47 C[i+j*32+7] = temp7;
48 }
49 }
50 }
51 if(coreid == 1 || ncores == 1) {
52 // for ( ii = 0; ii < 32; ii+=IC )
53 for ( kk = 0; kk < 32; kk+=16 )
54 for ( j = 16; j < 32; j++ )
55 // for ( j = 16; j < 32; j++ )
56 {
57 for ( i = 0; i < 32; i+=8 )
58 // for ( i = ii; i < ii + IC && i < 32; i+=8 )
59 {
60 data_t temp0 = C[i+j*32];
61 data_t temp1 = C[i+j*32+1];
62 data_t temp2 = C[i+j*32+2];
63 data_t temp3 = C[i+j*32+3];
64 data_t temp4 = C[i+j*32+4];
65 data_t temp5 = C[i+j*32+5];
66 data_t temp6 = C[i+j*32+6];
67 data_t temp7 = C[i+j*32+7];
68 for ( k = kk; k < kk+16 && k < 32; k++ )
69 {
70 data_t tempA = A[j*32+k];
71 temp0 += tempA * B[k*32 + i];
72 temp1 += tempA * B[k*32 + i+1];
73 temp2 += tempA * B[k*32 + i+2];
74 temp3 += tempA * B[k*32 + i+3];
75 temp4 += tempA * B[k*32 + i+4];
76 temp5 += tempA * B[k*32 + i+5];
77 temp6 += tempA * B[k*32 + i+6];
78 temp7 += tempA * B[k*32 + i+7];
79 }
80 C[i+j*32] = temp0;
81 C[i+j*32+1] = temp1;
82 C[i+j*32+2] = temp2;
83 C[i+j*32+3] = temp3;
84 C[i+j*32+4] = temp4;
85 C[i+j*32+5] = temp5;
86 C[i+j*32+6] = temp6;
87 C[i+j*32+7] = temp7;
88 }
89
90 }
91 }
92 }