Check mbadaddr in ma_addr test
[riscv-tests.git] / mt / bf_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
7 {
8
9 // ***************************** //
10 // **** ADD YOUR CODE HERE ***** //
11 // ***************************** //
12 //
13 // feel free to make a separate function for MI and MSI versions.
14 int j, k, i;
15 data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
16 data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15;
17 if(coreid == 0) {
18 for(j = 0; j < 32; j++) {
19 temp0 = 0; //C[j*lda];
20 temp1 = 0; //C[1 + j*lda];
21 temp2 = 0; //C[2 + j*lda];
22 temp3 = 0; //C[3 + j*lda];
23 temp4 = 0; //C[4 + j*lda];
24 temp5 = 0; //C[5 + j*lda];
25 temp6 = 0; //C[6 + j*lda];
26 temp7 = 0; //C[7 + j*lda];
27 temp8 = 0; //C[8 + j*lda];
28 temp9 = 0; //C[9 + j*lda];
29 temp10 = 0; //C[10 + j*lda];
30 temp11 = 0; //C[11 + j*lda];
31 temp12 = 0; //C[12 + j*lda];
32 temp13 = 0; //C[13 + j*lda];
33 temp14 = 0; //C[14 + j*lda];
34 temp15 = 0; //C[15 + j*lda];
35 for(k = 0; k < 32; k++) {
36 temp0 += A[j*lda + k] * B[k*lda];
37 temp1 += A[j*lda + k] * B[1+k*lda];
38 temp2 += A[j*lda + k] * B[2+k*lda];
39 temp3 += A[j*lda + k] * B[3+k*lda];
40 temp4 += A[j*lda + k] * B[4+k*lda];
41 temp5 += A[j*lda + k] * B[5+k*lda];
42 temp6 += A[j*lda + k] * B[6+k*lda];
43 temp7 += A[j*lda + k] * B[7+k*lda];
44 temp8 += A[j*lda + k] * B[8+k*lda];
45 temp9 += A[j*lda + k] * B[9+k*lda];
46 temp10 += A[j*lda + k] * B[10+k*lda];
47 temp11 += A[j*lda + k] * B[11+k*lda];
48 temp12 += A[j*lda + k] * B[12+k*lda];
49 temp13 += A[j*lda + k] * B[13+k*lda];
50 temp14 += A[j*lda + k] * B[14+k*lda];
51 temp15 += A[j*lda + k] * B[15+k*lda];
52 }
53 C[j*lda] = temp0;
54 C[1 + j*lda] = temp1;
55 C[2 + j*lda] = temp2;
56 C[3 + j*lda] = temp3;
57 C[4 + j*lda] = temp4;
58 C[5 + j*lda] = temp5;
59 C[6 + j*lda] = temp6;
60 C[7 + j*lda] = temp7;
61 C[8 + j*lda] = temp8;
62 C[9 + j*lda] = temp9;
63 C[10 + j*lda] = temp10;
64 C[11 + j*lda] = temp11;
65 C[12 + j*lda] = temp12;
66 C[13 + j*lda] = temp13;
67 C[14 + j*lda] = temp14;
68 C[15 + j*lda] = temp15;
69 }
70 }
71
72 if(coreid == 1 || ncores == 1) {
73 for(j = 0; j < 32; j++) {
74 temp0 = 0; //C[16+j*lda];
75 temp1 = 0; //C[17+j*lda];
76 temp2 = 0; //C[18+j*lda];
77 temp3 = 0; //C[19+j*lda];
78 temp4 = 0; //C[20+j*lda];
79 temp5 = 0; //C[21+j*lda];
80 temp6 = 0; //C[22+j*lda];
81 temp7 = 0; //C[23+j*lda];
82 temp8 = 0; //C[24+j*lda];
83 temp9 = 0; //C[25+j*lda];
84 temp10 = 0; //C[26+j*lda];
85 temp11 = 0; //C[27+j*lda];
86 temp12 = 0; //C[28+j*lda];
87 temp13 = 0; //C[29+j*lda];
88 temp14 = 0; //C[30+j*lda];
89 temp15 = 0; //C[31+j*lda];
90 for(k = 0; k < 32; k++) {
91 temp0 += A[j*lda + k] * B[16+k*lda];
92 temp1 += A[j*lda + k] * B[17+k*lda];
93 temp2 += A[j*lda + k] * B[18+k*lda];
94 temp3 += A[j*lda + k] * B[19+k*lda];
95 temp4 += A[j*lda + k] * B[20+k*lda];
96 temp5 += A[j*lda + k] * B[21+k*lda];
97 temp6 += A[j*lda + k] * B[22+k*lda];
98 temp7 += A[j*lda + k] * B[23+k*lda];
99 temp8 += A[j*lda + k] * B[24+k*lda];
100 temp9 += A[j*lda + k] * B[25+k*lda];
101 temp10 += A[j*lda + k] * B[26+k*lda];
102 temp11 += A[j*lda + k] * B[27+k*lda];
103 temp12 += A[j*lda + k] * B[28+k*lda];
104 temp13 += A[j*lda + k] * B[29+k*lda];
105 temp14 += A[j*lda + k] * B[30+k*lda];
106 temp15 += A[j*lda + k] * B[31+k*lda];
107 }
108 C[16 + j*lda] = temp0;
109 C[17 + j*lda] = temp1;
110 C[18 + j*lda] = temp2;
111 C[19 + j*lda] = temp3;
112 C[20 + j*lda] = temp4;
113 C[21 + j*lda] = temp5;
114 C[22 + j*lda] = temp6;
115 C[23 + j*lda] = temp7;
116 C[24 + j*lda] = temp8;
117 C[25 + j*lda] = temp9;
118 C[26 + j*lda] = temp10;
119 C[27 + j*lda] = temp11;
120 C[28 + j*lda] = temp12;
121 C[29 + j*lda] = temp13;
122 C[30 + j*lda] = temp14;
123 C[31 + j*lda] = temp15;
124 }
125 }
126
127 }