6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 int row
,row2
, column
, column2
, column3
, column4
, column5
, column6
, column7
, column8
;
10 size_t max_dim
= 32*32;
11 data_t element
, element2
, element3
, element4
, element5
, element6
, element7
, element8
;
12 data_t temp_mat
[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
13 data_t temp_mat2
[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
14 //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
15 for (l
=coreid
*32/ncores
; l
<32*(1+coreid
)/ncores
; l
+=2){
18 for (i
=0; i
<lda
; i
+=4){
20 element2
= A
[row
+i
+1];
21 element3
= A
[row
+i
+2];
22 element4
= A
[row
+i
+3];
24 element6
= A
[row2
+i
+1];
25 element7
= A
[row2
+i
+2];
26 element8
= A
[row2
+i
+3];
31 for (j
=0; j
<32; j
+=4){
32 temp_mat
[j
]+=element
*B
[column
+j
]+element2
*B
[column2
+j
]+element3
*B
[column3
+j
]+element4
*B
[column4
+j
];
33 temp_mat
[j
+1]+=element
*B
[column
+j
+1]+element2
*B
[column2
+j
+1]+element3
*B
[column3
+j
+1]+element4
*B
[column4
+j
+1];
34 temp_mat
[j
+2]+=element
*B
[column
+j
+2]+element2
*B
[column2
+j
+2]+element3
*B
[column3
+j
+2]+element4
*B
[column4
+j
+2];
35 temp_mat
[j
+3]+=element
*B
[column
+j
+3]+element2
*B
[column2
+j
+3]+element3
*B
[column3
+j
+3]+element4
*B
[column4
+j
+3];
36 temp_mat2
[j
]+=element5
*B
[column
+j
]+element6
*B
[column2
+j
]+element7
*B
[column3
+j
]+element8
*B
[column4
+j
];
37 temp_mat2
[j
+1]+=element5
*B
[column
+j
+1]+element6
*B
[column2
+j
+1]+element7
*B
[column3
+j
+1]+element8
*B
[column4
+j
+1];
38 temp_mat2
[j
+2]+=element5
*B
[column
+j
+2]+element6
*B
[column2
+j
+2]+element7
*B
[column3
+j
+2]+element8
*B
[column4
+j
+2];
39 temp_mat2
[j
+3]+=element5
*B
[column
+j
+3]+element6
*B
[column2
+j
+3]+element7
*B
[column3
+j
+3]+element8
*B
[column4
+j
+3];
44 C[row2+k]=temp_mat2[k];
52 C
[row2
+k
]=temp_mat2
[k
];
58 // ***************************** //
59 // **** ADD YOUR CODE HERE ***** //
60 // ***************************** //
62 // feel free to make a separate function for MI and MSI versions.