6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 int row
,row2
, column
, column2
, column3
, column4
, column5
, column6
, column7
, column8
;
10 data_t element
, element2
, element3
, element4
, element5
, element6
, element7
, element8
;
11 data_t B1
, B2
, B3
, B4
;
12 data_t temp_mat
[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
13 data_t temp_mat2
[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
16 for (l
=coreid
*local_lda
/ncores
; l
<local_lda
*(1+coreid
)/ncores
; l
+=2){
21 for (i
=0; i
<local_lda
; i
+=4){
23 element2
= A
[row
+i
+1];
24 element3
= A
[row
+i
+2];
25 element4
= A
[row
+i
+3];
28 element6
= A
[row2
+i
+1];
29 element7
= A
[row2
+i
+2];
30 element8
= A
[row2
+i
+3];
33 column2
=(i
+1)*local_lda
;
34 column3
=(i
+2)*local_lda
;
35 column4
=(i
+3)*local_lda
;
42 for (j
=0; j
<lda
; j
+=4){
43 temp_mat
[j
]+=element
*B1
+element2
*B2
+element3
*B3
+element4
*B4
;
44 temp_mat
[j
+1]+=element
*B
[column
+j
+1]+element2
*B
[column2
+j
+1]+element3
*B
[column3
+j
+1]+element4
*B
[column4
+j
+1];
45 temp_mat
[j
+2]+=element
*B
[column
+j
+2]+element2
*B
[column2
+j
+2]+element3
*B
[column3
+j
+2]+element4
*B
[column4
+j
+2];
46 temp_mat
[j
+3]+=element
*B
[column
+j
+3]+element2
*B
[column2
+j
+3]+element3
*B
[column3
+j
+3]+element4
*B
[column4
+j
+3];
48 temp_mat2
[j
]+=element5
*B1
+element6
*B2
+element7
*B3
+element8
*B4
;
49 temp_mat2
[j
+1]+=element5
*B
[column
+j
+1]+element6
*B
[column2
+j
+1]+element7
*B
[column3
+j
+1]+element8
*B
[column4
+j
+1];
50 temp_mat2
[j
+2]+=element5
*B
[column
+j
+2]+element6
*B
[column2
+j
+2]+element7
*B
[column3
+j
+2]+element8
*B
[column4
+j
+2];
51 temp_mat2
[j
+3]+=element5
*B
[column
+j
+3]+element6
*B
[column2
+j
+3]+element7
*B
[column3
+j
+3]+element8
*B
[column4
+j
+3];
59 //element = A[row+i+4];
60 //element5 = A[row2+i+4];
63 for(k
=0; k
<local_lda
; k
++){
66 C
[row2
+k
]=temp_mat2
[k
];
73 // ***************************** //
74 // **** ADD YOUR CODE HERE ***** //
75 // ***************************** //
77 // feel free to make a separate function for MI and MSI versions.