6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 // ***************************** //
10 // **** ADD YOUR CODE HERE ***** //
11 // ***************************** //
13 // feel free to make a separate function for MI and MSI versions.
34 static data_t BB
[1024];
39 for ( k
= 0; k
< lda
; k
++) {
40 for ( i
= coreid
*(lda
/ncores
); i
< (coreid
+1)*(lda
/ncores
); i
++ ) {
41 BB
[i
*lda
+ k
] = B
[k
*lda
+ i
];
46 for ( i
= 0; i
< lda
; i
+=4 ) {
47 for ( j
= coreid
*(lda
/ncores
); j
< (coreid
+1)*(lda
/ncores
); j
++ ) {
48 c1
= 0; c2
= 0; c3
= 0; c4
= 0;
53 for ( k
= 0; k
< lda
; k
+=8 ) {