6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 // ***************************** //
10 // **** ADD YOUR CODE HERE ***** //
11 // ***************************** //
13 // feel free to make a separate function for MI and MSI versions.
14 int i
, j
, k
, ii
, jj
, kk
;
15 int block
= lda
/ ncores
;
16 int leftover
= lda
% ncores
;
17 int start
= block
* coreid
;
21 for ( j
= start
; j
< (start
+block
); j
++ )
22 for ( k
= 0; k
< lda
; k
++ )
24 for ( i
= 0; i
< lda
; i
++ )
26 C
[i
+ j
*lda
] += A
[j
*lda
+ k
] * B
[k
*lda
+ i
];