6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 // ***************************** //
10 // **** ADD YOUR CODE HERE ***** //
11 // ***************************** //
13 // feel free to make a separate function for MI and MSI versions.
15 int i
, j
, k
, B_t
[32*32], x
, y
;
17 // int ii = 0, done = 0;
18 //for(x = coreid*(lda/ncores); x < (coreid+1)*(lda/ncores) && x < lda; x++) {
19 for (x
= 0; x
< lda
; x
++) {
20 for(y
= 0; y
< lda
; y
++) {
21 B_t
[y
*lda
+ x
] = B
[x
*lda
+ y
];
24 // for ( ii = lda/4 ; ii < lda ; ii += lda/4)
26 // for ( i = coreid*(ii/ncores); i < (coreid+1)*(ii/ncores) && i < ii; i++ )
27 for ( i
= coreid
*(lda
/ncores
); i
< (coreid
+1)*(lda
/ncores
) && i
< lda
; i
++ )
30 for ( j
= 0; j
< lda
; j
++ )
34 for ( k
= 0; k
< lda
; k
++ )
36 C
[CLoc
] += A
[ALoc
+ k
] * B_t
[BLoc
+ k
];