6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 // ***************************** //
10 // **** ADD YOUR CODE HERE ***** //
11 // ***************************** //
13 // feel free to make a separate function for MI and MSI versions.
15 for (int i
= coreid
; i
< lda
; i
+=ncores
*2)
17 for (int j
= 0; j
< lda
; j
++)
19 for (int k
= 0; k
< lda
; k
++)
21 int A12
= A
[j
*lda
+ k
];
22 int B1
= B
[k
*lda
+ i
];
23 int B2
= B
[k
*lda
+ i
+ ncores
];
24 C
[i
+j
*lda
] += A12
* B1
;
25 C
[i
+ncores
+j
*lda
] += A12
* B2
;
26 //C[i+j*lda] += A[j*lda +k] * B[k*lda +i];