7 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
10 // ***************************** //
11 // **** ADD YOUR CODE HERE ***** //
12 // ***************************** //
14 // feel free to make a separate function for MI and MSI versions.
16 //----------MSI--------------
20 for(j
= coreid
*lda
/ncores
; j
< coreid
*lda
/ncores
+ lda
/ncores
; j
++) {
21 for(i
= 0; i
< lda
; i
+=4) {
26 for(k
= 0; k
< lda
; k
++) {
27 Cval0
+= A
[j
*lda
+k
]*B
[k
*lda
+i
];
28 Cval1
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+1];
29 Cval2
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+2];
30 Cval3
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+3];
40 //------------------MI-------------------
44 for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
45 for(i = 0; i < lda; i+=4) {
51 for(k = 0; k < lda; k++) {
52 Cval0 += A[j*lda+k]*B[k*lda+i];
53 Cval1 += A[j*lda+k]*B[k*lda+i+1];
54 Cval2 += A[j*lda+k]*B[k*lda+i+2];
55 Cval3 += A[j*lda+k]*B[k*lda+i+3];
58 for(k = lda-1; k >= 0; k--) {
59 Cval0 += A[j*lda+k]*B[k*lda+i];
60 Cval1 += A[j*lda+k]*B[k*lda+i+1];
61 Cval2 += A[j*lda+k]*B[k*lda+i+2];
62 Cval3 += A[j*lda+k]*B[k*lda+i+3];