6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
9 // ***************************** //
10 // **** ADD YOUR CODE HERE ***** //
11 // ***************************** //
13 // feel free to make a separate function for MI and MSI versions.
15 //----------MSI--------------
19 for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
20 for(i = 0; i < lda; i+=4) {
25 for(k = 0; k < lda; k++) {
26 Cval0 += A[j*lda+k]*B[k*lda+i];
27 Cval1 += A[j*lda+k]*B[k*lda+i+1];
28 Cval2 += A[j*lda+k]*B[k*lda+i+2];
29 Cval3 += A[j*lda+k]*B[k*lda+i+3];
39 //------------------MI-------------------
43 for(j
= coreid
*lda
/ncores
; j
< coreid
*lda
/ncores
+ lda
/ncores
; j
++) {
44 for(i
= 0; i
< lda
; i
+=4) {
50 for(k
= 0; k
< lda
; k
++) {
51 Cval0
+= A
[j
*lda
+k
]*B
[k
*lda
+i
];
52 Cval1
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+1];
53 Cval2
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+2];
54 Cval3
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+3];
57 for(k
= lda
-1; k
>= 0; k
--) {
58 Cval0
+= A
[j
*lda
+k
]*B
[k
*lda
+i
];
59 Cval1
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+1];
60 Cval2
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+2];
61 Cval3
+= A
[j
*lda
+k
]*B
[k
*lda
+i
+3];