6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
11 int j_start
= coreid
*(32/ncores
);
12 int j_end
= (coreid
+1)*(32/ncores
);
13 for ( i
= 0; i
< 32; i
++ ) {
15 for ( j
= j_start
; j
< j_end
; j
++ )
19 for ( k
= 0; k
< 32; k
++ )
21 acc_temp
+= *(A_j
+ k
) * *(B_i
+ k
*32);
23 C
[i
+ j
*32] = acc_temp
;