--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ int i, j, k;
+ data_t acc_temp;
+ data_t *A_j, *B_i;
+ int j_start = coreid*(32/ncores);
+ int j_end = (coreid+1)*(32/ncores);
+ for ( i = 0; i < 32; i++ ) {
+ B_i = B + i;
+ for ( j = j_start; j < j_end; j++ )
+ {
+ acc_temp = 0;
+ A_j = A + j*32;
+ for ( k = 0; k < 32; k++ )
+ {
+ acc_temp += *(A_j + k) * *(B_i + k*32);
+ }
+ C[i + j*32] = acc_temp;
+ }
+ }
+}