1 // See LICENSE for license details.
9 #pragma GCC optimize ("unroll-loops")
// Benchmark entry point. In the lines visible here it:
//   1. fills a and b with successive lfsr() values and zeroes c,
//   2. reads the minstret/mcycle CSRs around a call to mm(),
//   3. prints the blocking parameters and the measured counters,
//   4. recomputes each element of the product with a plain triple loop and
//      compares it against c.
// cid is used only to tag the printed lines; nc is not referenced in the
// lines visible here.
// NOTE(review): this listing appears to be missing lines -- no braces are
// visible, nor the declarations of m, n, p, R, a, b, c or the type t.
// Confirm against the complete file before changing any logic.
11 void thread_entry(int cid
, int nc
)
// Seed for the pseudo-random fill below; advanced on every lfsr() call.
15 uint64_t s
= 0xdeadbeefU
;
// Fill a, indexed as i*p+j for i < m and j < p: each element receives the
// next lfsr() value, cast to t.
25 for (size_t i
= 0; i
< m
; i
++)
26 for (size_t j
= 0; j
< p
; j
++)
27 a
[i
*p
+j
] = (t
)(s
= lfsr(s
));
// Fill b, indexed as i*n+j for i < p and j < n, continuing the same lfsr
// sequence.
28 for (size_t i
= 0; i
< p
; i
++)
29 for (size_t j
= 0; j
< n
; j
++)
30 b
[i
*n
+j
] = (t
)(s
= lfsr(s
));
// Zero the m*n elements of the result c.
31 memset(c
, 0, m
*n
*sizeof(c
[0]));
// Retired-instruction and cycle counts for the measured region.
33 size_t instret
, cycles
;
// Repeat R times.
// NOTE(review): the braces delimiting this loop are not visible here;
// presumably the five statements that follow (counter reads around mm())
// form the body, in which case each pass overwrites the counters and only
// the last pass is reported -- confirm.
34 for (int i
= 0; i
< R
; i
++)
// Store the negated starting counter values, so that adding the end values
// after the call leaves the elapsed count (unsigned wrap-around arithmetic).
36 instret
= -read_csr(minstret
);
37 cycles
= -read_csr(mcycle
);
// The call being measured. mm() is defined elsewhere; the arguments that
// follow a, b and c (p, n, n) look like row strides -- TODO confirm against
// mm's prototype.
38 mm(m
, n
, p
, a
, p
, b
, n
, c
, n
);
// Add the ending counter values to obtain the elapsed counts.
39 instret
+= read_csr(minstret
);
40 cycles
+= read_csr(mcycle
);
// Issue a fence instruction before reporting results.
43 asm volatile("fence");
// Report the register- and cache-blocking parameters (RBM/RBN/RBK and
// CBM/CBN/CBK are defined elsewhere).
45 printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
46 cid
, RBM
, RBN
, RBK
, CBM
, CBN
, CBK
);
// The counters are size_t but are narrowed to int for %d, so values that do
// not fit in an int will not be printed faithfully.
47 printf("C%d: %d instructions\n", cid
, (int)(instret
));
48 printf("C%d: %d cycles\n", cid
, (int)(cycles
));
// NOTE(review): %d expects an int; whether 2*m*n*p is an int depends on the
// declarations of m, n and p, which are not visible here.
49 printf("C%d: %d flops\n", cid
, 2*m
*n
*p
);
// NOTE(review): cycles is size_t, so (assuming m, n, p are integers) this
// quotient is not an int, yet it is printed with %d -- a specifier mismatch;
// consider %zu or an explicit cast. The division also assumes cycles != 0.
50 printf("C%d: %d Mflops @ 1 GHz\n", cid
, 2000*m
*n
*p
/(cycles
));
// Verification: for every (i, j), accumulate the sum over k of
// a[i*p+k] * b[k*n+j] into s and compare it with c[i*n+j].
// NOTE(review): the only declaration of s visible here is the uint64_t seed
// above, yet s is printed with %f below; a per-element declaration/reset of
// s is presumably among the missing lines -- confirm.
53 for (size_t i
= 0; i
< m
; i
++)
55 for (size_t j
= 0; j
< n
; j
++)
58 for (size_t k
= 0; k
< p
; k
++)
59 s
+= a
[i
*p
+k
] * b
[k
*n
+j
];
// Treat the element as wrong when it differs from s by more than a relative
// tolerance of 1e-6 of |s|.
61 if (fabs(c
[i
*n
+j
]-s
) > fabs(1e-6*s
))
// Print the offending indices with the stored and recomputed values.
// NOTE(review): i and j are size_t but are printed with %lu; %zu is the
// matching specifier.
63 printf("C%d: c[%lu][%lu] %f != %f\n", cid
, i
, j
, c
[i
*n
+j
], s
);