minor mt updates
[riscv-tests.git] / benchmarks / mm / mm_main.c
1 #include "common.h"
2 #include <assert.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include "util.h"
6
7 void thread_entry(int cid, int nc)
8 {
9 const int R = 8;
10 int m, n, p;
11 uint64_t s = 0xdeadbeefU;
12
13 if (have_vec) {
14 m = HCBM;
15 n = HCBN;
16 p = HCBK;
17 } else {
18 m = CBM;
19 n = CBN;
20 p = CBK;
21 }
22
23 t a[m*p];
24 t b[p*n];
25 t c[m*n];
26
27 for (size_t i = 0; i < m; i++)
28 for (size_t j = 0; j < p; j++)
29 a[i*p+j] = (t)(s = lfsr(s));
30 for (size_t i = 0; i < p; i++)
31 for (size_t j = 0; j < n; j++)
32 b[i*n+j] = (t)(s = lfsr(s));
33 memset(c, 0, m*n*sizeof(c[0]));
34
35 size_t instret, cycles;
36 if (have_vec) {
37 for (int i = 0; i < R; i++)
38 {
39 instret = -rdinstret();
40 cycles = -rdcycle();
41 mm_rb_hwacha(m, n, p, a, p, b, n, c, n);
42 instret += rdinstret();
43 cycles += rdcycle();
44 }
45 } else {
46 for (int i = 0; i < R; i++)
47 {
48 instret = -rdinstret();
49 cycles = -rdcycle();
50 mm(m, n, p, a, p, b, n, c, n);
51 instret += rdinstret();
52 cycles += rdcycle();
53 }
54 }
55
56 printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
57 cid, RBM, RBN, RBK, CBM, CBN, CBK);
58 printf("C%d: %d instructions\n", cid, (int)(instret));
59 printf("C%d: %d cycles\n", cid, (int)(cycles));
60 printf("C%d: %d flops\n", cid, 2*m*n*p);
61 printf("C%d: %d Mflops @ 1 GHz\n", cid, 2000*m*n*p/(cycles));
62
63 #if 1
64 for (size_t i = 0; i < m; i++)
65 {
66 for (size_t j = 0; j < n; j++)
67 {
68 t s = 0;
69 for (size_t k = 0; k < p; k++)
70 s += a[i*p+k] * b[k*n+j];
71 s *= R;
72 if (fabs(c[i*n+j]-s) > fabs(1e-6*s))
73 {
74 printf("C%d: c[%lu][%lu] %f != %f\n", cid, i, j, c[i*n+j], s);
75 exit(1);
76 }
77 }
78 }
79 #endif
80
81 barrier(nc);
82 exit(0);
83 }