1f4e28b30b200adcef80862514af2426bcba1d9a
1 // See LICENSE for license details.
9 void thread_entry(int cid
, int nc
)
13 uint64_t s
= 0xdeadbeefU
;
29 for (size_t i
= 0; i
< m
; i
++)
30 for (size_t j
= 0; j
< p
; j
++)
31 a
[i
*p
+j
] = (t
)(s
= lfsr(s
));
32 for (size_t i
= 0; i
< p
; i
++)
33 for (size_t j
= 0; j
< n
; j
++)
34 b
[i
*n
+j
] = (t
)(s
= lfsr(s
));
35 memset(c
, 0, m
*n
*sizeof(c
[0]));
37 size_t instret
, cycles
;
39 for (int i
= 0; i
< R
; i
++)
41 instret
= -rdinstret();
43 mm_rb_hwacha(m
, n
, p
, a
, p
, b
, n
, c
, n
);
44 instret
+= rdinstret();
48 for (int i
= 0; i
< R
; i
++)
50 instret
= -rdinstret();
52 mm(m
, n
, p
, a
, p
, b
, n
, c
, n
);
53 instret
+= rdinstret();
58 asm volatile("fence");
60 printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
61 cid
, RBM
, RBN
, RBK
, CBM
, CBN
, CBK
);
62 printf("C%d: %d instructions\n", cid
, (int)(instret
));
63 printf("C%d: %d cycles\n", cid
, (int)(cycles
));
64 printf("C%d: %d flops\n", cid
, 2*m
*n
*p
);
65 printf("C%d: %d Mflops @ 1 GHz\n", cid
, 2000*m
*n
*p
/(cycles
));
68 for (size_t i
= 0; i
< m
; i
++)
70 for (size_t j
= 0; j
< n
; j
++)
73 for (size_t k
= 0; k
< p
; k
++)
74 s
+= a
[i
*p
+k
] * b
[k
*n
+j
];
76 if (fabs(c
[i
*n
+j
]-s
) > fabs(1e-6*s
))
78 printf("C%d: c[%lu][%lu] %f != %f\n", cid
, i
, j
, c
[i
*n
+j
], s
);