7 void thread_entry(int cid
, int nc
)
26 for (size_t i
= 0; i
< m
; i
++)
27 for (size_t j
= 0; j
< p
; j
++)
29 for (size_t i
= 0; i
< p
; i
++)
30 for (size_t j
= 0; j
< n
; j
++)
32 memset(c
, 0, m
*n
*sizeof(c
[0]));
34 size_t instret
, cycles
;
36 for (int i
= 0; i
< R
; i
++)
38 instret
= -rdinstret();
40 mm_rb_hwacha(m
, n
, p
, a
, p
, b
, n
, c
, n
);
41 instret
+= rdinstret();
45 for (int i
= 0; i
< R
; i
++)
47 instret
= -rdinstret();
49 mm(m
, n
, p
, a
, p
, b
, n
, c
, n
);
50 instret
+= rdinstret();
55 printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
56 cid
, RBM
, RBN
, RBK
, CBM
, CBN
, CBK
);
57 printf("C%d: %d instructions\n", cid
, (int)(instret
));
58 printf("C%d: %d cycles\n", cid
, (int)(cycles
));
59 printf("C%d: %d flops\n", cid
, 2*m
*n
*p
);
60 printf("C%d: %d Mflops @ 1 GHz\n", cid
, 2000*m
*n
*p
/(cycles
));
63 for (size_t i
= 0; i
< m
; i
++)
65 for (size_t j
= 0; j
< n
; j
++)
68 for (size_t aik
= i
, bkj
= -j
; aik
< i
+p
; aik
++, bkj
++)
70 if (fabs(c
[i
*n
+j
]-s
*R
) > 1e-6*s
)
72 printf("C%d: c[%lu][%lu] %f != %f\n", cid
, i
, j
, c
[i
*n
+j
], s
);