Fix capitalization of XLEN variable
[riscv-tests.git] / mt / ag_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 #include "util.h"
7 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
8 {
9 int i, j, k;
10
11 for ( i = 0; i < lda; i+=2 )
12 {
13 for (k = 0; k < lda; k+=4)
14 {
15 int d0 = B[k*lda + i];
16 int c0 = B[k*lda + i + 1];
17 int d1 = B[(k+1)*lda + i];
18 int c1 = B[(k+1)*lda + i + 1];
19 int d2 = B[(k+2)*lda + i];
20 int c2 = B[(k+2)*lda + i + 1];
21 int d3 = B[(k+3)*lda + i];
22 int c3 = B[(k+3)*lda + i + 1];
23
24 for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4)
25 {
26
27 int sum = A[j*lda + k] * d0;
28 sum += A[j*lda + k + 1] * d1;
29 sum += A[j*lda + k + 2] * d2;
30 sum += A[j*lda + k + 3] * d3;
31 C[j*lda +i] += sum;
32
33 sum = A[j*lda + k] * c0;
34 sum += A[j*lda + k + 1] * c1;
35 sum += A[j*lda + k + 2] * c2;
36 sum += A[j*lda + k + 3] * c3;
37 C[j*lda + i + 1] += sum;
38
39 sum = A[(j+1)*lda + k] * d0;
40 sum += A[(j+1)*lda + k + 1] * d1;
41 sum += A[(j+1)*lda + k + 2] * d2;
42 sum += A[(j+1)*lda + k + 3] * d3;
43 C[(j+1)*lda +i] += sum;
44
45 sum = A[(j+1)*lda + k] * c0;
46 sum += A[(j+1)*lda + k + 1] * c1;
47 sum += A[(j+1)*lda + k + 2] * c2;
48 sum += A[(j+1)*lda + k + 3] * c3;
49 C[(j+1)*lda + i + 1] += sum;
50
51 sum = A[(j+2)*lda + k] * d0;
52 sum += A[(j+2)*lda + k + 1] * d1;
53 sum += A[(j+2)*lda + k + 2] * d2;
54 sum += A[(j+2)*lda + k + 3] * d3;
55 C[(j+2)*lda +i] += sum;
56
57 sum = A[(j+2)*lda + k] * c0;
58 sum += A[(j+2)*lda + k + 1] * c1;
59 sum += A[(j+2)*lda + k + 2] * c2;
60 sum += A[(j+2)*lda + k + 3] * c3;
61 C[(j+2)*lda + i + 1] += sum;
62
63 sum = A[(j+3)*lda + k] * d0;
64 sum += A[(j+3)*lda + k + 1] * d1;
65 sum += A[(j+3)*lda + k + 2] * d2;
66 sum += A[(j+3)*lda + k + 3] * d3;
67 C[(j+3)*lda +i] += sum;
68
69 sum = A[(j+3)*lda + k] * c0;
70 sum += A[(j+3)*lda + k + 1] * c1;
71 sum += A[(j+3)*lda + k + 2] * c2;
72 sum += A[(j+3)*lda + k + 3] * c3;
73 C[(j+3)*lda + i + 1] += sum;
74
75 }
76 barrier(ncores);
77 }
78 }
79 }