Simplify test_function_call.
[riscv-tests.git] / mt / cm_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
7 {
8 int i,j,k,l;
9 data_t element1, element2, element3, element4, element5, element6, element7, element8;
10 int row, row2;
11 int column1, column2, column3, column4, column5, column6, column7, column8;
12 data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
13 data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
14 if (coreid == 0){
15 for (i=0; i<lda; i+=2){
16 row = i*lda;
17 row2 = (i+1)*lda;
18 for (j=0; j<16; j+=4){
19 element1 = A[row+j];
20 element2 = A[row+j+1];
21 element3 = A[row+j+2];
22 element4 = A[row+j+3];
23 column1 = j*32;
24 column2 = (j+1)*32;
25 column3 = (j+2)*32;
26 column4 = (j+3)*32;
27 element5 = A[row2+j];
28 element6 = A[row2+j+1];
29 element7 = A[row2+j+2];
30 element8 = A[row2+j+3];
31
32 for (k=0; k<32; k+=4){
33 temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
34 temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
35 temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
36 temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
37 temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
38 temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
39 temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
40 temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
41 }
42 if (j==12){
43 for (l=0; l<32; l++){
44 C[row+l]+=temp[l];
45 C[row2+l]+=temp2[l];
46 temp[l]=0;
47 temp2[l]=0;
48 }
49 }
50 }
51 }
52 }
53 if (coreid==1 || ncores == 1){
54 for (i=0; i<32; i+=2){
55 row = (31-i)*lda;
56 row2 = (31-i-1)*lda;
57 for (j=16; j<32; j+=4){
58 element1 = A[row+j];
59 element2 = A[row+j+1];
60 element3 = A[row+j+2];
61 element4 = A[row+j+3];
62 element5 = A[row2+j];
63 element6 = A[row2+j+1];
64 element7 = A[row2+j+2];
65 element8 = A[row2+j+3];
66 column1 = j*32;
67 column2 = (j+1)*32;
68 column3 = (j+2)*32;
69 column4 = (j+3)*32;
70 for (k=0; k<32; k+=4){
71 temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
72 temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
73 temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
74 temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
75 temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
76 temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
77 temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
78 temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
79 }
80 if (j==28){
81 for (l=0; l<32; l++){
82 C[row+l]+=temp[l];
83 C[row2+l]+=temp2[l];
84 temp[l]=0;
85 temp2[l]=0;
86 }
87 }
88 }
89 }
90 }
91 }