Remove Hwacha v3 tests
[riscv-tests.git] / mt / cf_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
7 {
8 if(coreid > 1) return;
9 int i,j,k,l;
10 data_t element1, element2, element3, element4, element5, element6, element7, element8;
11 int row, row2;
12 int column1, column2, column3, column4, column5, column6, column7, column8;
13 data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
14 data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
15 if (coreid == 0){
16 for (i=0; i<32; i+=2){
17 row = i*32;
18 row2 = (i+1)*32;
19 for (j=0; j<16; j+=4){
20 element1 = A[row+j];
21 element2 = A[row+j+1];
22 element3 = A[row+j+2];
23 element4 = A[row+j+3];
24 column1 = j*32;
25 column2 = (j+1)*32;
26 column3 = (j+2)*32;
27 column4 = (j+3)*32;
28 element5 = A[row2+j];
29 element6 = A[row2+j+1];
30 element7 = A[row2+j+2];
31 element8 = A[row2+j+3];
32
33 for (k=0; k<32; k+=4){
34 temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
35 temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
36 temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
37 temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
38 temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
39 temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
40 temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
41 temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
42 }
43
44
45 }
46 for (l=0; l<32; l++){
47 C[row+l]+=temp[l];
48 C[row2+l]+=temp2[l];
49 temp[l]=0;
50 temp2[l]=0;
51 }
52
53 }
54 }
55 if(coreid == 1 || ncores == 1) {
56 for (i=0; i<32; i+=2){
57 row = (31-i)*32;
58 row2 = (31-i-1)*32;
59 for (j=16; j<32; j+=4){
60 element1 = A[row+j];
61 element2 = A[row+j+1];
62 element3 = A[row+j+2];
63 element4 = A[row+j+3];
64 element5 = A[row2+j];
65 element6 = A[row2+j+1];
66 element7 = A[row2+j+2];
67 element8 = A[row2+j+3];
68 column1 = j*32;
69 column2 = (j+1)*32;
70 column3 = (j+2)*32;
71 column4 = (j+3)*32;
72 for (k=0; k<32; k+=4){
73 temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
74 temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
75 temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
76 temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
77 temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
78 temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
79 temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
80 temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
81 }
82
83
84
85 }
86 for (l=0; l<32; l++){
87 C[row+l]+=temp[l];
88 C[row2+l]+=temp2[l];
89 temp[l]=0;
90 temp2[l]=0;
91 }
92 }
93 }
94 // ***************************** //
95 // **** ADD YOUR CODE HERE ***** //
96 // ***************************** //
97 //
98 // feel free to make a separate function for MI and MSI versions.
99
100 }