Add --gdb argument so I can run valgrind on gdb.
[riscv-tests.git] / mt / af_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
7 {
8 size_t i, j, k, l;
9 int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
10 data_t element, element2, element3, element4, element5, element6, element7, element8;
11 data_t B1, B2, B3, B4;
12 data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
13 data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
14 int local_lda = lda;
15
16 for (l=coreid*local_lda/ncores; l<local_lda*(1+coreid)/ncores; l+=2){
17 row=l*32;
18 row2=(l+1)*32;
19 //element = A[row];
20 //element5 = A[row2];
21 for (i=0; i<local_lda; i+=4){
22 element = A[row+i];
23 element2 = A[row+i+1];
24 element3 = A[row+i+2];
25 element4 = A[row+i+3];
26
27 element5 = A[row2+i];
28 element6 = A[row2+i+1];
29 element7 = A[row2+i+2];
30 element8 = A[row2+i+3];
31
32 column=i*local_lda;
33 column2=(i+1)*local_lda;
34 column3=(i+2)*local_lda;
35 column4=(i+3)*local_lda;
36
37 B1 = B[column];
38 B2 = B[column2];
39 B3 = B[column3];
40 B4 = B[column4];
41
42 for (j=0; j<lda; j+=4){
43 temp_mat[j]+=element*B1+element2*B2+element3*B3+element4*B4;
44 temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
45 temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
46 temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
47
48 temp_mat2[j]+=element5*B1+element6*B2+element7*B3+element8*B4;
49 temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
50 temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
51 temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
52
53 B1 = B[column+j+4];
54 B2 = B[column2+j+4];
55 B3 = B[column3+j+4];
56 B4 = B[column4+j+4];
57
58 }
59 //element = A[row+i+4];
60 //element5 = A[row2+i+4];
61 }
62
63 for(k=0; k<local_lda; k++){
64 C[row+k]=temp_mat[k];
65 temp_mat[k]=0;
66 C[row2+k]=temp_mat2[k];
67 temp_mat2[k]=0;
68
69 }
70
71
72 }
73 // ***************************** //
74 // **** ADD YOUR CODE HERE ***** //
75 // ***************************** //
76 //
77 // feel free to make a separate function for MI and MSI versions.
78
79 }