More RV32 tests
[riscv-tests.git] / mt / ai_matmul.c
1 #include "stdlib.h"
2
3 #include "util.h"
4
5 #include "dataset.h"
6 #include "util.h"
7 void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
8 {
9
10 // ***************************** //
11 // **** ADD YOUR CODE HERE ***** //
12 // ***************************** //
13 //
14 // feel free to make a separate function for MI and MSI versions.
15
16 //----------MSI--------------
17 ///*
18 int i,j,k;
19 barrier(ncores);
20 for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
21 for(i = 0; i < lda; i+=4) {
22 data_t Cval0 = 0;
23 data_t Cval1 = 0;
24 data_t Cval2 = 0;
25 data_t Cval3 = 0;
26 for(k = 0; k < lda; k++) {
27 Cval0 += A[j*lda+k]*B[k*lda+i];
28 Cval1 += A[j*lda+k]*B[k*lda+i+1];
29 Cval2 += A[j*lda+k]*B[k*lda+i+2];
30 Cval3 += A[j*lda+k]*B[k*lda+i+3];
31 }
32 C[j*lda+i] = Cval0;
33 C[j*lda+i+1] = Cval1;
34 C[j*lda+i+2] = Cval2;
35 C[j*lda+i+3] = Cval3;
36 }
37 }
38 //*/
39
40 //------------------MI-------------------
41 /*
42 int i,j,k;
43 barrier(nc);
44 for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
45 for(i = 0; i < lda; i+=4) {
46 data_t Cval0 = 0;
47 data_t Cval1 = 0;
48 data_t Cval2 = 0;
49 data_t Cval3 = 0;
50 if(coreid == 0) {
51 for(k = 0; k < lda; k++) {
52 Cval0 += A[j*lda+k]*B[k*lda+i];
53 Cval1 += A[j*lda+k]*B[k*lda+i+1];
54 Cval2 += A[j*lda+k]*B[k*lda+i+2];
55 Cval3 += A[j*lda+k]*B[k*lda+i+3];
56 }
57 } else {
58 for(k = lda-1; k >= 0; k--) {
59 Cval0 += A[j*lda+k]*B[k*lda+i];
60 Cval1 += A[j*lda+k]*B[k*lda+i+1];
61 Cval2 += A[j*lda+k]*B[k*lda+i+2];
62 Cval3 += A[j*lda+k]*B[k*lda+i+3];
63 }
64 }
65 C[j*lda+i] = Cval0;
66 C[j*lda+i+1] = Cval1;
67 C[j*lda+i+2] = Cval2;
68 C[j*lda+i+3] = Cval3;
69 }
70 }
71 */
72 }