Add a top-level make clean target.
[riscv-tests.git] / benchmarks / vec-matmul / vec_matmul_main.c
1 // See LICENSE for license details.
2
3 //**************************************************************************
4 // Vector-Thread Vector Matrix Multiply benchmark
5 //--------------------------------------------------------------------------
6 //
7 // This benchmark multiplies two 2-D arrays together and writes the results to
8 // a third vector. The input data (and reference data) should be generated
9 // using the matmul_gendata.pl perl script and dumped to a file named
10 // dataset.h.
11
12 #include "util.h"
13
14 // Choose which implementation you wish to test... but leave only one on!
15 // (only the first one will be executed).
16 //#define SCALAR_C
17 //#define SCALAR_ASM
18 #define VT_ASM
19
20 //--------------------------------------------------------------------------
21 // Input/Reference Data
22
23 //#include "dataset_test.h"
24 #include "dataset.h"
25
26 //--------------------------------------------------------------------------
27 // Helper functions
28
// Compare the first n entries of test[] against correct[], accepting each
// entry that lies within 2% of its reference value.
//
// Returns 1 when every entry is within tolerance. Otherwise returns the
// index of the first failing entry plus 10; the offset keeps a failure at
// index 0 or 1 from being mistaken for a 'not-finished yet' or pass code.
int verify( int n, float test[], float correct[] )
{
  int i;
  for ( i = 0; i < n; i++ ) {
    // Build the 2% band around the reference. For a negative reference
    // 1.02*correct is the *lower* edge, so order the bounds explicitly;
    // otherwise an exact match on a negative value would be rejected.
    double lo = 0.98*correct[i];
    double hi = 1.02*correct[i];
    if ( lo > hi ) {
      double tmp = lo;
      lo = hi;
      hi = tmp;
    }

    // Written as a negated "in range" test so that a NaN (on either side)
    // is reported as a failure instead of silently passing.
    if ( !( test[i] >= lo && test[i] <= hi ) ) {
#if HOST_DEBUG
      printf(" test[%d] : %3.2f\n", i, test[i]);
      printf(" corr[%d] : %3.2f\n", i, correct[i]);
#endif
      return i+10;
    }
  }
  return 1;
}
47
// Report the outcome of the benchmark and terminate the program.
//
//   correct     - 1 means the results verified; any other value is a failure
//   num_cycles  - cycle count measured around the kernel
//   num_retired - retired-instruction count measured around the kernel
//
// Does not return: both build flavours finish with exit(0).
void finishTest( int correct, long long num_cycles, long long num_retired )
{
#if HOST_DEBUG
  if ( correct == 1 )
    printf( "*** PASSED ***\n" );
  else
    printf( "*** FAILED *** (tohost = %d)\n", correct );
  exit(0);
#else
  // we no longer run in -testrun mode, which means we can't use
  // the tohost register to communicate "test is done" and "test results"
  // so instead we will communicate through print* functions!
  // Only the banner differs between pass and fail; the counters are
  // printed the same way in both cases.
  printstr( correct == 1 ? "*** PASSED *** (num_cycles = 0x"
                         : "*** FAILED *** (num_cycles = 0x" );
  printhex(num_cycles);
  printstr( ", num_inst_retired = 0x");
  printhex(num_retired);
  printstr( ")\n" );
  // Pass an explicit status: calling exit() with no argument hands the
  // callee an indeterminate value. 0 matches the HOST_DEBUG path above.
  exit(0);
#endif
}
80
81
// Deprecated: the cr10 / stats-enable register no longer exists.
//
// On target builds with SET_STATS set this issues "mtpcr" with `enable`
// as the source operand and cr10 as the destination; in every other
// configuration the function body is empty.
void setStats( int enable )
{
#if ( !HOST_DEBUG && SET_STATS )
  __asm__ __volatile__( "mtpcr %0, cr10" : : "r" (enable) );
#endif
}
89
// Read the cycle counter.
//
// On target builds with SET_STATS set, returns the value produced by the
// "rdcycle" instruction. In every other configuration it returns the fixed
// placeholder 1337.
long long getCycles()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long now;
  __asm__ __volatile__( "rdcycle %0" : "=r" (now) );
  return now;
#else
  return 1337;
#endif
}
98
// Read the retired-instruction counter.
//
// On target builds with SET_STATS set, returns the value produced by the
// "rdinstret" instruction. In every other configuration it returns the
// fixed placeholder 1338.
long long getInstRetired()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long retired;
  __asm__ __volatile__( "rdinstret %0" : "=r" (retired) );
  return retired;
#else
  return 1338;
#endif
}
107
108 //--------------------------------------------------------------------------
109 // matmul function
110
// Scalar C implementation.
//
// Accumulates the product of two lda x lda matrices into C, with all three
// stored row after row in flat arrays:
//   C[row*lda + col] += sum over k of A[row*lda + k] * B[k*lda + col]
// C is read before it is written, so the caller must initialise it.
void matmul(const int lda, const float A[], const float B[], float C[] )
{
  int row, col, k;

  for ( row = 0; row < lda; row++ )
  {
    // Row `row` of A and of C both start at offset row*lda.
    const float *a_row = &A[row*lda];
    float *c_row = &C[row*lda];

    for ( col = 0; col < lda; col++ )
    {
      float acc = c_row[col];
      for ( k = 0; k < lda; k++ )
        acc += a_row[k] * B[k*lda + col];
      c_row[col] = acc;
    }
  }
}
127
128
129 // assembly implementations can be found in *_asm.S
130
131 //--------------------------------------------------------------------------
132 // Main
133
134 int main( int argc, char* argv[] )
135 {
136 int i,j;
137 long long start_cycles = 0;
138 long long stop_cycles = 0;
139 long long num_cycles;
140 long long start_retired = 0;
141 long long stop_retired = 0;
142 long long num_retired;
143
144 float results_data[ARRAY_SIZE];
145 for ( i = 0; i < DIM_SIZE; i++ )
146 for ( j = 0; j < DIM_SIZE; j++ )
147 results_data[i + j*DIM_SIZE] = 0.0f;
148
149 // Output the input array
150
151 #if HOST_DEBUG
152 printArray( "input1", ARRAY_SIZE, input1_data );
153 printArray( "input2", ARRAY_SIZE, input2_data );
154 printArray( "verify", ARRAY_SIZE, verify_data );
155 printArray( "results", ARRAY_SIZE, results_data );
156 #endif
157
158 // --------------------------------------------------
159 // If needed we preallocate everything in the caches
160
161 #if PREALLOCATE
162
163
164
165 #endif
166
167 // --------------------------------------------------
168 // Do the matmul
169 start_cycles = getCycles();
170 start_retired = getInstRetired();
171
172 #ifdef SCALAR_C
173 matmul( DIM_SIZE, input1_data, input2_data, results_data );
174 #else
175 #ifdef SCALAR_ASM
176 #if HOST_DEBUG==0
177 scalar_matmul_asm( DIM_SIZE, input1_data, input2_data, results_data );
178 #endif
179 #else
180 #ifdef VT_ASM
181 #if HOST_DEBUG==0
182 vt_matmul_asm( DIM_SIZE, input1_data, input2_data, results_data );
183 #endif
184 #endif
185 #endif
186 #endif
187
188
189 stop_cycles = getCycles();
190 stop_retired = getInstRetired();
191 num_cycles = stop_cycles - start_cycles;
192 num_retired = stop_retired - start_retired;
193
194
195 // --------------------------------------------------
196 // Print out the results
197
198 #if HOST_DEBUG
199 printArray( "results", ARRAY_SIZE, results_data );
200 #endif
201
202
203 // --------------------------------------------------
204 // Check the results
205 int correct = verify( ARRAY_SIZE, results_data, verify_data );
206 finishTest(correct, num_cycles, num_retired);
207 }