add benchmarks gitignore
[riscv-tests.git] / benchmarks / vec-matmul / vec_matmul_main.c
1 //**************************************************************************
2 // Vector-Thread Vector Matrix Multiply benchmark
3 //--------------------------------------------------------------------------
4 //
5 // This benchmark multiplies two 2-D arrays together and writes the results to
6 // a third array. The input data (and reference data) should be generated
7 // using the matmul_gendata.pl perl script and dumped to a file named
8 // dataset.h. The riscv-gcc toolchain does not support system calls so printf's
9 // can only be used on a host system, not on the riscv-v processor simulator
10 // itself.
11 //
12 // HOWEVER: printstr() and printhex() are provided, for a primitive form of
13 // printing strings and hexadecimal values to stdout.
14
15
16 // Choose which implementation you wish to test... but leave only one on!
17 // (only the first one will be executed).
18 //#define SCALAR_C
19 //#define SCALAR_ASM
20 #define VT_ASM
21
22 //--------------------------------------------------------------------------
23 // Macros
24
25 // Set HOST_DEBUG to 1 if you are going to compile this for a host
26 // machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
27 // to 0 if you are compiling with the riscv-gcc toolchain.
28
29 #ifndef HOST_DEBUG
30 #define HOST_DEBUG 0
31 #endif
32
33 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
34 // function before starting stats. If you have instruction/data
35 // caches and you don't want to count the overhead of misses, then
36 // you will need to use preallocation.
37
38 #ifndef PREALLOCATE
39 #define PREALLOCATE 0
40 #endif
41
42 // Set SET_STATS to 1 if you want to carve out the piece that actually
43 // does the computation.
44
45 #ifndef SET_STATS
46 #define SET_STATS 0
47 #endif
48
49 //--------------------------------------------------------------------------
50 // Host Platform Includes
51
52 #if HOST_DEBUG
53 #include <stdio.h>
54 #include <stdlib.h>
55 #else
56 void printstr(const char*);
57 void exit();
58 #endif
59
60
61 //--------------------------------------------------------------------------
62 // Input/Reference Data
63
64 //#include "dataset_test.h"
65 #include "dataset.h"
66
67 //--------------------------------------------------------------------------
68 // Helper functions
69
// Compare the first n elements of test[] against correct[] using a
// +/-2% relative tolerance around each reference value.
//
//   n       - number of elements to compare
//   test    - computed results
//   correct - reference results
//
// Returns 1 when every element is within tolerance. On the first
// mismatch, at index i, it returns i+10; the offset ensures that a
// failure at index 0 or 1 is not mistaken for a 'not-finished yet' or
// pass code.
int verify( int n, const float test[], const float correct[] )
{
  int i;
  for ( i = 0; i < n; i++ ) {
    // Order the two band edges by value so that a negative reference
    // still yields a valid [lo, hi] interval (0.98*c > 1.02*c when c < 0).
    double edge_a = 0.98*correct[i];
    double edge_b = 1.02*correct[i];
    double lo = ( edge_a < edge_b ) ? edge_a : edge_b;
    double hi = ( edge_a < edge_b ) ? edge_b : edge_a;

    // Written as a negated "in range" test so that a NaN result, for
    // which every ordered comparison is false, counts as a failure.
    if ( !( test[i] >= lo && test[i] <= hi ) ) {
#if HOST_DEBUG
      printf(" test[%d] : %3.2f\n", i, test[i]);
      printf(" corr[%d] : %3.2f\n", i, correct[i]);
#endif
      return i+10;
    }
  }
  return 1;
}
88
#if HOST_DEBUG
// Host-only debug helper. Prints `name` right-aligned in a 10-character
// field, then the n values of arr[] with two decimals each, all on a
// single line terminated by a newline.
void printArray( char name[], int n, float arr[] )
{
  int idx = 0;

  printf( " %10s :", name );
  while ( idx < n ) {
    printf( " %03.2f ", arr[idx] );
    idx++;
  }
  printf( "\n" );
}
#endif
99
100
101 void finishTest( int correct, long long num_cycles, long long num_retired )
102 {
103 int toHostValue = correct;
104 #if HOST_DEBUG
105 if ( toHostValue == 1 )
106 printf( "*** PASSED ***\n" );
107 else
108 printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
109 exit(0);
110 #else
111 // we no longer run in -testrun mode, which means we can't use
112 // the tohost register to communicate "test is done" and "test results"
113 // so instead we will communicate through print* functions!
114 if ( correct == 1 )
115 {
116 printstr( "*** PASSED *** (num_cycles = 0x" );
117 printhex(num_cycles);
118 printstr( ", num_inst_retired = 0x");
119 printhex(num_retired);
120 printstr( ")\n" );
121 }
122 else
123 {
124 printstr( "*** FAILED *** (num_cycles = 0x");
125 printhex(num_cycles);
126 printstr( ", num_inst_retired = 0x");
127 printhex(num_retired);
128 printstr( ")\n" );
129 }
130 exit();
131 #endif
132 }
133
134
135 // deprecated - cr10/stats-enable register no longer exists
// Writes `enable` to control register cr10 with an mtpcr instruction.
// The write is only compiled in when !HOST_DEBUG && SET_STATS; in every
// other configuration this function does nothing.
void setStats( int enable )
{
#if ( !HOST_DEBUG && SET_STATS )
  // __asm__ __volatile__ matches the other counter helpers in this file
  // and, unlike plain `asm`, is accepted in strict ISO C modes.
  __asm__ __volatile__( "mtpcr %0, cr10" : : "r" (enable) );
#else
  (void)enable;  // unused when the register write is compiled out
#endif
}
142
// Returns the value read by the `rdcycle` instruction when built with
// !HOST_DEBUG && SET_STATS; otherwise returns the placeholder 1337.
long long getCycles()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long now;
  __asm__ __volatile__( "rdcycle %0" : "=r" (now) );
  return now;
#else
  return 1337;
#endif
}
151
// Returns the value read by the `rdinstret` instruction when built with
// !HOST_DEBUG && SET_STATS; otherwise returns the placeholder 1338.
long long getInstRetired()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long retired;
  __asm__ __volatile__( "rdinstret %0" : "=r" (retired) );
  return retired;
#else
  return 1338;
#endif
}
160
161 //--------------------------------------------------------------------------
162 // matmul function
163
164 // scalar C implementation
// Accumulates the product of two square matrices into C: C += A * B.
//
//   lda - dimension of the lda x lda matrices
//   A,B - inputs; element (r, c) is read from index r*lda + c
//   C   - in/out; each element's existing value is the starting sum
//
// Products are added in increasing order of the inner index.
void matmul(const int lda, const float A[], const float B[], float C[] )
{
  int row, col, inner;

  for ( row = 0; row < lda; row++ )
  {
    const float *a_row = A + row*lda;
    float *c_row = C + row*lda;

    for ( col = 0; col < lda; col++ )
    {
      float acc = c_row[col];
      for ( inner = 0; inner < lda; inner++ )
        acc += a_row[inner] * B[inner*lda + col];
      c_row[col] = acc;
    }
  }
}
180
181
182 // assembly implementations can be found in *_asm.S
183
184 //--------------------------------------------------------------------------
185 // Main
186
187 int main( int argc, char* argv[] )
188 {
189 int i,j;
190 long long start_cycles = 0;
191 long long stop_cycles = 0;
192 long long num_cycles;
193 long long start_retired = 0;
194 long long stop_retired = 0;
195 long long num_retired;
196
197 float results_data[ARRAY_SIZE];
198 for ( i = 0; i < DIM_SIZE; i++ )
199 for ( j = 0; j < DIM_SIZE; j++ )
200 results_data[i + j*DIM_SIZE] = 0.0f;
201
202 // Output the input array
203
204 #if HOST_DEBUG
205 printArray( "input1", ARRAY_SIZE, input1_data );
206 printArray( "input2", ARRAY_SIZE, input2_data );
207 printArray( "verify", ARRAY_SIZE, verify_data );
208 printArray( "results", ARRAY_SIZE, results_data );
209 #endif
210
211 // --------------------------------------------------
212 // If needed we preallocate everything in the caches
213
214 #if PREALLOCATE
215
216
217
218 #endif
219
220 // --------------------------------------------------
221 // Do the matmul
222 start_cycles = getCycles();
223 start_retired = getInstRetired();
224
225 #ifdef SCALAR_C
226 matmul( DIM_SIZE, input1_data, input2_data, results_data );
227 #else
228 #ifdef SCALAR_ASM
229 #if HOST_DEBUG==0
230 scalar_matmul_asm( DIM_SIZE, input1_data, input2_data, results_data );
231 #endif
232 #else
233 #ifdef VT_ASM
234 #if HOST_DEBUG==0
235 vt_matmul_asm( DIM_SIZE, input1_data, input2_data, results_data );
236 #endif
237 #endif
238 #endif
239 #endif
240
241
242 stop_cycles = getCycles();
243 stop_retired = getInstRetired();
244 num_cycles = stop_cycles - start_cycles;
245 num_retired = stop_retired - start_retired;
246
247
248 // --------------------------------------------------
249 // Print out the results
250
251 #if HOST_DEBUG
252 printArray( "results", ARRAY_SIZE, results_data );
253 #endif
254
255
256 // --------------------------------------------------
257 // Check the results
258 int correct = verify( ARRAY_SIZE, results_data, verify_data );
259 finishTest(correct, num_cycles, num_retired);
260 }