Sort fixes: support for repeated trials.
[riscv-tests.git] / benchmarks / vec-matmul / vec_matmul_main.c
//**************************************************************************
// Vector-Thread Vector Matrix Multiply benchmark
//--------------------------------------------------------------------------
//
// This benchmark multiplies two 2-D arrays together and writes the results to
// a third array. The input data (and reference data) should be generated
// using the matmul_gendata.pl Perl script and dumped to a file named
// dataset.h.
9
10 #include "util.h"
11
12 // Choose which implementation you wish to test... but leave only one on!
13 // (only the first one will be executed).
14 //#define SCALAR_C
15 //#define SCALAR_ASM
16 #define VT_ASM
17
18 //--------------------------------------------------------------------------
19 // Input/Reference Data
20
21 //#include "dataset_test.h"
22 #include "dataset.h"
23
24 //--------------------------------------------------------------------------
25 // Helper functions
26
// Compare the first n elements of test[] against correct[], accepting an
// element when it lies within 2% of its reference value.
//
// Returns 1 when every element is within tolerance; otherwise returns the
// index of the first out-of-tolerance element plus 10.
int verify( int n, float test[], float correct[] )
{
  int i;
  for ( i = 0; i < n; i++ ) {
    // Edges of the 2% tolerance band. For a negative reference value
    // 1.02*correct is the *smaller* of the two, so order them explicitly
    // instead of assuming the reference is positive.
    double lo = 0.98*correct[i];
    double hi = 1.02*correct[i];
    if ( lo > hi ) {
      double tmp = lo;
      lo = hi;
      hi = tmp;
    }

    // Written as a negated "in range" test so that a NaN result, for which
    // every ordered comparison is false, counts as a mismatch.
    if ( !( test[i] >= lo && test[i] <= hi ) ) {
#if HOST_DEBUG
      printf(" test[%d] : %3.2f\n", i, test[i]);
      printf(" corr[%d] : %3.2f\n", i, correct[i]);
#endif
      // tell us which index fails + 10
      // (so that if i==0 or i==1 fails, we don't
      // think it was a 'not-finished yet' or pass)
      return i+10;
    }
  }
  return 1;
}
45
// Report the benchmark outcome and terminate the program.
//
//   correct     - result of verify(): 1 means pass, any other value is
//                 reported as a failure
//   num_cycles  - cycle count; printed in hex in the non-debug build
//   num_retired - retired-instruction count; printed in hex in the
//                 non-debug build
//
// This function does not return: both build variants end in exit(0).
void finishTest( int correct, long long num_cycles, long long num_retired )
{
#if HOST_DEBUG
  if ( correct == 1 )
    printf( "*** PASSED ***\n" );
  else
    printf( "*** FAILED *** (tohost = %d)\n", correct );
  exit(0);
#else
  // we no longer run in -testrun mode, which means we can't use
  // the tohost register to communicate "test is done" and "test results"
  // so instead we will communicate through print* functions!
  // Only the leading banner differs between pass and fail; the counters
  // are printed identically in both cases.
  printstr( correct == 1 ? "*** PASSED *** (num_cycles = 0x"
                         : "*** FAILED *** (num_cycles = 0x" );
  printhex(num_cycles);
  printstr( ", num_inst_retired = 0x");
  printhex(num_retired);
  printstr( ")\n" );
  // Pass an explicit status, matching the HOST_DEBUG branch; calling
  // exit() with no argument leaves the status indeterminate.
  exit(0);
#endif
}
78
79
// deprecated - cr10/stats-enable register no longer exists
//
// Writes `enable` to control register cr10 with the mtpcr instruction.
// Compiles to an empty function unless the build is a non-HOST_DEBUG one
// with SET_STATS set.
void setStats( int enable )
{
#if ( !HOST_DEBUG && SET_STATS )
  // Use the __asm__ spelling (as getCycles/getInstRetired do) so the file
  // also builds in strict ISO modes where the bare `asm` keyword is not
  // recognised; __volatile__ keeps the register write from being dropped.
  __asm__ __volatile__( "mtpcr %0, cr10" : : "r" (enable) );
#endif
}
87
// Return the current cycle count.
//
// In a non-HOST_DEBUG build with SET_STATS set, the value comes from the
// rdcycle instruction; in every other configuration the fixed placeholder
// 1337 is returned.
long long getCycles()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long now = 1337;
  __asm__ __volatile__( "rdcycle %0" : "=r" (now) );
  return now;
#else
  return 1337;
#endif
}
96
// Return the current retired-instruction count.
//
// In a non-HOST_DEBUG build with SET_STATS set, the value comes from the
// rdinstret instruction; in every other configuration the fixed placeholder
// 1338 is returned.
long long getInstRetired()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long retired = 1338;
  __asm__ __volatile__( "rdinstret %0" : "=r" (retired) );
  return retired;
#else
  return 1338;
#endif
}
105
106 //--------------------------------------------------------------------------
107 // matmul function
108
// scalar C implementation
//
// Computes C += A * B for square lda x lda matrices stored as flat arrays
// indexed [row*lda + col]. The existing contents of C are used as the
// starting value of each accumulation, so callers wanting a plain product
// must zero C first.
void matmul(const int lda, const float A[], const float B[], float C[] )
{
  int row, col, inner;

  for ( row = 0; row < lda; row++ )
  {
    const int base = row*lda;   // offset of this row in A and in C

    for ( col = 0; col < lda; col++ )
    {
      float acc = C[base + col];

      // dot product of row `row` of A with column `col` of B
      for ( inner = 0; inner < lda; inner++ )
        acc += A[base + inner] * B[inner*lda + col];

      C[base + col] = acc;
    }
  }
}
125
126
127 // assembly implementations can be found in *_asm.S
128
129 //--------------------------------------------------------------------------
130 // Main
131
132 int main( int argc, char* argv[] )
133 {
134 int i,j;
135 long long start_cycles = 0;
136 long long stop_cycles = 0;
137 long long num_cycles;
138 long long start_retired = 0;
139 long long stop_retired = 0;
140 long long num_retired;
141
142 float results_data[ARRAY_SIZE];
143 for ( i = 0; i < DIM_SIZE; i++ )
144 for ( j = 0; j < DIM_SIZE; j++ )
145 results_data[i + j*DIM_SIZE] = 0.0f;
146
147 // Output the input array
148
149 #if HOST_DEBUG
150 printArray( "input1", ARRAY_SIZE, input1_data );
151 printArray( "input2", ARRAY_SIZE, input2_data );
152 printArray( "verify", ARRAY_SIZE, verify_data );
153 printArray( "results", ARRAY_SIZE, results_data );
154 #endif
155
156 // --------------------------------------------------
157 // If needed we preallocate everything in the caches
158
159 #if PREALLOCATE
160
161
162
163 #endif
164
165 // --------------------------------------------------
166 // Do the matmul
167 start_cycles = getCycles();
168 start_retired = getInstRetired();
169
170 #ifdef SCALAR_C
171 matmul( DIM_SIZE, input1_data, input2_data, results_data );
172 #else
173 #ifdef SCALAR_ASM
174 #if HOST_DEBUG==0
175 scalar_matmul_asm( DIM_SIZE, input1_data, input2_data, results_data );
176 #endif
177 #else
178 #ifdef VT_ASM
179 #if HOST_DEBUG==0
180 vt_matmul_asm( DIM_SIZE, input1_data, input2_data, results_data );
181 #endif
182 #endif
183 #endif
184 #endif
185
186
187 stop_cycles = getCycles();
188 stop_retired = getInstRetired();
189 num_cycles = stop_cycles - start_cycles;
190 num_retired = stop_retired - start_retired;
191
192
193 // --------------------------------------------------
194 // Print out the results
195
196 #if HOST_DEBUG
197 printArray( "results", ARRAY_SIZE, results_data );
198 #endif
199
200
201 // --------------------------------------------------
202 // Check the results
203 int correct = verify( ARRAY_SIZE, results_data, verify_data );
204 finishTest(correct, num_cycles, num_retired);
205 }