Add another FP recoding test case
[riscv-tests.git] / benchmarks / vec-fft / vec-fft_main.c
1 // See LICENSE for license details.
2
3 // *************************************************************************
4 // multiply filter benchmark
5 // -------------------------------------------------------------------------
6 //
7 // This benchmark tests the software multiply implementation. The
8 // input data (and reference data) should be generated using the
9 // multiply_gendata.pl perl script and dumped to a file named
10 // dataset1.h You should not change anything except the
11 // HOST_DEBUG and VERIFY macros for your timing run.
12
13 #include "vec-fft.h"
14
15 //--------------------------------------------------------------------------
16 // Macros
17
18 // Set HOST_DEBUG to 1 if you are going to compile this for a host
19 // machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
20 // to 0 if you are compiling with the smips-gcc toolchain.
21
22 #ifndef HOST_DEBUG
23 #define HOST_DEBUG 0
24 #endif
25
26 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
27 // function before starting stats. If you have instruction/data
28 // caches and you don't want to count the overhead of misses, then
29 // you will need to use preallocation.
30
31 #ifndef PREALLOCATE
32 #define PREALLOCATE 0
33 #endif
34
35 // Set VERIFY to 1 if you want the program to check that the FFT
36 // function returns the right answer. When you are doing your
37 // benchmarking you should set this to 0 so that the verification
38 // is not included in your timing.
39
40 #ifndef VERIFY
41 #define VERIFY 1
42 #endif
43
44 // Set SET_STATS to 1 if you want to carve out the piece that actually
45 // does the computation.
46
47 #ifndef SET_STATS
48 #define SET_STATS 0
49 #endif
50
51 // Set MINIMAL to 1 if you want to run the core FFT kernel without
52 // any instrumentation or warm-up.
53 #ifndef MINIMAL
54 #define MINIMAL 1
55 #endif
56
57 //--------------------------------------------------------------------------
58 // Platform Specific Includes
59
60 #if HOST_DEBUG
61 #include <stdio.h>
62 #include <stdlib.h>
63 #else
64 void printstr(const char*);
65 void exit();
66 #endif
67
68
69 //--------------------------------------------------------------------------
70 // Input/Reference Data
71
72 #include "fft_const.h"
73
74 //--------------------------------------------------------------------------
75 // Helper functions
76
77 #if !MINIMAL
78
79 void setup_input(int n, fftval_t in_real[], fftval_t in_imag[])
80 {
81 int i;
82 for(i=0; i < n; i++) {
83 in_real[i] = input_data_real[i];
84 in_imag[i] = input_data_imag[i];
85 }
86 }
87 void setup_warm_tf(int n, fftval_t in_real[], fftval_t in_imag[])
88 {
89 int i;
90 for(i=0; i < n; i++) {
91 in_real[i] = tf_real[i];
92 in_imag[i] = tf_imag[i];
93 }
94 }
95
96 fftval_t calculate_error( int n, const fftval_t test_real[], const fftval_t test_imag[])
97 {
98 fftval_t current_max = 0;
99 printf("idx, real expected, real observed, imag expected, imag observed %d\n", 0);
100
101 #if defined(FFT_FIXED)
102 for(int i = 0; i < n; i++)
103 {
104 const double scale = 1 << FIX_PT;
105 const double real_diff = (test_real[i] - output_data_real[i])/scale;
106 const double imag_diff = (test_imag[i] - output_data_imag[i])/scale;
107
108 const double i_sq_error = real_diff*real_diff + imag_diff*imag_diff;
109 if(i_sq_error > current_max) {
110 printf("i = %d, current error: %d\n", i, (long)current_max);
111 current_max = i_sq_error;
112 }
113 }
114 #elif defined(FFT_FLOATING)
115 fftval_t real_expect = 0.0;
116 fftval_t imag_expect = 0.0;
117 for(int i = 0; i < n; i++)
118 {
119 /* TODO: Fix error caculation for half precision */
120 const fftval_t real_diff = (test_real[i] - output_data_real[i]);
121 const fftval_t imag_diff = (test_imag[i] - output_data_imag[i]);
122 fftval_t i_sq_error = real_diff*real_diff + imag_diff*imag_diff;
123
124 #if 0
125 long tr = (long)(test_real[i] * 1000000000);
126 long ti = (long)(test_imag[i] * 1000000000);
127 long er = (long)(output_data_real[i] * 1000000000);
128 long ei = (long)(output_data_imag[i] * 1000000000);
129
130 printf("i = %d, expected (%d,%d) and got (%d,%d), diff (%d,%d)\n",
131 i,
132 er, ei,
133 tr, ti,
134 er-tr, ei-ti);
135 #endif
136
137 #if 1
138 fftbit_t tr, ti, er, ei;
139 #ifdef FP_HALF
140 tr = test_real[i];
141 ti = test_imag[i];
142 er = output_data_real[i];
143 ei = output_data_imag[i];
144 #else
145 union bits {
146 fftval_t v;
147 fftbit_t u;
148 } bits;
149 bits.v = test_real[i]; tr = bits.u;
150 bits.v = test_imag[i]; ti = bits.u;
151 bits.v = output_data_real[i]; er = bits.u;
152 bits.v = output_data_imag[i]; ei = bits.u;
153 #endif
154 printf("%d: %d %d %d %d\n", i, er, tr, ei, ti);
155 // printf("%4d\t" FFT_PRI "\t" FFT_PRI "\t" FFT_PRI "\t" FFT_PRI "\n",
156 // i, er, tr, ei, ti);
157 #endif
158
159 #if 0
160 if(i_sq_error > current_max) {
161 printf("i = %d, max error (ppb): %ld\n", i, (long)(current_max * 1000000000));
162 current_max = i_sq_error;
163 real_expect = output_data_real[i];
164 imag_expect = output_data_imag[i];
165 }
166 #endif
167 }
168 /*
169 printf("real expected: %d\n", (long)(real_expect));
170 printf("imag expected: %d\n", (long)(imag_expect));
171 */
172 #endif
173
174 return current_max;
175 }
176
// Report the benchmark outcome and terminate the program.
//
//   max_sq_error  error figure produced by calculate_error(); the run is
//                 considered passed when it is below 10e-8
//   num_cycles    cycle count to print
//   num_retired   retired-instruction count to print
//
// Does not return: it ends by calling exit().
void finishTest( double max_sq_error, long long num_cycles, long long num_retired)
{
    const int passed = max_sq_error < 10e-8;

    if( passed ) printstr("*** PASSED ***");
    else         printstr("*** FAILED ***");

    // Both counters are long long, so the conversion must be %lld.
    printf(" (num_cycles = %lld, num_inst_retired = %lld)\n", num_cycles, num_retired);

    // Hand the verdict to exit() instead of discarding it: 0 on pass,
    // 1 on failure.
    // NOTE(review): assumes the target's exit() treats 0 as success, as the
    // hosted C library does -- confirm for the bare-metal runtime.
    exit(passed ? 0 : 1);
}
190
// Hook for switching statistics collection on or off around the measured
// region. It currently does nothing: the only statement in the target
// build is commented out.
//
//   enable  requested state; unused at present
void setStats( int enable )
{
    (void)enable;
#if ( !HOST_DEBUG && SET_STATS )
    // Disabled: asm( "mtpcr %0, cr10" : : "r" (enable) );
#endif
}
197
// Return the current cycle count.
//
// When built for the target with SET_STATS enabled, the value is read with
// the rdcycle instruction. Otherwise the placeholder 1337 is returned.
long long getCycles(void)
{
#if ( !HOST_DEBUG && SET_STATS )
    // rdcycle writes exactly one integer register, so the asm output must
    // be register-sized; a long long operand spans two registers on a
    // 32-bit target and would leave its upper half unset.
    unsigned long cycles;
    __asm__ __volatile__( "rdcycle %0" : "=r" (cycles) );
    return (long long)cycles;
#else
    return 1337;
#endif
}
206
// Return the current retired-instruction count.
//
// When built for the target with SET_STATS enabled, the value is read with
// the rdinstret instruction. Otherwise the placeholder 1338 is returned.
long long getInstRetired(void)
{
#if ( !HOST_DEBUG && SET_STATS )
    // rdinstret writes exactly one integer register, so the asm output must
    // be register-sized; a long long operand spans two registers on a
    // 32-bit target and would leave its upper half unset.
    unsigned long inst_retired;
    __asm__ __volatile__( "rdinstret %0" : "=r" (inst_retired) );
    return (long long)inst_retired;
#else
    return 1338;
#endif
}
215
216 #endif /* !MINIMAL */
217
218 //--------------------------------------------------------------------------
219 // Main
220 #define HWACHA_RADIX 2
221
222 #ifdef DATA_IN_UNPERMUTED
223 void permute(fftval_t workspace_real[], fftval_t workspace_imag[])
224 {
225 const int logradix = log2down(HWACHA_RADIX);
226 const int term_mask = HWACHA_RADIX-1;
227 const int num_term = log2down(FFT_SIZE)/logradix;
228 for(int i = 0; i < FFT_SIZE; i++)
229 {
230 // Get permuted address
231 int i_left = i;
232 int permuted = 0;
233 for(int cur_fft_size=HWACHA_RADIX; cur_fft_size <= FFT_SIZE; cur_fft_size = cur_fft_size << logradix)
234 {
235 permuted = (permuted << logradix) | (i_left & term_mask);
236 i_left = i_left >> logradix;
237 }
238 // If addresses are different and i < permuted (so we only do permutation once)
239 if(i < permuted)
240 {
241 fftval_t t = workspace_real[i];
242 fftval_t u = workspace_imag[i];
243 workspace_real[i] = workspace_real[permuted];
244 workspace_imag[i] = workspace_imag[permuted];
245 workspace_real[permuted] = t;
246 workspace_imag[permuted] = u;
247 }
248 }
249 }
250 #endif /* DATA_IN_UNPERMUTED */
251
252 #if MINIMAL
253
254 int main(void)
255 {
256 #ifdef DATA_IN_UNPERMUTED
257 permute(input_data_real, input_data_imag);
258 #endif
259 fft(input_data_real, input_data_imag, tf_real, tf_imag);
260 // calculate_error(FFT_SIZE, input_data_real, input_data_imag);
261 exit();
262 }
263
264 #else /* !MINIMAL */
265
266 int main(void)
267 {
268 static fftval_t workspace_real[FFT_SIZE];
269 static fftval_t workspace_imag[FFT_SIZE];
270 static fftval_t warm_tf_real[FFT_SIZE];
271 static fftval_t warm_tf_imag[FFT_SIZE];
272 setup_input(FFT_SIZE, workspace_real, workspace_imag);
273 setup_warm_tf(FFT_SIZE, warm_tf_real, warm_tf_imag);
274
275 #if PREALLOCATE
276 fft(workspace_real, workspace_imag, warm_tf_real, warm_tf_imag);
277 setup_input(FFT_SIZE, workspace_real, workspace_imag);
278 #endif
279
280 long long start_cycles, start_retired, stop_cycles, stop_retired;
281 start_cycles = getCycles();
282 start_retired = getInstRetired();
283
284 #ifdef DATA_IN_UNPERMUTED
285 permute(workspace_real, workspace_imag);
286 #endif
287 setStats(1);
288 fft(workspace_real, workspace_imag, warm_tf_real, warm_tf_imag);
289 setStats(0);
290
291 stop_cycles = getCycles();
292 stop_retired = getInstRetired();
293 long long num_cycles = stop_cycles - start_cycles;
294 long long num_retired = stop_retired - start_retired;
295
296 const double max_sq_error = calculate_error(FFT_SIZE, workspace_real, workspace_imag);
297
298 // Check the results
299 finishTest(max_sq_error, num_cycles, num_retired);
300 }
301
302 #endif /* MINIMAL */