Merge branch 'master' of github.com:ucb-bar/riscv-tests
[riscv-tests.git] / benchmarks / vec-fft / vec-fft_main.c
1 // *************************************************************************
2 // multiply filter benchmark
3 // -------------------------------------------------------------------------
4 //
5 // This benchmark tests the software multiply implementation. The
6 // input data (and reference data) should be generated using the
7 // multiply_gendata.pl perl script and dumped to a file named
8 // dataset1.h You should not change anything except the
9 // HOST_DEBUG and VERIFY macros for your timing run.
10
11 #include "vec-fft.h"
12
13 //--------------------------------------------------------------------------
14 // Macros
15
16 // Set HOST_DEBUG to 1 if you are going to compile this for a host
17 // machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
18 // to 0 if you are compiling with the smips-gcc toolchain.
19
20 #ifndef HOST_DEBUG
21 #define HOST_DEBUG 0
22 #endif
23
24 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
25 // function before starting stats. If you have instruction/data
26 // caches and you don't want to count the overhead of misses, then
27 // you will need to use preallocation.
28
29 #ifndef PREALLOCATE
30 #define PREALLOCATE 0
31 #endif
32
33 // Set VERIFY to 1 if you want the program to check that the FFT
34 // function returns the right answer. When you are doing your
35 // benchmarking you should set this to 0 so that the verification
36 // is not included in your timing.
37
38 #ifndef VERIFY
39 #define VERIFY 1
40 #endif
41
42 // Set SET_STATS to 1 if you want to carve out the piece that actually
43 // does the computation.
44
45 #ifndef SET_STATS
46 #define SET_STATS 0
47 #endif
48
49 // Set MINIMAL to 1 if you want to run the core FFT kernel without
50 // any instrumentation or warm-up.
51 #ifndef MINIMAL
52 #define MINIMAL 1
53 #endif
54
55 //--------------------------------------------------------------------------
56 // Platform Specific Includes
57
58 #if HOST_DEBUG
59 #include <stdio.h>
60 #include <stdlib.h>
61 #else
62 void printstr(const char*);
63 void exit();
64 #endif
65
66
67 //--------------------------------------------------------------------------
68 // Input/Reference Data
69
70 #include "fft_const.h"
71
72 //--------------------------------------------------------------------------
73 // Helper functions
74
75 #if !MINIMAL
76
77 void setup_input(int n, fftval_t in_real[], fftval_t in_imag[])
78 {
79 int i;
80 for(i=0; i < n; i++) {
81 in_real[i] = input_data_real[i];
82 in_imag[i] = input_data_imag[i];
83 }
84 }
85 void setup_warm_tf(int n, fftval_t in_real[], fftval_t in_imag[])
86 {
87 int i;
88 for(i=0; i < n; i++) {
89 in_real[i] = tf_real[i];
90 in_imag[i] = tf_imag[i];
91 }
92 }
93
94 fftval_t calculate_error( int n, const fftval_t test_real[], const fftval_t test_imag[])
95 {
96 fftval_t current_max = 0;
97 printf("idx, real expected, real observed, imag expected, imag observed %d\n", 0);
98
99 #if defined(FFT_FIXED)
100 for(int i = 0; i < n; i++)
101 {
102 const double scale = 1 << FIX_PT;
103 const double real_diff = (test_real[i] - output_data_real[i])/scale;
104 const double imag_diff = (test_imag[i] - output_data_imag[i])/scale;
105
106 const double i_sq_error = real_diff*real_diff + imag_diff*imag_diff;
107 if(i_sq_error > current_max) {
108 printf("i = %d, current error: %d\n", i, (long)current_max);
109 current_max = i_sq_error;
110 }
111 }
112 #elif defined(FFT_FLOATING)
113 fftval_t real_expect = 0.0;
114 fftval_t imag_expect = 0.0;
115 for(int i = 0; i < n; i++)
116 {
117 /* TODO: Fix error caculation for half precision */
118 const fftval_t real_diff = (test_real[i] - output_data_real[i]);
119 const fftval_t imag_diff = (test_imag[i] - output_data_imag[i]);
120 fftval_t i_sq_error = real_diff*real_diff + imag_diff*imag_diff;
121
122 #if 0
123 long tr = (long)(test_real[i] * 1000000000);
124 long ti = (long)(test_imag[i] * 1000000000);
125 long er = (long)(output_data_real[i] * 1000000000);
126 long ei = (long)(output_data_imag[i] * 1000000000);
127
128 printf("i = %d, expected (%d,%d) and got (%d,%d), diff (%d,%d)\n",
129 i,
130 er, ei,
131 tr, ti,
132 er-tr, ei-ti);
133 #endif
134
135 #if 1
136 fftbit_t tr, ti, er, ei;
137 #ifdef FP_HALF
138 tr = test_real[i];
139 ti = test_imag[i];
140 er = output_data_real[i];
141 ei = output_data_imag[i];
142 #else
143 union bits {
144 fftval_t v;
145 fftbit_t u;
146 } bits;
147 bits.v = test_real[i]; tr = bits.u;
148 bits.v = test_imag[i]; ti = bits.u;
149 bits.v = output_data_real[i]; er = bits.u;
150 bits.v = output_data_imag[i]; ei = bits.u;
151 #endif
152 printf("%d: %d %d %d %d\n", i, er, tr, ei, ti);
153 // printf("%4d\t" FFT_PRI "\t" FFT_PRI "\t" FFT_PRI "\t" FFT_PRI "\n",
154 // i, er, tr, ei, ti);
155 #endif
156
157 #if 0
158 if(i_sq_error > current_max) {
159 printf("i = %d, max error (ppb): %ld\n", i, (long)(current_max * 1000000000));
160 current_max = i_sq_error;
161 real_expect = output_data_real[i];
162 imag_expect = output_data_imag[i];
163 }
164 #endif
165 }
166 /*
167 printf("real expected: %d\n", (long)(real_expect));
168 printf("imag expected: %d\n", (long)(imag_expect));
169 */
170 #endif
171
172 return current_max;
173 }
174
// Print the verdict and the performance counters, then terminate.
//
//   max_sq_error  largest squared error reported by the checker
//   num_cycles    cycle-count delta over the measured region
//   num_retired   retired-instruction delta over the measured region
//
// The run passes when max_sq_error is below 10e-8 (i.e. 1e-7). The process
// exits with status 1 on pass and 2 on failure. Does not return.
void finishTest( double max_sq_error, long long num_cycles, long long num_retired)
{
  int passed = max_sq_error < 10e-8;

  if( passed ) printstr("*** PASSED ***");
  else printstr("*** FAILED ***");

  // Both counters are long long, so the conversion must be %lld (was %ld).
  printf(" (num_cycles = %lld, num_inst_retired = %lld)\n", num_cycles, num_retired);

  // The status was previously computed and then dropped, because exit() was
  // called with no argument. Pass it through: 1 if it passed, 2 otherwise.
  exit(passed ? 1 : 2);
}
188
// Statistics on/off hook; callers pass 1 before and 0 after the measured
// region. The only statement it ever contained is commented out, so the
// call has no effect in any configuration:
//   asm( "mtpcr %0, cr10" : : "r" (enable) );   // was under !HOST_DEBUG && SET_STATS
void setStats( int enable )
{
  (void)enable;
}
195
// Read the cycle counter.
// With !HOST_DEBUG && SET_STATS the value comes from the `rdcycle`
// instruction; in every other configuration the placeholder 1337 is
// returned.
long long getCycles()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long count;
  __asm__ __volatile__( "rdcycle %0" : "=r" (count) );
  return count;
#else
  return 1337;
#endif
}
204
// Read the retired-instruction counter.
// With !HOST_DEBUG && SET_STATS the value comes from the `rdinstret`
// instruction; in every other configuration the placeholder 1338 is
// returned.
long long getInstRetired()
{
#if ( !HOST_DEBUG && SET_STATS )
  long long count;
  __asm__ __volatile__( "rdinstret %0" : "=r" (count) );
  return count;
#else
  return 1338;
#endif
}
213
214 #endif /* !MINIMAL */
215
216 //--------------------------------------------------------------------------
217 // Main
218 #define HWACHA_RADIX 2
219
220 #ifdef DATA_IN_UNPERMUTED
221 void permute(fftval_t workspace_real[], fftval_t workspace_imag[])
222 {
223 const int logradix = log2down(HWACHA_RADIX);
224 const int term_mask = HWACHA_RADIX-1;
225 const int num_term = log2down(FFT_SIZE)/logradix;
226 for(int i = 0; i < FFT_SIZE; i++)
227 {
228 // Get permuted address
229 int i_left = i;
230 int permuted = 0;
231 for(int cur_fft_size=HWACHA_RADIX; cur_fft_size <= FFT_SIZE; cur_fft_size = cur_fft_size << logradix)
232 {
233 permuted = (permuted << logradix) | (i_left & term_mask);
234 i_left = i_left >> logradix;
235 }
236 // If addresses are different and i < permuted (so we only do permutation once)
237 if(i < permuted)
238 {
239 fftval_t t = workspace_real[i];
240 fftval_t u = workspace_imag[i];
241 workspace_real[i] = workspace_real[permuted];
242 workspace_imag[i] = workspace_imag[permuted];
243 workspace_real[permuted] = t;
244 workspace_imag[permuted] = u;
245 }
246 }
247 }
248 #endif /* DATA_IN_UNPERMUTED */
249
250 #if MINIMAL
251
252 int main(void)
253 {
254 #ifdef DATA_IN_UNPERMUTED
255 permute(input_data_real, input_data_imag);
256 #endif
257 fft(input_data_real, input_data_imag, tf_real, tf_imag);
258 // calculate_error(FFT_SIZE, input_data_real, input_data_imag);
259 exit();
260 }
261
262 #else /* !MINIMAL */
263
264 int main(void)
265 {
266 static fftval_t workspace_real[FFT_SIZE];
267 static fftval_t workspace_imag[FFT_SIZE];
268 static fftval_t warm_tf_real[FFT_SIZE];
269 static fftval_t warm_tf_imag[FFT_SIZE];
270 setup_input(FFT_SIZE, workspace_real, workspace_imag);
271 setup_warm_tf(FFT_SIZE, warm_tf_real, warm_tf_imag);
272
273 #if PREALLOCATE
274 fft(workspace_real, workspace_imag, warm_tf_real, warm_tf_imag);
275 setup_input(FFT_SIZE, workspace_real, workspace_imag);
276 #endif
277
278 long long start_cycles, start_retired, stop_cycles, stop_retired;
279 start_cycles = getCycles();
280 start_retired = getInstRetired();
281
282 #ifdef DATA_IN_UNPERMUTED
283 permute(workspace_real, workspace_imag);
284 #endif
285 setStats(1);
286 fft(workspace_real, workspace_imag, warm_tf_real, warm_tf_imag);
287 setStats(0);
288
289 stop_cycles = getCycles();
290 stop_retired = getInstRetired();
291 long long num_cycles = stop_cycles - start_cycles;
292 long long num_retired = stop_retired - start_retired;
293
294 const double max_sq_error = calculate_error(FFT_SIZE, workspace_real, workspace_imag);
295
296 // Check the results
297 finishTest(max_sq_error, num_cycles, num_retired);
298 }
299
300 #endif /* MINIMAL */