benchmarks/vec-fft/vec-fft_main.c

   1 // See LICENSE for license details.
   2
   3 // *************************************************************************
   4 // multiply filter benchmark
   5 // -------------------------------------------------------------------------
   6 //
   7 // This benchmark tests the software multiply implementation. The
   8 // input data (and reference data) should be generated using the
   9 // multiply_gendata.pl perl script and dumped to a file named
  10 // dataset1.h You should not change anything except the
  11 // HOST_DEBUG and VERIFY macros for your timing run.
  12
  13 #include "vec-fft.h"
  14
  15 //--------------------------------------------------------------------------
  16 // Macros
  17
  18 // Set HOST_DEBUG to 1 if you are going to compile this for a host
  19 // machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
  20 // to 0 if you are compiling with the smips-gcc toolchain.
  21
  22 #ifndef HOST_DEBUG
  23 #define HOST_DEBUG 0
  24 #endif
  25
  26 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
  27 // function before starting stats. If you have instruction/data
  28 // caches and you don't want to count the overhead of misses, then
  29 // you will need to use preallocation.
  30
  31 #ifndef PREALLOCATE
  32 #define PREALLOCATE 0
  33 #endif
  34
  35 // Set VERIFY to 1 if you want the program to check that the sort
  36 // function returns the right answer. When you are doing your
  37 // benchmarking you should set this to 0 so that the verification
  38 // is not included in your timing.
  39
  40 #ifndef VERIFY
  41 #define VERIFY     1
  42 #endif
  43
  44 // Set SET_STATS to 1 if you want to carve out the piece that actually
  45 // does the computation.
  46
  47 #ifndef SET_STATS
  48 #define SET_STATS 0
  49 #endif
  50
  51 // Set MINIMAL to 1 if you want to run the core FFT kernel without
  52 // any instrumentation or warm-up.
  53 #ifndef MINIMAL
  54 #define MINIMAL 1
  55 #endif
  56
  57 //--------------------------------------------------------------------------
  58 // Platform Specific Includes
  59
  60 #if HOST_DEBUG
  61    #include <stdio.h>
  62    #include <stdlib.h>
  63 #else
  64 void printstr(const char*);
  65 void exit();
  66 #endif
  67
  68
  69 //--------------------------------------------------------------------------
  70 // Input/Reference Data
  71
  72 #include "fft_const.h"
  73
  74 //--------------------------------------------------------------------------
  75 // Helper functions
  76
  77 #if !MINIMAL
  78
  79 void setup_input(int n, fftval_t in_real[], fftval_t in_imag[])
  80 {
  81   int i;
  82   for(i=0; i < n; i++) {
  83     in_real[i] = input_data_real[i];
  84     in_imag[i] = input_data_imag[i];
  85   }
  86 }
  87 void setup_warm_tf(int n, fftval_t in_real[], fftval_t in_imag[])
  88 {
  89   int i;
  90   for(i=0; i < n; i++) {
  91     in_real[i] = tf_real[i];
  92     in_imag[i] = tf_imag[i];
  93   }
  94 }
  95
  96 fftval_t calculate_error( int n, const fftval_t test_real[], const fftval_t test_imag[])
  97 {
  98   fftval_t current_max = 0;
  99   printf("idx, real expected, real observed, imag expected, imag observed %d\n", 0);
 100
 101 #if defined(FFT_FIXED)
 102   for(int i = 0; i < n; i++)
 103   {
 104     const double scale = 1 << FIX_PT;
 105     const double real_diff = (test_real[i] - output_data_real[i])/scale;
 106     const double imag_diff = (test_imag[i] - output_data_imag[i])/scale;
 107
 108     const double i_sq_error = real_diff*real_diff + imag_diff*imag_diff;
 109     if(i_sq_error > current_max) {
 110       printf("i = %d, current error: %d\n", i, (long)current_max);
 111       current_max = i_sq_error;
 112     }
 113   }
 114 #elif defined(FFT_FLOATING)
 115   fftval_t real_expect = 0.0;
 116   fftval_t imag_expect = 0.0;
 117   for(int i = 0; i < n; i++)
 118   {
 119     /* TODO: Fix error caculation for half precision */
 120     const fftval_t real_diff = (test_real[i] - output_data_real[i]);
 121     const fftval_t imag_diff = (test_imag[i] - output_data_imag[i]);
 122     fftval_t i_sq_error = real_diff*real_diff + imag_diff*imag_diff;
 123
 124 #if 0
 125     long tr = (long)(test_real[i] * 1000000000);
 126     long ti = (long)(test_imag[i] * 1000000000);
 127     long er = (long)(output_data_real[i] * 1000000000);
 128     long ei = (long)(output_data_imag[i] * 1000000000);
 129
 130     printf("i = %d, expected (%d,%d) and got (%d,%d), diff (%d,%d)\n",
 131            i,
 132            er, ei,
 133            tr, ti,
 134            er-tr, ei-ti);
 135 #endif
 136
 137 #if 1
 138     fftbit_t tr, ti, er, ei;
 139 #ifdef FP_HALF
 140     tr = test_real[i];
 141     ti = test_imag[i];
 142     er = output_data_real[i];
 143     ei = output_data_imag[i];
 144 #else
 145     union bits {
 146       fftval_t v;
 147       fftbit_t u;
 148     } bits;
 149     bits.v = test_real[i]; tr = bits.u;
 150     bits.v = test_imag[i]; ti = bits.u;
 151     bits.v = output_data_real[i]; er = bits.u;
 152     bits.v = output_data_imag[i]; ei = bits.u;
 153 #endif
 154     printf("%d: %d %d %d %d\n", i, er, tr, ei, ti);
 155     // printf("%4d\t" FFT_PRI "\t" FFT_PRI "\t" FFT_PRI "\t" FFT_PRI "\n",
 156     //       i, er, tr, ei, ti);
 157 #endif
 158
 159 #if 0
 160     if(i_sq_error > current_max) {
 161       printf("i = %d, max error (ppb): %ld\n", i, (long)(current_max * 1000000000));
 162       current_max = i_sq_error;
 163       real_expect = output_data_real[i];
 164       imag_expect = output_data_imag[i];
 165     }
 166 #endif
 167   }
 168 /*
 169   printf("real expected: %d\n", (long)(real_expect));
 170   printf("imag expected: %d\n", (long)(imag_expect));
 171 */
 172 #endif
 173
 174   return current_max;
 175 }
 176
 177 void finishTest( double max_sq_error, long long num_cycles, long long num_retired)
 178 {
 179   int passed = max_sq_error < 10e-8;
 180
 181   if( passed ) printstr("*** PASSED ***");
 182   else printstr("*** FAILED ***");
 183
 184   printf(" (num_cycles = %ld, num_inst_retired = %ld)\n", num_cycles, num_retired);
 185
 186   passed = passed ? 1 : 2; // if it passed, return 1
 187
 188   exit();
 189 }
 190
 191 void setStats( int enable )
 192 {
 193 #if ( !HOST_DEBUG && SET_STATS )
 194   //asm( "mtpcr %0, cr10" : : "r" (enable) );
 195 #endif
 196 }
 197
 198 long long getCycles()
 199 {
 200    long long cycles = 1337;
 201 #if ( !HOST_DEBUG && SET_STATS )
 202   __asm__ __volatile__( "rdcycle %0" : "=r" (cycles) );
 203 #endif
 204   return cycles;
 205 }
 206
 207 long long getInstRetired()
 208 {
 209    long long inst_retired = 1338;
 210 #if ( !HOST_DEBUG && SET_STATS )
 211   __asm__ __volatile__( "rdinstret %0" : "=r" (inst_retired) );
 212 #endif
 213   return inst_retired;
 214 }
 215
 216 #endif /* !MINIMAL */
 217
 218 //--------------------------------------------------------------------------
 219 // Main
 220 #define HWACHA_RADIX 2
 221
 222 #ifdef DATA_IN_UNPERMUTED
 223 void permute(fftval_t workspace_real[], fftval_t workspace_imag[])
 224 {
 225   const int logradix = log2down(HWACHA_RADIX);
 226   const int term_mask = HWACHA_RADIX-1;
 227   const int num_term = log2down(FFT_SIZE)/logradix;
 228   for(int i = 0; i < FFT_SIZE; i++)
 229   {
 230     // Get permuted address
 231     int i_left = i;
 232     int permuted = 0;
 233     for(int cur_fft_size=HWACHA_RADIX; cur_fft_size <= FFT_SIZE; cur_fft_size = cur_fft_size << logradix)
 234     {
 235       permuted = (permuted << logradix) | (i_left & term_mask);
 236       i_left = i_left >> logradix;
 237     }
 238     // If addresses are different and i < permuted (so we only do permutation once)
 239     if(i < permuted)
 240     {
 241       fftval_t t = workspace_real[i];
 242       fftval_t u = workspace_imag[i];
 243       workspace_real[i] = workspace_real[permuted];
 244       workspace_imag[i] = workspace_imag[permuted];
 245       workspace_real[permuted] = t;
 246       workspace_imag[permuted] = u;
 247     }
 248   }
 249 }
 250 #endif /* DATA_IN_UNPERMUTED */
 251
 252 #if MINIMAL
 253
 254 int main(void)
 255 {
 256 #ifdef DATA_IN_UNPERMUTED
 257   permute(input_data_real, input_data_imag);
 258 #endif
 259   fft(input_data_real, input_data_imag, tf_real, tf_imag);
 260 //  calculate_error(FFT_SIZE, input_data_real, input_data_imag);
 261   exit();
 262 }
 263
 264 #else /* !MINIMAL */
 265
 266 int main(void)
 267 {
 268   static fftval_t workspace_real[FFT_SIZE];
 269   static fftval_t workspace_imag[FFT_SIZE];
 270   static fftval_t warm_tf_real[FFT_SIZE];
 271   static fftval_t warm_tf_imag[FFT_SIZE];
 272   setup_input(FFT_SIZE, workspace_real, workspace_imag);
 273   setup_warm_tf(FFT_SIZE, warm_tf_real, warm_tf_imag);
 274
 275 #if PREALLOCATE
 276   fft(workspace_real, workspace_imag, warm_tf_real, warm_tf_imag);
 277   setup_input(FFT_SIZE, workspace_real, workspace_imag);
 278 #endif
 279
 280   long long start_cycles, start_retired, stop_cycles, stop_retired;
 281   start_cycles = getCycles();
 282   start_retired = getInstRetired();
 283
 284 #ifdef DATA_IN_UNPERMUTED
 285   permute(workspace_real, workspace_imag);
 286 #endif
 287   setStats(1);
 288   fft(workspace_real, workspace_imag, warm_tf_real, warm_tf_imag);
 289   setStats(0);
 290
 291   stop_cycles = getCycles();
 292   stop_retired = getInstRetired();
 293   long long num_cycles = stop_cycles - start_cycles;
 294   long long num_retired = stop_retired - start_retired;
 295
 296   const double max_sq_error = calculate_error(FFT_SIZE, workspace_real, workspace_imag);
 297
 298   // Check the results
 299   finishTest(max_sq_error, num_cycles, num_retired);
 300 }
 301
 302 #endif /* MINIMAL */