62521c4d13c96d5e5e9964b57dc4ece31fd71948
1 // See LICENSE for license details.
3 //**************************************************************************
4 // Vector-Thread Complex Multiply benchmark
5 //--------------------------------------------------------------------------
7 // This benchmark multiplies two arrays of complex numbers element-wise. The input data (and
8 // reference data) should be generated using the cmplxmult_gendata.pl perl
9 // script and dumped to a file named dataset.h. The riscv-gcc toolchain does
10 // not support system calls so printf's can only be used on a host system, not
11 // on the riscv-v processor simulator itself.
13 // HOWEVER: printstr() and printhex() are provided, for a primitive form of
14 // printing strings and hexadecimal values to stdout.
17 // Choose which implementation you wish to test... but leave only one on!
18 // (only the first one will be executed).
23 //--------------------------------------------------------------------------
26 // Set HOST_DEBUG to 1 if you are going to compile this for a host
27 // machine (i.e. Athena/Linux) for debug purposes and set HOST_DEBUG
28 // to 0 if you are compiling with the riscv-gcc toolchain.
34 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
35 // function before starting stats. If you have instruction/data
36 // caches and you don't want to count the overhead of misses, then
37 // you will need to use preallocation.
43 // Set SET_STATS to 1 if you want to carve out the piece that actually
44 // does the computation.
50 //--------------------------------------------------------------------------
51 // Host Platform Includes
57 void printstr(const char*);
63 //--------------------------------------------------------------------------
64 // Complex Value Structs
73 //--------------------------------------------------------------------------
74 // Input/Reference Data
76 //#include "dataset_test.h"
80 //--------------------------------------------------------------------------
83 float absolute( float in
)
92 // are two floating point numbers "close enough"?
93 // this is pretty loose, because Perl is giving me pretty terrible answers
94 int close_enough(float a
, float b
)
98 if ( absolute(a
) > 1.10*absolute(b
)
99 || absolute(a
) < 0.90*absolute(b
)
100 || absolute(a
) > 1.10*absolute(b
)
101 || absolute(a
) < 0.90*absolute(b
))
103 if (absolute(absolute(a
) - absolute(b
)) > 0.1)
112 int verify( int n
, struct Complex test
[], struct Complex correct
[] )
115 for ( i
= 0; i
< n
; i
++ ) {
116 if ( !close_enough(test
[i
].real
, correct
[i
].real
)
117 || !close_enough(test
[i
].imag
, correct
[i
].imag
))
120 printf(" test[%d] : {%3.2f, %3.2f}\n", i
, test
[i
].real
, test
[i
].imag
);
121 printf(" corr[%d] : {%3.2f, %3.2f}\n", i
, correct
[i
].real
, correct
[i
].imag
);
123 // Report the failing index + 10, so that a failure at
124 // i==0 or i==1 is not mistaken for a 'not finished yet'
125 // status or a pass.
134 void printComplexArray( char name
[], int n
, struct Complex arr
[] )
138 printf( " %10s :", name
);
139 for ( i
= 0; i
< n
; i
++ )
140 printf( " {%03.2f,%03.2f} ", arr
[i
].real
, arr
[i
].imag
);
145 for ( i
= 0; i
< n
; i
++ )
148 printhex((int) arr
[i
].real
);
150 printhex((int) arr
[i
].imag
);
160 void finishTest( int correct
, long long num_cycles
, long long num_retired
)
162 int toHostValue
= correct
;
164 if ( toHostValue
== 1 )
165 printf( "*** PASSED ***\n" );
167 printf( "*** FAILED *** (tohost = %d)\n", toHostValue
);
170 // we no longer run in -testrun mode, which means we can't use
171 // the tohost register to communicate "test is done" and "test results"
172 // so instead we will communicate through print* functions!
175 printstr( "*** PASSED *** (num_cycles = 0x" );
176 printhex(num_cycles
);
177 printstr( ", num_inst_retired = 0x");
178 printhex(num_retired
);
183 printstr( "*** FAILED *** (num_cycles = 0x");
184 printhex(num_cycles
);
185 printstr( ", num_inst_retired = 0x");
186 printhex(num_retired
);
196 // deprecated - cr10/stats-enable register no longer exists
197 void setStats( int enable
)
199 #if ( !HOST_DEBUG && SET_STATS )
200 asm( "mtpcr %0, cr10" : : "r" (enable
) );
204 long long getCycles()
206 long long cycles
= 1337;
207 #if ( !HOST_DEBUG && SET_STATS )
208 __asm__
__volatile__( "rdcycle %0" : "=r" (cycles
) );
213 long long getInstRetired()
215 long long inst_retired
= 1338;
216 #if ( !HOST_DEBUG && SET_STATS )
217 __asm__
__volatile__( "rdinstret %0" : "=r" (inst_retired
) );
222 //--------------------------------------------------------------------------
223 // complex multiply function
225 // scalar C implementation
226 void cmplxmult( int n
, struct Complex a
[], struct Complex b
[], struct Complex c
[] )
229 for ( i
= 0; i
< n
; i
++ )
231 c
[i
].real
= (a
[i
].real
* b
[i
].real
) - (a
[i
].imag
* b
[i
].imag
);
232 c
[i
].imag
= (a
[i
].imag
* b
[i
].real
) + (a
[i
].real
* b
[i
].imag
);
236 // assembly implementations can be found in *_asm.S
238 //--------------------------------------------------------------------------
241 int main( int argc
, char* argv
[] )
243 struct Complex results_data
[DATA_SIZE
];
244 long long start_cycles
= 0;
245 long long stop_cycles
= 0;
246 long long num_cycles
;
247 long long start_retired
= 0;
248 long long stop_retired
= 0;
249 long long num_retired
;
251 // Output the input array
254 printComplexArray( "input1", DATA_SIZE
, input1_data
);
255 printComplexArray( "input2", DATA_SIZE
, input2_data
);
256 printComplexArray( "verify", DATA_SIZE
, verify_data
);
259 // --------------------------------------------------
260 // If needed we preallocate everything in the caches
265 cmplxmult( DATA_SIZE
, input1_data
, input2_data
, results_data
);
268 scalar_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
271 vt_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
278 // --------------------------------------------------
281 start_cycles
= getCycles();
282 start_retired
= getInstRetired();
285 cmplxmult( DATA_SIZE
, input1_data
, input2_data
, results_data
);
289 scalar_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
294 vt_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
300 stop_cycles
= getCycles();
301 stop_retired
= getInstRetired();
302 num_cycles
= stop_cycles
- start_cycles
;
303 num_retired
= stop_retired
- start_retired
;
305 // --------------------------------------------------
306 // Print out the results
309 printComplexArray( "results", DATA_SIZE
, results_data
);
310 printComplexArray( "verify ", DATA_SIZE
, verify_data
);
314 // --------------------------------------------------
316 int correct
= verify( DATA_SIZE
, results_data
, verify_data
);
317 finishTest(correct
, num_cycles
, num_retired
);