1 //**************************************************************************
2 // Vector-Thread Complex Multiply benchmark
3 //--------------------------------------------------------------------------
5 // This benchmark multiplies two arrays of complex numbers element-wise. The
6 // input data (and reference data) should be generated using the
7 // cmplxmult_gendata.pl Perl script and dumped to a file named dataset.h. The
8 // riscv-gcc toolchain does not support system calls, so printf calls can only
9 // be used on a host system, not on the riscv-v processor simulator itself.
11 // HOWEVER: printstr() and printhex() are provided, for a primitive form of
12 // printing strings and hexadecimal values to stdout.
15 // Choose which implementation you wish to test... but leave only one on!
16 // (only the first one will be executed).
21 //--------------------------------------------------------------------------
24 // Set HOST_DEBUG to 1 if you are going to compile this for a host
25 // machine (e.g. Athena/Linux) for debug purposes, and set HOST_DEBUG
26 // to 0 if you are compiling with the riscv-gcc toolchain.
32 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
33 // function before starting stats. If you have instruction/data
34 // caches and you don't want to count the overhead of misses, then
35 // you will need to use preallocation.
41 // Set SET_STATS to 1 if you want to carve out the piece that actually
42 // does the computation.
48 //--------------------------------------------------------------------------
49 // Host Platform Includes
55 void printstr(const char*);
60 //--------------------------------------------------------------------------
61 // Complex Value Structs
70 //--------------------------------------------------------------------------
71 // Input/Reference Data
73 //#include "dataset_test.h"
77 //--------------------------------------------------------------------------
// Helper called by close_enough() so that values are compared by magnitude.
// NOTE(review): only the signature is visible in this listing; the body is
// presumably `in` with its sign removed -- confirm against the original file.
80 float absolute( float in
)
89 // Are two floating-point numbers "close enough"?
90 // The tolerance is deliberately loose because the Perl-generated reference values are imprecise.
// Every comparison below goes through absolute(), so only the magnitudes of
// a and b are compared; their signs are never checked.
// NOTE(review): the statements that set/return the result are not visible in
// this listing. verify() treats a zero return value as a mismatch.
91 int close_enough(float a
, float b
)
// Relative test: absolute(a) lies outside [0.90, 1.10] * absolute(b).
// NOTE(review): the third and fourth clauses repeat the first two verbatim
// and add nothing to the condition.
95 if ( absolute(a
) > 1.10*absolute(b
)
96 || absolute(a
) < 0.90*absolute(b
)
97 || absolute(a
) > 1.10*absolute(b
)
98 || absolute(a
) < 0.90*absolute(b
))
// Absolute test, reached only when the relative test fires: the two
// magnitudes differ by more than 0.1.
100 if (absolute(absolute(a
) - absolute(b
)) > 0.1)
// Compares the first n entries of test[] against correct[]. An entry is a
// mismatch when close_enough() rejects either its real or its imaginary part.
// NOTE(review): the return statements are not visible in this listing;
// finishTest() interprets a value of 1 as "passed".
109 int verify( int n
, struct Complex test
[], struct Complex correct
[] )
112 for ( i
= 0; i
< n
; i
++ ) {
// Mismatch check: real parts first, then imaginary parts.
113 if ( !close_enough(test
[i
].real
, correct
[i
].real
)
114 || !close_enough(test
[i
].imag
, correct
[i
].imag
))
// Print the offending pair: the computed value, then the reference value.
117 printf(" test[%d] : {%3.2f, %3.2f}\n", i
, test
[i
].real
, test
[i
].imag
);
118 printf(" corr[%d] : {%3.2f, %3.2f}\n", i
, correct
[i
].real
, correct
[i
].imag
);
120 // Report the failing index plus 10, so that a failure at
121 // i == 0 or i == 1 cannot be mistaken for the
122 // 'not finished yet' or 'pass' status values.
// Prints the n entries of arr[]. Two output paths appear below:
//   - a printf() path that prints `name` followed by each {real,imag} pair;
//   - a printhex() path that prints each part after a cast to int.
// NOTE(review): the preprocessor lines that select between the two paths are
// not visible in this listing -- presumably HOST_DEBUG; confirm.
131 void printComplexArray( char name
[], int n
, struct Complex arr
[] )
// printf() path: label, then one "{real,imag}" group per entry.
135 printf( " %10s :", name
);
136 for ( i
= 0; i
< n
; i
++ )
137 printf( " {%03.2f,%03.2f} ", arr
[i
].real
, arr
[i
].imag
);
// printhex() path: real part then imaginary part of each entry, as int.
// NOTE(review): if the fields are floating point, the (int) cast discards
// the fractional part -- confirm the field type in struct Complex.
142 for ( i
= 0; i
< n
; i
++ )
145 printhex((int) arr
[i
].real
);
147 printhex((int) arr
[i
].imag
);
// Reports the outcome of the test. `correct` is copied into toHostValue and
// compared against 1 (pass). The printf() path prints PASSED, or FAILED with
// toHostValue as the failure code. The printstr()/printhex() path prints
// PASSED or FAILED followed by num_cycles and num_retired in hexadecimal.
// NOTE(review): the else branches and the preprocessor lines separating the
// two paths are not visible in this listing.
157 void finishTest( int correct
, long long num_cycles
, long long num_retired
)
159 int toHostValue
= correct
;
// printf() path.
161 if ( toHostValue
== 1 )
162 printf( "*** PASSED ***\n" );
164 printf( "*** FAILED *** (tohost = %d)\n", toHostValue
);
167 // We no longer run in -testrun mode, which means we can't use
168 // the tohost register to communicate "test is done" and "test results",
169 // so instead we communicate through the print* functions.
// Pass message, with both counters appended in hex.
172 printstr( "*** PASSED *** (num_cycles = 0x" );
173 printhex(num_cycles
);
174 printstr( ", num_inst_retired = 0x");
175 printhex(num_retired
);
// Fail message, with the same two counters appended in hex.
180 printstr( "*** FAILED *** (num_cycles = 0x");
181 printhex(num_cycles
);
182 printstr( ", num_inst_retired = 0x");
183 printhex(num_retired
);
193 // Deprecated: the cr10/stats-enable register no longer exists.
// When built with HOST_DEBUG == 0 and SET_STATS != 0, writes `enable` to
// control register cr10 with an mtpcr instruction.
194 void setStats( int enable
)
196 #if ( !HOST_DEBUG && SET_STATS )
197 asm( "mtpcr %0, cr10" : : "r" (enable
) );
// Reads the cycle counter into `cycles` with rdcycle when built with
// HOST_DEBUG == 0 and SET_STATS != 0; otherwise `cycles` keeps its
// placeholder initial value of 1337.
// NOTE(review): the return statement is not visible in this listing;
// main() uses the result as the counter sample.
201 long long getCycles()
203 long long cycles
= 1337;
204 #if ( !HOST_DEBUG && SET_STATS )
205 __asm__
__volatile__( "rdcycle %0" : "=r" (cycles
) );
// Reads the retired-instruction counter into `inst_retired` with rdinstret
// when built with HOST_DEBUG == 0 and SET_STATS != 0; otherwise
// `inst_retired` keeps its placeholder initial value of 1338.
// NOTE(review): the return statement is not visible in this listing;
// main() uses the result as the counter sample.
210 long long getInstRetired()
212 long long inst_retired
= 1338;
213 #if ( !HOST_DEBUG && SET_STATS )
214 __asm__
__volatile__( "rdinstret %0" : "=r" (inst_retired
) );
219 //--------------------------------------------------------------------------
220 // complex multiply function
222 // Scalar C implementation.
// For each i in [0, n), stores the complex product a[i] * b[i] in c[i]:
//   c[i].real = a[i].real * b[i].real - a[i].imag * b[i].imag
//   c[i].imag = a[i].imag * b[i].real + a[i].real * b[i].imag
// NOTE(review): the declaration of the loop index `i` and the braces around
// the loop body are not visible in this listing.
223 void cmplxmult( int n
, struct Complex a
[], struct Complex b
[], struct Complex c
[] )
226 for ( i
= 0; i
< n
; i
++ )
// Real part: product of the real parts minus product of the imaginary parts.
228 c
[i
].real
= (a
[i
].real
* b
[i
].real
) - (a
[i
].imag
* b
[i
].imag
);
// Imaginary part: sum of the two cross products.
229 c
[i
].imag
= (a
[i
].imag
* b
[i
].real
) + (a
[i
].real
* b
[i
].imag
);
233 // assembly implementations can be found in *_asm.S
235 //--------------------------------------------------------------------------
// Benchmark driver. Prints the input and reference arrays, runs the
// complex-multiply kernel between two samples of the cycle and
// retired-instruction counters, prints the results, checks them against
// verify_data with verify(), and reports through finishTest().
// NOTE(review): cmplxmult, scalar_cmplxmult_asm and vt_cmplxmult_asm are all
// called back to back below, yet the file header says only one
// implementation should be enabled. The preprocessor lines that select the
// implementation (and gate the debug printing and preallocation) are
// presumably missing from this listing -- confirm against the original file.
238 int main( int argc
, char* argv
[] )
// Output buffer for the kernel, followed by the counter bookkeeping.
240 struct Complex results_data
[DATA_SIZE
];
241 long long start_cycles
= 0;
242 long long stop_cycles
= 0;
243 long long num_cycles
;
244 long long start_retired
= 0;
245 long long stop_retired
= 0;
246 long long num_retired
;
248 // Output the two input arrays and the reference (verify) array
251 printComplexArray( "input1", DATA_SIZE
, input1_data
);
252 printComplexArray( "input2", DATA_SIZE
, input2_data
);
253 printComplexArray( "verify", DATA_SIZE
, verify_data
);
256 // --------------------------------------------------
257 // If needed, preallocate everything in the caches by running the kernel once before the counters are sampled
262 cmplxmult( DATA_SIZE
, input1_data
, input2_data
, results_data
);
265 scalar_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
268 vt_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
275 // --------------------------------------------------
// Sample both counters immediately before the measured run.
278 start_cycles
= getCycles();
279 start_retired
= getInstRetired();
// Measured run of the kernel.
282 cmplxmult( DATA_SIZE
, input1_data
, input2_data
, results_data
);
286 scalar_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
291 vt_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
// Sample both counters again; the differences are the cost of the run.
297 stop_cycles
= getCycles();
298 stop_retired
= getInstRetired();
299 num_cycles
= stop_cycles
- start_cycles
;
300 num_retired
= stop_retired
- start_retired
;
302 // --------------------------------------------------
303 // Print out the results next to the reference values
306 printComplexArray( "results", DATA_SIZE
, results_data
);
307 printComplexArray( "verify ", DATA_SIZE
, verify_data
);
311 // --------------------------------------------------
// Check the results against the reference data and report the outcome.
313 int correct
= verify( DATA_SIZE
, results_data
, verify_data
);
314 finishTest(correct
, num_cycles
, num_retired
);