c08e41ae574087cf6bdad2d9f62bea232edb810c
1 //**************************************************************************
2 // Vector-Thread Vector-vector add benchmark
3 //--------------------------------------------------------------------------
5 // This benchmark adds two vectors and writes the results to a third
6 // vector. The input data (and reference data) should be generated using the
7 // vvadd_gendata.pl perl script and dumped to a file named dataset.h. The
8 // riscv-gcc toolchain does not support system calls so printf's can only be
9 // used on a host system, not on the riscv-v processor simulator itself.
11 // HOWEVER: printstr() and printhex() are provided, for a primitive form of
12 // printing strings and hexadecimal values to stdout.
15 // Choose which implementation you wish to test... but leave only one on!
16 // (only the first one will be executed).
21 //--------------------------------------------------------------------------
24 // Set HOST_DEBUG to 1 if you are going to compile this for a host
25 // machine (i.e. Athena/Linux) for debug purposes and set HOST_DEBUG
26 // to 0 if you are compiling with the riscv-gcc toolchain.
32 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
33 // function before starting stats. If you have instruction/data
34 // caches and you don't want to count the overhead of misses, then
35 // you will need to use preallocation.
41 // Set SET_STATS to 1 if you want to carve out the piece that actually
42 // does the computation.
48 //--------------------------------------------------------------------------
49 // Platform Specific Includes
55 void printstr(const char*);
60 //--------------------------------------------------------------------------
61 // Input/Reference Data
63 //#include "dataset_test.h"
66 //--------------------------------------------------------------------------
// verify: compares test[0..n-1] against correct[0..n-1] using a relative
// tolerance instead of exact float equality (the exact comparison is kept
// commented out below). An element is treated as a mismatch when it lies
// above 1.02*correct[i] or below 0.98*correct[i].
//   n       - number of elements to compare
//   test    - values produced by the implementation under test
//   correct - reference values
// NOTE(review): the return statements are not visible in this view of the
// file. The comment at the end of the mismatch branch says the failure code
// is derived from the failing index -- confirm the exact values (and the
// success value, which finishTest() appears to expect to be 1) against the
// complete source.
// NOTE(review): both bounds are built by scaling correct[i]. For a negative
// reference 1.02*correct[i] is the *smaller* bound, so every test value
// satisfies one of the two comparisons and is flagged; a reference of
// exactly 0 accepts only 0. Confirm the dataset is strictly positive.
69 int verify( int n
, float test
[], float correct
[] )
72 for ( i
= 0; i
< n
; i
++ ) {
73 // if ( test[i] != correct[i] ) {
// Mismatch test: outside the 0.98x..1.02x band around the reference value.
74 if ( test
[i
] > 1.02*correct
[i
]
75 || test
[i
] < 0.98*correct
[i
]) {
// Dump the offending pair. The file header says printf is only usable on a
// host build, so this is presumably guarded by HOST_DEBUG (guard not visible
// here -- TODO confirm).
77 printf(" test[%d] : %3.2f\n", i
, test
[i
]);
78 printf(" corr[%d] : %3.2f\n", i
, correct
[i
]);
80 // tell us which index fails + 2
81 // (so that if i==0,i==1 fails, we don't
82 // think it was a 'not-finished yet' or pass)
// printArray: prints a labelled row of floats with printf.
//   name - label, printed right-aligned in a 10-character field followed by ':'
//   n    - number of elements of arr to print
//   arr  - values to print, each formatted with "%03.2f"
// NOTE(review): the function braces, the declaration of i and anything
// printed after the loop (e.g. a trailing newline) are not visible in this
// view. Since it uses printf it is presumably meant for host builds only --
// TODO confirm the guard in the complete source.
91 void printArray( char name
[], int n
, float arr
[] )
94 printf( " %10s :", name
);
// One element per iteration, all on the same output line.
95 for ( i
= 0; i
< n
; i
++ )
96 printf( " %03.2f ", arr
[i
] );
// finishTest: reports the benchmark outcome.
//   correct     - result code from verify(); the value 1 is treated as a pass
//   num_cycles  - cycle count measured around the benchmark kernel
//   num_retired - retired-instruction count measured around the kernel
// Two reporting paths are visible: a printf-based one that prints
// PASSED/FAILED together with the result code, and a printstr/printhex-based
// one that prints PASSED/FAILED together with both counters in hexadecimal.
// NOTE(review): the preprocessor guards and if/else structure that choose
// between these paths (and between the PASSED and FAILED printstr
// sequences) are not visible in this view -- presumably HOST_DEBUG selects
// printf vs. printstr; confirm against the complete source.
102 void finishTest( int correct
, long long num_cycles
, long long num_retired
)
104 int toHostValue
= correct
;
// printf-based report: 1 means pass, anything else is echoed as the
// failure code.
106 if ( toHostValue
== 1 )
107 printf( "*** PASSED ***\n" );
109 printf( "*** FAILED *** (tohost = %d)\n", toHostValue
);
112 // we no longer run in -testrun mode, which means we can't use
113 // the tohost register to communicate "test is done" and "test results"
114 // so instead we will communicate through print* functions!
// printstr/printhex-based report, pass case: both counters are appended in hex.
117 printstr( "*** PASSED *** (num_cycles = 0x" );
118 printhex(num_cycles
);
119 printstr( ", num_inst_retired = 0x");
120 printhex(num_retired
);
// printstr/printhex-based report, failure case: same layout as the pass case.
125 printstr( "*** FAILED *** (num_cycles = 0x");
126 printhex(num_cycles
);
127 printstr( ", num_inst_retired = 0x");
128 printhex(num_retired
);
// setStats: writes `enable` to control register cr10 with an mtpcr
// instruction. The asm is only compiled when HOST_DEBUG is 0 and SET_STATS
// is non-zero; otherwise no statement of this function is visible.
//   enable - value moved into cr10
// NOTE(review): the function braces and the closing #endif are not visible
// in this view.
136 // deprecated - cr10/stats-enable register no longer exists
137 void setStats( int enable
)
139 #if ( !HOST_DEBUG && SET_STATS )
140 asm( "mtpcr %0, cr10" : : "r" (enable
) );
// getCycles: samples the cycle counter with the rdcycle instruction.
// `cycles` starts at the placeholder 1337 and is only overwritten when the
// asm is compiled in (HOST_DEBUG == 0 and SET_STATS != 0), so builds without
// the asm keep the placeholder.
// NOTE(review): the return statement and closing #endif are not visible in
// this view; presumably `cycles` is returned -- TODO confirm.
144 long long getCycles()
146 long long cycles
= 1337;
147 #if ( !HOST_DEBUG && SET_STATS )
// volatile keeps the compiler from removing or merging the counter read.
148 __asm__
__volatile__( "rdcycle %0" : "=r" (cycles
) );
// getInstRetired: samples the retired-instruction counter with the
// rdinstret instruction. `inst_retired` starts at the placeholder 1338 and
// is only overwritten when the asm is compiled in (HOST_DEBUG == 0 and
// SET_STATS != 0), so builds without the asm keep the placeholder.
// NOTE(review): the return statement and closing #endif are not visible in
// this view; presumably `inst_retired` is returned -- TODO confirm.
153 long long getInstRetired()
155 long long inst_retired
= 1338;
156 #if ( !HOST_DEBUG && SET_STATS )
// volatile keeps the compiler from removing or merging the counter read.
157 __asm__
__volatile__( "rdinstret %0" : "=r" (inst_retired
) );
162 //--------------------------------------------------------------------------
165 // scalar C implementation
// vvadd: iterates i over 0..n-1 across the three float arrays.
//   n - number of elements
//   a - first array  (presumably an input operand -- TODO confirm)
//   b - second array (presumably an input operand -- TODO confirm)
//   c - third array  (presumably the destination -- TODO confirm)
// NOTE(review): the loop body is not visible in this view. From the file
// header ("adds two vectors and writes the results to a third") it is
// presumably c[i] = a[i] + b[i]; confirm against the complete source.
166 void vvadd( int n
, float a
[], float b
[], float c
[] )
169 for ( i
= 0; i
< n
; i
++ )
173 // assembly implementations can be found in *_asm.S
175 //--------------------------------------------------------------------------
// main: benchmark driver. The visible statements, in order:
//   1. print the two input arrays and the reference array,
//   2. run the kernel once before measurement (cache preallocation),
//   3. sample the cycle and retired-instruction counters, run the kernel,
//      sample the counters again and take the differences,
//   4. print the result array,
//   5. check results_data against verify_data with verify() and hand the
//      outcome plus both counter deltas to finishTest().
// DATA_SIZE, input1_data, input2_data and verify_data are not defined in
// this view; presumably they come from the dataset.h header mentioned at the
// top of the file.
// NOTE(review): the preprocessor guards are not visible here. The three
// kernel calls (vvadd, scalar_vvadd_asm, vt_vvadd_asm) appear back to back
// in both the preallocation and the measured section, but the file header
// says only one implementation should be enabled; the printArray calls and
// the preallocation run are presumably gated by HOST_DEBUG and PREALLOCATE.
// The return statement of main is also not visible. Confirm all of this
// against the complete source.
178 int main( int argc
, char* argv
[] )
// Output buffer for the kernel, kept on main's stack.
180 float results_data
[DATA_SIZE
];
181 long long start_cycles
= 0;
182 long long stop_cycles
= 0;
183 long long num_cycles
;
184 long long start_retired
= 0;
185 long long stop_retired
= 0;
186 long long num_retired
;
188 // Output the input array
191 printArray( "input1", DATA_SIZE
, input1_data
);
192 printArray( "input2", DATA_SIZE
, input2_data
);
193 printArray( "verify", DATA_SIZE
, verify_data
);
196 // --------------------------------------------------
197 // If needed we preallocate everything in the caches
// Warm-up run: same arguments as the measured run below, executed before
// the counters are sampled.
202 vvadd( DATA_SIZE
, input1_data
, input2_data
, results_data
);
205 scalar_vvadd_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
208 vt_vvadd_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
215 // --------------------------------------------------
// Measured region starts: take the "before" counter samples.
217 start_cycles
= getCycles();
218 start_retired
= getInstRetired();
221 vvadd( DATA_SIZE
, input1_data
, input2_data
, results_data
);
225 scalar_vvadd_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
230 vt_vvadd_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
// Measured region ends: take the "after" samples and compute the deltas.
236 stop_cycles
= getCycles();
237 stop_retired
= getInstRetired();
238 num_cycles
= stop_cycles
- start_cycles
;
239 num_retired
= stop_retired
- start_retired
;
241 // printstr("stop_cycles: "); printhex(stop_cycles); printstr("\n");
242 // printstr("star_cycles: "); printhex(start_cycles); printstr("\n");
244 // --------------------------------------------------
245 // Print out the results
248 printArray( "results", DATA_SIZE
, results_data
);
251 // --------------------------------------------------
// Verify against the reference data and report the outcome with the
// measured counter deltas.
253 int correct
= verify( DATA_SIZE
, results_data
, verify_data
);
254 finishTest(correct
, num_cycles
, num_retired
);