// See LICENSE for license details.

//**************************************************************************
// Vector-Thread Vector Matrix Multiply benchmark
//--------------------------------------------------------------------------
//
// This benchmark multiplies two 2-D arrays together and writes the results to
// a third array. The input data (and reference data) should be generated
// using the matmul_gendata.pl perl script and dumped to the dataset header
// that is included in the "Input/Reference Data" section below.
// Choose which implementation you wish to test... but leave only one on!
// (only the first one will be executed).
//--------------------------------------------------------------------------
// Input/Reference Data

//#include "dataset_test.h"

//--------------------------------------------------------------------------
/*
 * Compare the computed values in `test` against the reference in `correct`.
 *
 *   n       - number of elements to compare
 *   test    - values produced by the implementation under test
 *   correct - reference values
 *
 * An element matches when it lies within 2% (relative) of its reference
 * value. The tolerance is taken on the magnitude of the reference so that
 * negative reference values are handled the same way as positive ones.
 *
 * Returns 1 when all n elements match (finishTest() reports 1 as PASSED),
 * otherwise the index of the first mismatching element plus 2.
 */
int verify( int n, float test[], float correct[] )
{
  int i;

  for ( i = 0; i < n; i++ ) {
    double expected  = correct[i];
    double tolerance = 0.02 * ( expected < 0.0 ? -expected : expected );
    double error     = test[i] - expected;

    // Phrased as a negated "in range" check so that a NaN result, for which
    // every ordered comparison is false, is reported as a mismatch.
    if ( !( error <= tolerance && error >= -tolerance ) ) {
      // NOTE(review): the diagnostic printf calls are assumed to be host-only
      // (the target build reports through printstr/printhex) - confirm.
#if HOST_DEBUG
      printf(" test[%d] : %3.2f\n", i, test[i]);
      printf(" corr[%d] : %3.2f\n", i, correct[i]);
#endif
      // tell us which index fails + 2
      // (so that if i==0,i==1 fails, we don't
      // think it was a 'not-finished yet' or pass)
      return i + 2;
    }
  }

  return 1;
}
/*
 * Report the outcome of the benchmark run.
 *
 *   correct     - result of verify(); 1 means the test passed
 *   num_cycles  - cycle count measured around the kernel
 *   num_retired - retired-instruction count measured around the kernel
 *
 * NOTE(review): the split between a printf-based host path and a
 * printstr/printhex-based target path is reconstructed from the two sets of
 * messages and from the HOST_DEBUG guard used by the counter helpers in this
 * file; confirm the guard. Any terminating call (e.g. exit) that may have
 * followed the messages could not be recovered and is not reproduced here.
 */
void finishTest( int correct, long long num_cycles, long long num_retired )
{
  int toHostValue = correct;

#if HOST_DEBUG
  (void) num_cycles;
  (void) num_retired;

  if ( toHostValue == 1 ) {
    printf( "*** PASSED ***\n" );
  } else {
    printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
  }
#else
  // we no longer run in -testrun mode, which means we can't use
  // the tohost register to communicate "test is done" and "test results"
  // so instead we will communicate through print* functions!
  if ( toHostValue == 1 ) {
    printstr( "*** PASSED *** (num_cycles = 0x" );
    printhex( num_cycles );
    printstr( ", num_inst_retired = 0x");
    printhex( num_retired );
    printstr( ")\n" );
  } else {
    printstr( "*** FAILED *** (num_cycles = 0x");
    printhex( num_cycles );
    printstr( ", num_inst_retired = 0x");
    printhex( num_retired );
    printstr( ")\n" );
  }
#endif
}
// deprecated - cr10/stats-enable register no longer exists
/*
 * Write `enable` to control register cr10 via the mtpcr instruction.
 *
 * The instruction is only emitted for target builds with SET_STATS set
 * (HOST_DEBUG off); in every other configuration the call does nothing.
 */
void setStats( int enable )
{
#if ( !HOST_DEBUG && SET_STATS )
  __asm__ __volatile__( "mtpcr %0, cr10" : : "r" (enable) );
#else
  (void) enable;  // nothing to configure in this build
#endif
}
/*
 * Read the cycle counter.
 *
 * On target builds with SET_STATS set (HOST_DEBUG off) the value comes from
 * the rdcycle instruction. In every other configuration the placeholder
 * value 1337 is returned.
 */
long long getCycles( void )
{
  long long cycles = 1337;
#if ( !HOST_DEBUG && SET_STATS )
  __asm__ __volatile__( "rdcycle %0" : "=r" (cycles) );
#endif
  return cycles;
}
/*
 * Read the retired-instruction counter.
 *
 * On target builds with SET_STATS set (HOST_DEBUG off) the value comes from
 * the rdinstret instruction. In every other configuration the placeholder
 * value 1338 is returned.
 */
long long getInstRetired( void )
{
  long long inst_retired = 1338;
#if ( !HOST_DEBUG && SET_STATS )
  __asm__ __volatile__( "rdinstret %0" : "=r" (inst_retired) );
#endif
  return inst_retired;
}
//--------------------------------------------------------------------------

// scalar C implementation
/*
 * Multiply two square lda x lda matrices and accumulate into C.
 *
 *   lda - dimension of the (square) matrices
 *   A   - left operand, element (row j, col k) at A[j*lda + k]
 *   B   - right operand, element (row k, col i) at B[k*lda + i]
 *   C   - accumulator/output, element (row j, col i) at C[i + j*lda]
 *
 * Each output element starts from the value already stored in C, so the
 * caller must zero C beforehand to obtain the plain product A*B.
 */
void matmul( const int lda, const float A[], const float B[], float C[] )
{
  int i, j, k;

  for ( j = 0; j < lda; j++ ) {
    for ( i = 0; i < lda; i++ ) {
      // Accumulate in a local so the inner loop does not touch C.
      float cij = C[i + j*lda];
      for ( k = 0; k < lda; k++ ) {
        cij += A[j*lda + k] * B[k*lda + i];
      }
      C[i + j*lda] = cij;
    }
  }
}
// assembly implementations can be found in *_asm.S

//--------------------------------------------------------------------------
// Benchmark entry point: zeroes the result array, samples the cycle and
// retired-instruction counters around the matrix multiply, then verifies the
// result against verify_data and reports the outcome through finishTest().
//
// NOTE(review): this block looks extraction-garbled. Each statement carries a
// stray leading number, identifiers are split across lines, and the braces,
// the declarations of i and j, and any preprocessor guards are not visible.
// Restore it from the upstream file before building.
134 int main( int argc
, char* argv
[] )
// Counter samples taken before and after the kernel, and their differences.
137 long long start_cycles
= 0;
138 long long stop_cycles
= 0;
139 long long num_cycles
;
140 long long start_retired
= 0;
141 long long stop_retired
= 0;
142 long long num_retired
;
// Result array, cleared element by element before the kernel runs.
144 float results_data
[ARRAY_SIZE
];
145 for ( i
= 0; i
< DIM_SIZE
; i
++ )
146 for ( j
= 0; j
< DIM_SIZE
; j
++ )
147 results_data
[i
+ j
*DIM_SIZE
] = 0.0f
;
149 // Output the input array
// NOTE(review): presumably these dumps are debug-only; any guard around them
// is not visible here - confirm.
152 printArray( "input1", ARRAY_SIZE
, input1_data
);
153 printArray( "input2", ARRAY_SIZE
, input2_data
);
154 printArray( "verify", ARRAY_SIZE
, verify_data
);
155 printArray( "results", ARRAY_SIZE
, results_data
);
158 // --------------------------------------------------
159 // If needed we preallocate everything in the caches
// NOTE(review): the preallocation code this comment introduces is not
// visible in this view.
167 // --------------------------------------------------
// Sample both counters immediately before the kernel.
169 start_cycles
= getCycles();
170 start_retired
= getInstRetired();
// NOTE(review): three implementations are invoked back to back as written.
// The file header says only one should be enabled, so the selection guards
// were presumably lost - confirm. The matmul() defined in this file adds to
// the values already in results_data.
173 matmul( DIM_SIZE
, input1_data
, input2_data
, results_data
);
177 scalar_matmul_asm( DIM_SIZE
, input1_data
, input2_data
, results_data
);
182 vt_matmul_asm( DIM_SIZE
, input1_data
, input2_data
, results_data
);
// Sample both counters again and take the differences.
189 stop_cycles
= getCycles();
190 stop_retired
= getInstRetired();
191 num_cycles
= stop_cycles
- start_cycles
;
192 num_retired
= stop_retired
- start_retired
;
195 // --------------------------------------------------
196 // Print out the results
199 printArray( "results", ARRAY_SIZE
, results_data
);
203 // --------------------------------------------------
// Check the results against the reference data and report the outcome
// together with the measured counts.
205 int correct
= verify( ARRAY_SIZE
, results_data
, verify_data
);
206 finishTest(correct
, num_cycles
, num_retired
);