2 * Mesa 3-D graphics library
4 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
28 #ifndef __M_DEBUG_UTIL_H__
29 #define __M_DEBUG_UTIL_H__
32 #ifdef DEBUG_MATH /* This code only used for debugging */
38 /* Comment this out to deactivate the cycle counter.
39 * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
40 * (hope, you don't try to debug Mesa on a 386 ;)
42 #if defined(__GNUC__) && \
43 ((defined(__i386__) && defined(USE_X86_ASM)) || \
44 (defined(__sparc__) && defined(USE_SPARC_ASM)))
45 #define RUN_DEBUG_BENCHMARK
48 #define TEST_COUNT 128 /* size of the tested vector array */
50 #define REQUIRED_PRECISION 10 /* allow 4 bits to miss */
51 #define MAX_PRECISION 24 /* max. precision possible */
/* NOTE(review): this region lost lines in extraction -- the block comments
 * below are missing their closing markers (so the following text is
 * swallowed by an open comment) and the extern declarations are split
 * mid-token.  Preserved verbatim; recover the missing ' */' lines and the
 * one-line 'extern long counter_overhead;' / 'extern char *mesa_profile;'
 * declarations from the upstream Mesa source.
 */
54 #ifdef RUN_DEBUG_BENCHMARK
55 /* Overhead of profiling counter in cycles. Automatically adjusted to
56 * your machine at run time - counter initialization should give very
59 extern long counter_overhead
;
61 /* This is the value of the environment variable MESA_PROFILE, and is
62 * used to determine if we should benchmark the functions as well as
63 * verify their correctness.
65 extern char *mesa_profile
;
67 /* Modify the number of tests if you like.
68 * We take the minimum of all results, because every error should be
69 * positive (time used by other processes, task switches etc).
70 * It is assumed that all calculations are done in the cache.
75 #if 1 /* PPro, PII, PIII version */
/* NOTE(review): the P6 profiling macros below are incomplete in this
 * extract -- the serializing 'cpuid'/'rdtsc'/'mov'/'pop %%ebx' asm lines,
 * the do{...}while(0) framing, and several closing braces were dropped.
 * Only the surviving lines are preserved here, byte-for-byte; do not try
 * to compile this region as-is.
 */
77 /* Profiling on the P6 architecture requires a little more work, due to
78 * the internal out-of-order execution. We must perform a serializing
79 * 'cpuid' instruction before and after the 'rdtsc' instructions to make
80 * sure no other uops are executed when we sample the timestamp counter.
82 #define INIT_COUNTER() \
85 counter_overhead = LONG_MAX; \
86 for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) { \
87 long cycle_tmp1 = 0, cycle_tmp2 = 0; \
88 __asm__ __volatile__ ( "push %%ebx \n" \
89 "xor %%eax, %%eax \n" \
93 "xor %%eax, %%eax \n" \
97 "xor %%eax, %%eax \n" \
101 "xor %%eax, %%eax \n" \
104 : "=m" (cycle_tmp1), "=m" (cycle_tmp2) \
105 : : "eax", "ecx", "edx" ); \
106 if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \
107 counter_overhead = cycle_tmp2 - cycle_tmp1; \
112 #define BEGIN_RACE(x) \
114 for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \
115 long cycle_tmp1 = 0, cycle_tmp2 = 0; \
116 __asm__ __volatile__ ( "push %%ebx \n" \
117 "xor %%eax, %%eax \n" \
121 "xor %%eax, %%eax \n" \
124 : "=m" (cycle_tmp1) \
125 : : "eax", "ecx", "edx" );
127 #define END_RACE(x) \
128 __asm__ __volatile__ ( "push %%ebx \n" \
129 "xor %%eax, %%eax \n" \
133 "xor %%eax, %%eax \n" \
136 : "=m" (cycle_tmp2) \
137 : : "eax", "ecx", "edx" ); \
138 if ( x > (cycle_tmp2 - cycle_tmp1) ) { \
139 x = cycle_tmp2 - cycle_tmp1; \
142 x -= counter_overhead;
144 #else /* PPlain, PMMX version */
/* NOTE(review): the P5 variant below is likewise truncated -- the 'cdq'
 * pipeline-stall asm lines and loop-closing braces were lost in
 * extraction.  Preserved verbatim.
 */
146 /* To ensure accurate results, we stall the pipelines with the
147 * non-pairable 'cdq' instruction. This ensures all the code being
148 * profiled is complete when the 'rdtsc' instruction executes.
150 #define INIT_COUNTER(x) \
154 for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) { \
155 long cycle_tmp1, cycle_tmp2, dummy; \
156 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \
157 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \
160 __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); \
163 __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
164 if ( x > (cycle_tmp2 - cycle_tmp1) ) \
165 x = cycle_tmp2 - cycle_tmp1; \
169 #define BEGIN_RACE(x) \
171 for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \
172 long cycle_tmp1, cycle_tmp2, dummy; \
173 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \
174 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \
177 __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );
180 #define END_RACE(x) \
183 __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
184 if ( x > (cycle_tmp2 - cycle_tmp1) ) \
185 x = cycle_tmp2 - cycle_tmp1; \
187 x -= counter_overhead;
191 #elif defined(__x86_64__)
193 #define rdtscll(val) do { \
195 __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); \
196 (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
199 /* Copied from i386 PIII version */
/* NOTE(review): x86_64 benchmark macros.  This extract dropped the
 * 'unsigned int a, d;' declaration line of rdtscll, the '} while(0)'
 * terminators, and the do{ / closing-brace lines of the three macros
 * below -- recover them from upstream Mesa before compiling.  Takes the
 * minimum over the loop iterations, same scheme as the i386 version.
 */
200 #define INIT_COUNTER() \
203 counter_overhead = LONG_MAX; \
204 for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \
205 unsigned long cycle_tmp1, cycle_tmp2; \
206 rdtscll(cycle_tmp1); \
207 rdtscll(cycle_tmp2); \
208 if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \
209 counter_overhead = cycle_tmp2 - cycle_tmp1; \
215 #define BEGIN_RACE(x) \
217 for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \
218 unsigned long cycle_tmp1, cycle_tmp2; \
219 rdtscll(cycle_tmp1); \
221 #define END_RACE(x) \
222 rdtscll(cycle_tmp2); \
223 if ( x > (cycle_tmp2 - cycle_tmp1) ) { \
224 x = cycle_tmp2 - cycle_tmp1; \
227 x -= counter_overhead;
/* NOTE(review): SPARC variant reads the %tick cycle counter via raw
 * .word encodings (the instruction mnemonics are given in the inline
 * comments).  Counter overhead is a fixed estimate of 5 cycles rather
 * than being measured.  Loop-closing lines and the trailing #else/#endif
 * of this #elif chain were lost in extraction -- preserved verbatim.
 */
229 #elif defined(__sparc__)
231 #define INIT_COUNTER() \
232 do { counter_overhead = 5; } while(0)
/* BEGIN_RACE pins the two timestamps in registers %l0/%l1 across the
 * profiled code so END_RACE can compute the delta.
 */
234 #define BEGIN_RACE(x) \
236 for (cycle_i = 0; cycle_i <10; cycle_i++) { \
237 register long cycle_tmp1 __asm__("l0"); \
238 register long cycle_tmp2 __asm__("l1"); \
239 /* rd %tick, %l0 */ \
240 __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1)); /* save timestamp */
242 #define END_RACE(x) \
243 /* rd %tick, %l1 */ \
244 __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2)); \
245 if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1; \
247 x -= counter_overhead;
/* Unsupported CPU: fail loudly rather than produce bogus timings. */
250 #error Your processor is not supported for RUN_XFORM_BENCHMARK
/* Benchmark disabled: the race macros collapse to nothing. */
255 #define BEGIN_RACE(x)
261 /* =============================================================
265 static GLfloat
rnd( void )
267 GLfloat f
= (GLfloat
)rand() / (GLfloat
)RAND_MAX
;
268 GLfloat gran
= (GLfloat
)(1 << 13);
270 f
= (GLfloat
)(GLint
)(f
* gran
) / gran
;
272 return f
* 2.0 - 1.0;
275 static int significand_match( GLfloat a
, GLfloat b
)
278 int a_ex
, b_ex
, d_ex
;
281 return MAX_PRECISION
; /* Exact match */
284 if ( a
== 0.0F
|| b
== 0.0F
) {
285 /* It would probably be better to check if the
286 * non-zero number is denormalized and return
287 * the index of the highest set bit here.
/* Symbolic classifications for test-matrix elements: exactly zero,
 * exactly one, exactly minus one, or an arbitrary (variable) value.
 */
enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };
305 /* Ensure our arrays are correctly aligned.
307 #if defined(__GNUC__)
308 # define ALIGN16(type, array) type array __attribute__ ((aligned (16)))
309 #elif defined(_MSC_VER)
310 # define ALIGN16(type, array) type array __declspec(align(16)) /* GH: Does this work? */
311 #elif defined(__xlC__)
312 # define ALIGN16(type, array) type __align (16) array
314 # warning "ALIGN16 will not 16-byte align!\n"
319 #endif /* DEBUG_MATH */
321 #endif /* __M_DEBUG_UTIL_H__ */