src/mesa/math/m_debug_util.h

   1 /* $Id: m_debug_util.h,v 1.4 2001/05/23 14:27:03 brianp Exp $ */
   2
   3 /*
   4  * Mesa 3-D graphics library
   5  * Version:  3.5
   6  *
   7  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included
  17  * in all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  23  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  * Authors:
  27  *    Gareth Hughes <gareth@valinux.com>
  28  */
  29
  30 #ifndef __M_DEBUG_UTIL_H__
  31 #define __M_DEBUG_UTIL_H__
  32
  33
  34 #ifdef DEBUG  /* This code only used for debugging */
  35
  36
  37 /* Comment this out to deactivate the cycle counter.
  38  * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
  39  * (hope, you don't try to debug Mesa on a 386 ;)
  40  */
  41 #if defined(__GNUC__) && \
  42     ((defined(__i386__) && defined(USE_X86_ASM)) || \
  43      (defined(__sparc__) && defined(USE_SPARC_ASM)))
  44 #define  RUN_DEBUG_BENCHMARK
  45 #endif
  46
  47 #define TEST_COUNT              128     /* size of the tested vector array   */
  48
  49 #define REQUIRED_PRECISION      10      /* allow 4 bits to miss              */
  50 #define MAX_PRECISION           24      /* max. precision possible           */
  51
  52
  53 #ifdef  RUN_DEBUG_BENCHMARK
  54 /* Overhead of profiling counter in cycles.  Automatically adjusted to
  55  * your machine at run time - counter initialization should give very
  56  * consistent results.
  57  */
  58 extern long counter_overhead;
  59
  60 /* This is the value of the environment variable MESA_PROFILE, and is
  61  * used to determine if we should benchmark the functions as well as
  62  * verify their correctness.
  63  */
  64 extern char *mesa_profile;
  65
/* Modify the number of tests if you like.
 * We take the minimum of all results, because every error should be
 * positive (time used by other processes, task switches etc).
 * It is assumed that all calculations are done in the cache.
 */
  71
  72 #if defined(__i386__)
  73
  74 #if 1 /* PPro, PII, PIII version */
  75
  76 /* Profiling on the P6 architecture requires a little more work, due to
  77  * the internal out-of-order execution.  We must perform a serializing
  78  * 'cpuid' instruction before and after the 'rdtsc' instructions to make
  79  * sure no other uops are executed when we sample the timestamp counter.
  80  */
  81 #define  INIT_COUNTER()                                                 \
  82    do {                                                                 \
  83       int cycle_i;                                                      \
  84       counter_overhead = LONG_MAX;                                      \
  85       for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {                   \
  86          long cycle_tmp1 = 0, cycle_tmp2 = 0;                           \
  87          __asm__ __volatile__ ( "push %%ebx       \n"                   \
  88                                 "xor %%eax, %%eax \n"                   \
  89                                 "cpuid            \n"                   \
  90                                 "rdtsc            \n"                   \
  91                                 "mov %%eax, %0    \n"                   \
  92                                 "xor %%eax, %%eax \n"                   \
  93                                 "cpuid            \n"                   \
  94                                 "pop %%ebx        \n"                   \
  95                                 "push %%ebx       \n"                   \
  96                                 "xor %%eax, %%eax \n"                   \
  97                                 "cpuid            \n"                   \
  98                                 "rdtsc            \n"                   \
  99                                 "mov %%eax, %1    \n"                   \
 100                                 "xor %%eax, %%eax \n"                   \
 101                                 "cpuid            \n"                   \
 102                                 "pop %%ebx        \n"                   \
 103                                 : "=m" (cycle_tmp1), "=m" (cycle_tmp2)  \
 104                                 : : "eax", "ecx", "edx" );              \
 105          if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
 106             counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
 107          }                                                              \
 108       }                                                                 \
 109    } while (0)
 110
 111 #define  BEGIN_RACE(x)                                                  \
 112    x = LONG_MAX;                                                        \
 113    for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
 114       long cycle_tmp1 = 0, cycle_tmp2 = 0;                              \
 115       __asm__ __volatile__ ( "push %%ebx       \n"                      \
 116                              "xor %%eax, %%eax \n"                      \
 117                              "cpuid            \n"                      \
 118                              "rdtsc            \n"                      \
 119                              "mov %%eax, %0    \n"                      \
 120                              "xor %%eax, %%eax \n"                      \
 121                              "cpuid            \n"                      \
 122                              "pop %%ebx        \n"                      \
 123                              : "=m" (cycle_tmp1)                        \
 124                              : : "eax", "ecx", "edx" );
 125
 126 #define END_RACE(x)                                                     \
 127       __asm__ __volatile__ ( "push %%ebx       \n"                      \
 128                              "xor %%eax, %%eax \n"                      \
 129                              "cpuid            \n"                      \
 130                              "rdtsc            \n"                      \
 131                              "mov %%eax, %0    \n"                      \
 132                              "xor %%eax, %%eax \n"                      \
 133                              "cpuid            \n"                      \
 134                              "pop %%ebx        \n"                      \
 135                              : "=m" (cycle_tmp2)                        \
 136                              : : "eax", "ecx", "edx" );                 \
 137       if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
 138          x = cycle_tmp2 - cycle_tmp1;                                   \
 139       }                                                                 \
 140    }                                                                    \
 141    x -= counter_overhead;
 142
 143 #else /* PPlain, PMMX version */
 144
 145 /* To ensure accurate results, we stall the pipelines with the
 146  * non-pairable 'cdq' instruction.  This ensures all the code being
 147  * profiled is complete when the 'rdtsc' instruction executes.
 148  */
 149 #define  INIT_COUNTER(x)                                                \
 150    do {                                                                 \
 151       int cycle_i;                                                      \
 152       x = LONG_MAX;                                                     \
 153       for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {                  \
 154          long cycle_tmp1, cycle_tmp2, dummy;                            \
 155          __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );               \
 156          __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );               \
 157          __asm__ ( "cdq" );                                             \
 158          __asm__ ( "cdq" );                                             \
 159          __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );         \
 160          __asm__ ( "cdq" );                                             \
 161          __asm__ ( "cdq" );                                             \
 162          __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );         \
 163          if ( x > (cycle_tmp2 - cycle_tmp1) )                           \
 164             x = cycle_tmp2 - cycle_tmp1;                                \
 165       }                                                                 \
 166    } while (0)
 167
 168 #define  BEGIN_RACE(x)                                                  \
 169    x = LONG_MAX;                                                        \
 170    for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                     \
 171       long cycle_tmp1, cycle_tmp2, dummy;                               \
 172       __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );                  \
 173       __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );                  \
 174       __asm__ ( "cdq" );                                                \
 175       __asm__ ( "cdq" );                                                \
 176       __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );
 177
 178
 179 #define END_RACE(x)                                                     \
 180       __asm__ ( "cdq" );                                                \
 181       __asm__ ( "cdq" );                                                \
 182       __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );            \
 183       if ( x > (cycle_tmp2 - cycle_tmp1) )                              \
 184          x = cycle_tmp2 - cycle_tmp1;                                   \
 185    }                                                                    \
 186    x -= counter_overhead;
 187
 188 #endif
 189
 190 #elif defined(__sparc__)
 191
 192 #define  INIT_COUNTER() \
 193          do { counter_overhead = 5; } while(0)
 194
 195 #define  BEGIN_RACE(x)                                                        \
 196 x = LONG_MAX;                                                                 \
 197 for (cycle_i = 0; cycle_i <10; cycle_i++) {                                   \
 198    register long cycle_tmp1 asm("l0");                                        \
 199    register long cycle_tmp2 asm("l1");                                        \
 200    /* rd %tick, %l0 */                                                        \
 201    __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));  /*  save timestamp   */
 202
 203 #define END_RACE(x)                                                           \
 204    /* rd %tick, %l1 */                                                        \
 205    __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));             \
 206    if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;              \
 207 }                                                                             \
 208 x -= counter_overhead;
 209
 210 #else
 211 #error Your processor is not supported for RUN_XFORM_BENCHMARK
 212 #endif
 213
 214 #else
 215
 216 #define BEGIN_RACE(x)
 217 #define END_RACE(x)
 218
 219 #endif
 220
 221
 222 /* =============================================================
 223  * Helper functions
 224  */
 225
 226 static GLfloat rnd( void )
 227 {
 228    GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
 229    GLfloat gran = (GLfloat)(1 << 13);
 230
 231    f = (GLfloat)(GLint)(f * gran) / gran;
 232
 233    return f * 2.0 - 1.0;
 234 }
 235
 236 static int significand_match( GLfloat a, GLfloat b )
 237 {
 238    GLfloat d = a - b;
 239    int a_ex, b_ex, d_ex;
 240
 241    if ( d == 0.0F ) {
 242       return MAX_PRECISION;   /* Exact match */
 243    }
 244
 245    if ( a == 0.0F || b == 0.0F ) {
 246       /* It would probably be better to check if the
 247        * non-zero number is denormalized and return
 248        * the index of the highest set bit here.
 249        */
 250       return 0;
 251    }
 252
 253    frexp( a, &a_ex );
 254    frexp( b, &b_ex );
 255    frexp( d, &d_ex );
 256
 257    if ( a_ex < b_ex ) {
 258       return a_ex - d_ex;
 259    } else {
 260       return b_ex - d_ex;
 261    }
 262 }
 263
 264 enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };
 265
 266 static void init_matrix( GLfloat *m )
 267 {
 268    m[0] = 63.0; m[4] = 43.0; m[ 8] = 29.0; m[12] = 43.0;
 269    m[1] = 55.0; m[5] = 17.0; m[ 9] = 31.0; m[13] =  7.0;
 270    m[2] = 44.0; m[6] =  9.0; m[10] =  7.0; m[14] =  3.0;
 271    m[3] = 11.0; m[7] = 23.0; m[11] = 91.0; m[15] =  9.0;
 272 }
 273
 274
 275 /* Ensure our arrays are correctly aligned.
 276  */
 277 #if defined(__GNUC__)
 278 #  define ALIGN16       __attribute__ ((aligned (16)))
 279 #elif defined(__MSC__)
 280 #  define ALIGN16       __declspec(align(16)) /* GH: Does this work? */
 281 #else
 282 #  warning "ALIGN16 will not 16-byte align!\n"
 283 #  define ALIGN16
 284 #endif
 285
 286
 287 #endif /* DEBUG */
 288
 289 #endif /* __M_DEBUG_UTIL_H__ */