src/util/u_math.h

   1 /**************************************************************************
   2  *
   3  * Copyright 2008 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * Math utilities and approximations for common math functions.
  31  * Reduced precision is usually acceptable in shaders...
  32  *
  33  * "fast" is used in the names of functions which are low-precision,
  34  * or at least lower-precision than the normal C lib functions.
  35  */
  36
  37
  38 #ifndef U_MATH_H
  39 #define U_MATH_H
  40
  41
  42 #include "c99_math.h"
  43 #include <assert.h>
  44 #include <float.h>
  45 #include <stdarg.h>
  46
  47 #include "bitscan.h"
  48
  49 #ifdef __cplusplus
  50 extern "C" {
  51 #endif
  52
  53
/* Fallback value of sqrt(2) for C libraries whose math header lacks M_SQRT2. */
#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif

/*
 * Geometry of the lookup table consulted by util_fast_exp2():
 *   POW2_TABLE_SIZE    number of entries (2^POW2_TABLE_SIZE_LOG2 = 512)
 *   POW2_TABLE_OFFSET  index added to the scaled fractional part so that
 *                      negative fractions still give a non-negative index
 *   POW2_TABLE_SCALE   factor applied to the fractional part before it is
 *                      truncated to an index
 * NOTE(review): the table contents are defined elsewhere; presumably they
 * are filled in by util_init_math() -- confirm.
 */
#define POW2_TABLE_SIZE_LOG2 9
#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2)
#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2)
#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2))
extern float pow2_table[POW2_TABLE_SIZE];


/**
 * Initialize math module.  This should be called before using any
 * other functions in this module.
 */
extern void
util_init_math(void);
  71
  72
/*
 * View of a 32-bit float and its raw bit pattern, used for type punning:
 * write one member, read another.
 */
union fi {
   float f;       /* value as a float */
   int32_t i;     /* same bits as a signed integer */
   uint32_t ui;   /* same bits as an unsigned integer */
};


/*
 * View of a 64-bit double and its raw bit pattern, used for type punning:
 * write one member, read another.
 */
union di {
   double d;      /* value as a double */
   int64_t i;     /* same bits as a signed integer */
   uint64_t ui;   /* same bits as an unsigned integer */
};
  85
  86
  87 /**
  88  * Extract the IEEE float32 exponent.
  89  */
  90 static inline signed
  91 util_get_float32_exponent(float x)
  92 {
  93    union fi f;
  94
  95    f.f = x;
  96
  97    return ((f.ui >> 23) & 0xff) - 127;
  98 }
  99
 100
 101 /**
 102  * Fast version of 2^x
 103  * Identity: exp2(a + b) = exp2(a) * exp2(b)
 104  * Let ipart = int(x)
 105  * Let fpart = x - ipart;
 106  * So, exp2(x) = exp2(ipart) * exp2(fpart)
 107  * Compute exp2(ipart) with i << ipart
 108  * Compute exp2(fpart) with lookup table.
 109  */
 110 static inline float
 111 util_fast_exp2(float x)
 112 {
 113    int32_t ipart;
 114    float fpart, mpart;
 115    union fi epart;
 116
 117    if(x > 129.00000f)
 118       return 3.402823466e+38f;
 119
 120    if (x < -126.99999f)
 121       return 0.0f;
 122
 123    ipart = (int32_t) x;
 124    fpart = x - (float) ipart;
 125
 126    /* same as
 127     *   epart.f = (float) (1 << ipart)
 128     * but faster and without integer overflow for ipart > 31
 129     */
 130    epart.i = (ipart + 127 ) << 23;
 131
 132    mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];
 133
 134    return epart.f * mpart;
 135 }
 136
 137
 138 /**
 139  * Fast approximation to exp(x).
 140  */
 141 static inline float
 142 util_fast_exp(float x)
 143 {
 144    const float k = 1.44269f; /* = log2(e) */
 145    return util_fast_exp2(k * x);
 146 }
 147
 148
 149 #define LOG2_TABLE_SIZE_LOG2 16
 150 #define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2)
 151 #define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1)
 152 extern float log2_table[LOG2_TABLE_SIZE];
 153
 154
 155 /**
 156  * Fast approximation to log2(x).
 157  */
 158 static inline float
 159 util_fast_log2(float x)
 160 {
 161    union fi num;
 162    float epart, mpart;
 163    num.f = x;
 164    epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
 165    /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */
 166    mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)];
 167    return epart + mpart;
 168 }
 169
 170
 171 /**
 172  * Fast approximation to x^y.
 173  */
 174 static inline float
 175 util_fast_pow(float x, float y)
 176 {
 177    return util_fast_exp2(util_fast_log2(x) * y);
 178 }
 179
 180
 181 /**
 182  * Floor(x), returned as int.
 183  */
 184 static inline int
 185 util_ifloor(float f)
 186 {
 187    int ai, bi;
 188    double af, bf;
 189    union fi u;
 190    af = (3 << 22) + 0.5 + (double) f;
 191    bf = (3 << 22) + 0.5 - (double) f;
 192    u.f = (float) af;  ai = u.i;
 193    u.f = (float) bf;  bi = u.i;
 194    return (ai - bi) >> 1;
 195 }
 196
 197
 198 /**
 199  * Round float to nearest int.
 200  */
 201 static inline int
 202 util_iround(float f)
 203 {
 204 #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
 205    int r;
 206    __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
 207    return r;
 208 #elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
 209    int r;
 210    _asm {
 211       fld f
 212       fistp r
 213    }
 214    return r;
 215 #else
 216    if (f >= 0.0f)
 217       return (int) (f + 0.5f);
 218    else
 219       return (int) (f - 0.5f);
 220 #endif
 221 }
 222
 223
 224 /**
 225  * Approximate floating point comparison
 226  */
 227 static inline bool
 228 util_is_approx(float a, float b, float tol)
 229 {
 230    return fabsf(b - a) <= tol;
 231 }
 232
 233
 234 /**
 235  * util_is_X_inf_or_nan = test if x is NaN or +/- Inf
 236  * util_is_X_nan        = test if x is NaN
 237  * util_X_inf_sign      = return +1 for +Inf, -1 for -Inf, or 0 for not Inf
 238  *
 239  * NaN can be checked with x != x, however this fails with the fast math flag
 240  **/
 241
 242
 243 /**
 244  * Single-float
 245  */
 246 static inline bool
 247 util_is_inf_or_nan(float x)
 248 {
 249    union fi tmp;
 250    tmp.f = x;
 251    return (tmp.ui & 0x7f800000) == 0x7f800000;
 252 }
 253
 254
 255 static inline bool
 256 util_is_nan(float x)
 257 {
 258    union fi tmp;
 259    tmp.f = x;
 260    return (tmp.ui & 0x7fffffff) > 0x7f800000;
 261 }
 262
 263
 264 static inline int
 265 util_inf_sign(float x)
 266 {
 267    union fi tmp;
 268    tmp.f = x;
 269    if ((tmp.ui & 0x7fffffff) != 0x7f800000) {
 270       return 0;
 271    }
 272
 273    return (x < 0) ? -1 : 1;
 274 }
 275
 276
 277 /**
 278  * Double-float
 279  */
 280 static inline bool
 281 util_is_double_inf_or_nan(double x)
 282 {
 283    union di tmp;
 284    tmp.d = x;
 285    return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL;
 286 }
 287
 288
 289 static inline bool
 290 util_is_double_nan(double x)
 291 {
 292    union di tmp;
 293    tmp.d = x;
 294    return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
 295 }
 296
 297
 298 static inline int
 299 util_double_inf_sign(double x)
 300 {
 301    union di tmp;
 302    tmp.d = x;
 303    if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) {
 304       return 0;
 305    }
 306
 307    return (x < 0) ? -1 : 1;
 308 }
 309
 310
 311 /**
 312  * Half-float
 313  */
 314 static inline bool
 315 util_is_half_inf_or_nan(int16_t x)
 316 {
 317    return (x & 0x7c00) == 0x7c00;
 318 }
 319
 320
 321 static inline bool
 322 util_is_half_nan(int16_t x)
 323 {
 324    return (x & 0x7fff) > 0x7c00;
 325 }
 326
 327
 328 static inline int
 329 util_half_inf_sign(int16_t x)
 330 {
 331    if ((x & 0x7fff) != 0x7c00) {
 332       return 0;
 333    }
 334
 335    return (x < 0) ? -1 : 1;
 336 }
 337
 338
 339 /**
 340  * Return float bits.
 341  */
 342 static inline unsigned
 343 fui( float f )
 344 {
 345    union fi fi;
 346    fi.f = f;
 347    return fi.ui;
 348 }
 349
 350 static inline float
 351 uif(uint32_t ui)
 352 {
 353    union fi fi;
 354    fi.ui = ui;
 355    return fi.f;
 356 }
 357
 358
 359 /**
 360  * Convert uint8_t to float in [0, 1].
 361  */
 362 static inline float
 363 ubyte_to_float(uint8_t ub)
 364 {
 365    return (float) ub * (1.0f / 255.0f);
 366 }
 367
 368
 369 /**
 370  * Convert float in [0,1] to uint8_t in [0,255] with clamping.
 371  */
 372 static inline uint8_t
 373 float_to_ubyte(float f)
 374 {
 375    /* return 0 for NaN too */
 376    if (!(f > 0.0f)) {
 377       return (uint8_t) 0;
 378    }
 379    else if (f >= 1.0f) {
 380       return (uint8_t) 255;
 381    }
 382    else {
 383       union fi tmp;
 384       tmp.f = f;
 385       tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
 386       return (uint8_t) tmp.i;
 387    }
 388 }
 389
 390 /**
 391  * Convert uint16_t to float in [0, 1].
 392  */
 393 static inline float
 394 ushort_to_float(uint16_t us)
 395 {
 396    return (float) us * (1.0f / 65535.0f);
 397 }
 398
 399
 400 /**
 401  * Convert float in [0,1] to uint16_t in [0,65535] with clamping.
 402  */
 403 static inline uint16_t
 404 float_to_ushort(float f)
 405 {
 406    /* return 0 for NaN too */
 407    if (!(f > 0.0f)) {
 408       return (uint16_t) 0;
 409    }
 410    else if (f >= 1.0f) {
 411       return (uint16_t) 65535;
 412    }
 413    else {
 414       union fi tmp;
 415       tmp.f = f;
 416       tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f;
 417       return (uint16_t) tmp.i;
 418    }
 419 }
 420
 421 static inline float
 422 byte_to_float_tex(int8_t b)
 423 {
 424    return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
 425 }
 426
 427 static inline int8_t
 428 float_to_byte_tex(float f)
 429 {
 430    return (int8_t) (127.0F * f);
 431 }
 432
 433 /**
 434  * Calc log base 2
 435  */
 436 static inline unsigned
 437 util_logbase2(unsigned n)
 438 {
 439 #if defined(HAVE___BUILTIN_CLZ)
 440    return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1));
 441 #else
 442    unsigned pos = 0;
 443    if (n >= 1<<16) { n >>= 16; pos += 16; }
 444    if (n >= 1<< 8) { n >>=  8; pos +=  8; }
 445    if (n >= 1<< 4) { n >>=  4; pos +=  4; }
 446    if (n >= 1<< 2) { n >>=  2; pos +=  2; }
 447    if (n >= 1<< 1) {           pos +=  1; }
 448    return pos;
 449 #endif
 450 }
 451
 452 static inline uint64_t
 453 util_logbase2_64(uint64_t n)
 454 {
 455 #if defined(HAVE___BUILTIN_CLZLL)
 456    return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
 457 #else
 458    uint64_t pos = 0ull;
 459    if (n >= 1ull<<32) { n >>= 32; pos += 32; }
 460    if (n >= 1ull<<16) { n >>= 16; pos += 16; }
 461    if (n >= 1ull<< 8) { n >>=  8; pos +=  8; }
 462    if (n >= 1ull<< 4) { n >>=  4; pos +=  4; }
 463    if (n >= 1ull<< 2) { n >>=  2; pos +=  2; }
 464    if (n >= 1ull<< 1) {           pos +=  1; }
 465    return pos;
 466 #endif
 467 }
 468
 469 /**
 470  * Returns the ceiling of log n base 2, and 0 when n == 0. Equivalently,
 471  * returns the smallest x such that n <= 2**x.
 472  */
 473 static inline unsigned
 474 util_logbase2_ceil(unsigned n)
 475 {
 476    if (n <= 1)
 477       return 0;
 478
 479    return 1 + util_logbase2(n - 1);
 480 }
 481
 482 static inline uint64_t
 483 util_logbase2_ceil64(uint64_t n)
 484 {
 485    if (n <= 1)
 486       return 0;
 487
 488    return 1ull + util_logbase2_64(n - 1);
 489 }
 490
 491 /**
 492  * Returns the smallest power of two >= x
 493  */
 494 static inline unsigned
 495 util_next_power_of_two(unsigned x)
 496 {
 497 #if defined(HAVE___BUILTIN_CLZ)
 498    if (x <= 1)
 499        return 1;
 500
 501    return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1)));
 502 #else
 503    unsigned val = x;
 504
 505    if (x <= 1)
 506       return 1;
 507
 508    if (util_is_power_of_two_or_zero(x))
 509       return x;
 510
 511    val--;
 512    val = (val >> 1) | val;
 513    val = (val >> 2) | val;
 514    val = (val >> 4) | val;
 515    val = (val >> 8) | val;
 516    val = (val >> 16) | val;
 517    val++;
 518    return val;
 519 #endif
 520 }
 521
 522 static inline uint64_t
 523 util_next_power_of_two64(uint64_t x)
 524 {
 525 #if defined(HAVE___BUILTIN_CLZLL)
 526    if (x <= 1)
 527        return 1;
 528
 529    return (1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1)));
 530 #else
 531    uint64_t val = x;
 532
 533    if (x <= 1)
 534       return 1;
 535
 536    if (util_is_power_of_two_or_zero64(x))
 537       return x;
 538
 539    val--;
 540    val = (val >> 1)  | val;
 541    val = (val >> 2)  | val;
 542    val = (val >> 4)  | val;
 543    val = (val >> 8)  | val;
 544    val = (val >> 16) | val;
 545    val = (val >> 32) | val;
 546    val++;
 547    return val;
 548 #endif
 549 }
 550
 551 /**
 552  * Reverse bits in n
 553  * Algorithm taken from:
 554  * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
 555  */
 556 static inline unsigned
 557 util_bitreverse(unsigned n)
 558 {
 559     n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1);
 560     n = ((n >> 2) & 0x33333333u) | ((n & 0x33333333u) << 2);
 561     n = ((n >> 4) & 0x0f0f0f0fu) | ((n & 0x0f0f0f0fu) << 4);
 562     n = ((n >> 8) & 0x00ff00ffu) | ((n & 0x00ff00ffu) << 8);
 563     n = ((n >> 16) & 0xffffu) | ((n & 0xffffu) << 16);
 564     return n;
 565 }
 566
/**
 * Convert from little endian to CPU byte order.
 *
 * When PIPE_ARCH_BIG_ENDIAN is defined the value is byte swapped with
 * util_bswap16/32/64(); otherwise the macros expand to their argument
 * unchanged.
 */

#ifdef PIPE_ARCH_BIG_ENDIAN
#define util_le64_to_cpu(x) util_bswap64(x)
#define util_le32_to_cpu(x) util_bswap32(x)
#define util_le16_to_cpu(x) util_bswap16(x)
#else
#define util_le64_to_cpu(x) (x)
#define util_le32_to_cpu(x) (x)
#define util_le16_to_cpu(x) (x)
#endif

/* CPU byte order to little endian is the same operation, since a byte
 * swap is its own inverse.
 */
#define util_cpu_to_le64(x) util_le64_to_cpu(x)
#define util_cpu_to_le32(x) util_le32_to_cpu(x)
#define util_cpu_to_le16(x) util_le16_to_cpu(x)
 584
 585 /**
 586  * Reverse byte order of a 32 bit word.
 587  */
 588 static inline uint32_t
 589 util_bswap32(uint32_t n)
 590 {
 591 #if defined(HAVE___BUILTIN_BSWAP32)
 592    return __builtin_bswap32(n);
 593 #else
 594    return (n >> 24) |
 595           ((n >> 8) & 0x0000ff00) |
 596           ((n << 8) & 0x00ff0000) |
 597           (n << 24);
 598 #endif
 599 }
 600
 601 /**
 602  * Reverse byte order of a 64bit word.
 603  */
 604 static inline uint64_t
 605 util_bswap64(uint64_t n)
 606 {
 607 #if defined(HAVE___BUILTIN_BSWAP64)
 608    return __builtin_bswap64(n);
 609 #else
 610    return ((uint64_t)util_bswap32((uint32_t)n) << 32) |
 611           util_bswap32((n >> 32));
 612 #endif
 613 }
 614
 615
 616 /**
 617  * Reverse byte order of a 16 bit word.
 618  */
 619 static inline uint16_t
 620 util_bswap16(uint16_t n)
 621 {
 622    return (n >> 8) |
 623           (n << 8);
 624 }
 625
 626 static inline void*
 627 util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
 628 {
 629 #ifdef PIPE_ARCH_BIG_ENDIAN
 630    size_t i, e;
 631    assert(n % 4 == 0);
 632
 633    for (i = 0, e = n / 4; i < e; i++) {
 634       uint32_t * restrict d = (uint32_t* restrict)dest;
 635       const uint32_t * restrict s = (const uint32_t* restrict)src;
 636       d[i] = util_bswap32(s[i]);
 637    }
 638    return dest;
 639 #else
 640    return memcpy(dest, src, n);
 641 #endif
 642 }
 643
/**
 * Clamp X to [MIN, MAX].
 * This is a macro to allow float, int, uint, etc. types.
 * We arbitrarily turn NaN into MIN.
 *
 * Arguments may be evaluated more than once, so do not pass expressions
 * with side effects.
 */
#define CLAMP( X, MIN, MAX )  ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )

/* Minimum / maximum of two values.  Arguments may be evaluated twice. */
#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )

/* Minimum / maximum of three values, built on MIN2 / MAX2. */
#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))

/* Minimum / maximum of four values, built on MIN3 / MAX3. */
#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D))
#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))
 659
 660
 661 /**
 662  * Align a value, only works pot alignemnts.
 663  */
 664 static inline int
 665 align(int value, int alignment)
 666 {
 667    return (value + alignment - 1) & ~(alignment - 1);
 668 }
 669
 670 static inline uint64_t
 671 align64(uint64_t value, unsigned alignment)
 672 {
 673    return (value + alignment - 1) & ~((uint64_t)alignment - 1);
 674 }
 675
 676 /**
 677  * Works like align but on npot alignments.
 678  */
 679 static inline size_t
 680 util_align_npot(size_t value, size_t alignment)
 681 {
 682    if (value % alignment)
 683       return value + (alignment - (value % alignment));
 684    return value;
 685 }
 686
 687 static inline unsigned
 688 u_minify(unsigned value, unsigned levels)
 689 {
 690     return MAX2(1, value >> levels);
 691 }
 692
/* Copy the first four elements of SRC into DST.  Only defined here if no
 * earlier header provided it.
 */
#ifndef COPY_4V
#define COPY_4V( DST, SRC )         \
do {                                \
   (DST)[0] = (SRC)[0];             \
   (DST)[1] = (SRC)[1];             \
   (DST)[2] = (SRC)[2];             \
   (DST)[3] = (SRC)[3];             \
} while (0)
#endif


/* Alias of COPY_4V. */
#ifndef COPY_4FV
#define COPY_4FV( DST, SRC )  COPY_4V(DST, SRC)
#endif


/* Store V0..V3 into the first four elements of DST. */
#ifndef ASSIGN_4V
#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \
do {                                     \
   (DST)[0] = (V0);                      \
   (DST)[1] = (V1);                      \
   (DST)[2] = (V2);                      \
   (DST)[3] = (V3);                      \
} while (0)
#endif
 718
 719
 720 static inline uint32_t
 721 util_unsigned_fixed(float value, unsigned frac_bits)
 722 {
 723    return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits));
 724 }
 725
 726 static inline int32_t
 727 util_signed_fixed(float value, unsigned frac_bits)
 728 {
 729    return (int32_t)(value * (1<<frac_bits));
 730 }
 731
/*
 * Floating point state helpers; the implementations live outside this
 * header.
 * NOTE(review): going by the names, these presumably read the current FP
 * control state, switch denormals to flush-to-zero (returning a state
 * value), and restore a previously saved state -- confirm against the
 * implementation.
 */
unsigned
util_fpstate_get(void);
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
void
util_fpstate_set(unsigned fpstate);
 738
 739
 740
 741 #ifdef __cplusplus
 742 }
 743 #endif
 744
 745 #endif /* U_MATH_H */