src/gallium/auxiliary/util/u_half.c

   1 #include "util/u_half.h"
   2
   3 /* see www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
   4  * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
   5  */
   6
   7 /* Note that using a 64K * 4 table is a terrible idea since it will not fit
   8  * in the L1 cache and will massively pollute the L2 cache as well
   9  *
  10  * These should instead fit in the L1 cache.
  11  *
  12  * TODO: we could use a denormal bias table instead of the mantissa/offset
  13  * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
  14  * but would involve more computation
  15  *
  16  * Note however that if denormals are never encountered, the L1 cache usage
  17  * is only about 4608 bytes anyway.
  18  */
  19 uint32_t util_half_to_float_mantissa_table[2048];
  20 uint32_t util_half_to_float_exponent_table[64];
  21 uint32_t util_half_to_float_offset_table[64];
  22 uint16_t util_float_to_half_base_table[512];
  23 uint8_t util_float_to_half_shift_table[512];
  24
  25 void util_half_init_tables(void)
  26 {
  27         int i;
  28
  29         /* zero */
  30         util_half_to_float_mantissa_table[0] = 0;
  31
  32         /* denormals */
  33         for(i = 1; i < 1024; ++i) {
  34                 unsigned int m = i << 13;
  35                 unsigned int e = 0;
  36
  37                 /* Normalize number */
  38                 while(!(m & 0x00800000)) {
  39                         e -= 0x00800000;
  40                         m<<=1;
  41                 }
  42                 m &= ~0x00800000;
  43                 e+= 0x38800000;
  44                 util_half_to_float_mantissa_table[i] = m | e;
  45         }
  46
  47         /* normals */
  48         for(i = 1024; i < 2048; ++i)
  49                 util_half_to_float_mantissa_table[i] = ((i-1024)<<13);
  50
  51         /* positive zero or denormals */
  52         util_half_to_float_exponent_table[0] = 0;
  53
  54         /* positive numbers */
  55         for(i = 1; i <= 30; ++i)
  56                 util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
  57
  58         /* positive infinity/NaN */
  59         util_half_to_float_exponent_table[31] = 0x7f800000;
  60
  61         /* negative zero or denormals */
  62         util_half_to_float_exponent_table[32] = 0x80000000;
  63
  64         /* negative numbers */
  65         for(i = 33; i <= 62; ++i)
  66                 util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
  67
  68         /* negative infinity/NaN */
  69         util_half_to_float_exponent_table[63] = 0xff800000;
  70
  71         /* positive zero or denormals */
  72         util_half_to_float_offset_table[0] = 0;
  73
  74         /* positive normals */
  75         for(i = 1; i < 32; ++i)
  76                 util_half_to_float_offset_table[i] = 1024;
  77
  78         /* negative zero or denormals */
  79         util_half_to_float_offset_table[32] = 0;
  80
  81         /* negative normals */
  82         for(i = 33; i < 64; ++i)
  83                 util_half_to_float_offset_table[i] = 1024;
  84
  85
  86
  87         /* very small numbers mapping to zero */
  88         for(i = -127; i < -24; ++i) {
  89                 util_float_to_half_base_table[127 + i] = 0;
  90                 util_float_to_half_shift_table[127 + i] = 24;
  91         }
  92
  93         /* small numbers mapping to denormals */
  94         for(i = -24; i < -14; ++i) {
  95                 util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
  96                 util_float_to_half_shift_table[127 + i] = -i - 1;
  97         }
  98
  99         /* normal numbers */
 100         for(i = -14; i < 16; ++i) {
 101                 util_float_to_half_base_table[127 + i] = (i + 15) << 10;
 102                 util_float_to_half_shift_table[127 + i] = 13;
 103         }
 104
 105         /* large numbers mapping to infinity */
 106         for(i = 16; i < 128; ++i) {
 107                 util_float_to_half_base_table[127 + i] = 0x7c00;
 108                 util_float_to_half_shift_table[127 + i] = 24;
 109         }
 110
 111         /* infinity and NaNs */
 112         util_float_to_half_base_table[255] = 0x7c00;
 113         util_float_to_half_shift_table[255] = 13;
 114
 115         /* negative numbers */
 116         for(i = 0; i < 256; ++i) {
 117                 util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
 118                 util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
 119         }
 120 }