src/gallium/auxiliary/util/u_half.c

   1 #include "util/u_half.h"
   2
   3 /* see www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
   4  * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
   5  */
   6
   7 /* Note that using a 64K * 4 table is a terrible idea since it will not fit
   8  * in the L1 cache and will massively pollute the L2 cache as well
   9  *
  10  * These should instead fit in the L1 cache.
  11  *
  12  * TODO: we could use a denormal bias table instead of the mantissa/offset
  13  * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
  14  * but would involve more computation
  15  *
  16  * Note however that if denormals are never encountered, the L1 cache usage
  17  * is only about 4608 bytes anyway.
  18  */
  19 uint32_t util_half_to_float_mantissa_table[2048];
  20 uint32_t util_half_to_float_exponent_table[64];
  21 uint32_t util_half_to_float_offset_table[64];
  22 uint16_t util_float_to_half_base_table[512];
  23 uint8_t util_float_to_half_shift_table[512];
  24
  25 /* called by u_gctors.cpp, which defines the prototype itself */
  26 void util_half_init_tables(void);
  27
  28 void util_half_init_tables(void)
  29 {
  30         int i;
  31
  32         /* zero */
  33         util_half_to_float_mantissa_table[0] = 0;
  34
  35         /* denormals */
  36         for(i = 1; i < 1024; ++i) {
  37                 unsigned int m = i << 13;
  38                 unsigned int e = 0;
  39
  40                 /* Normalize number */
  41                 while(!(m & 0x00800000)) {
  42                         e -= 0x00800000;
  43                         m<<=1;
  44                 }
  45                 m &= ~0x00800000;
  46                 e+= 0x38800000;
  47                 util_half_to_float_mantissa_table[i] = m | e;
  48         }
  49
  50         /* normals */
  51         for(i = 1024; i < 2048; ++i)
  52                 util_half_to_float_mantissa_table[i] = ((i-1024)<<13);
  53
  54         /* positive zero or denormals */
  55         util_half_to_float_exponent_table[0] = 0;
  56
  57         /* positive numbers */
  58         for(i = 1; i <= 30; ++i)
  59                 util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
  60
  61         /* positive infinity/NaN */
  62         util_half_to_float_exponent_table[31] = 0x7f800000;
  63
  64         /* negative zero or denormals */
  65         util_half_to_float_exponent_table[32] = 0x80000000;
  66
  67         /* negative numbers */
  68         for(i = 33; i <= 62; ++i)
  69                 util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
  70
  71         /* negative infinity/NaN */
  72         util_half_to_float_exponent_table[63] = 0xff800000;
  73
  74         /* positive zero or denormals */
  75         util_half_to_float_offset_table[0] = 0;
  76
  77         /* positive normals */
  78         for(i = 1; i < 32; ++i)
  79                 util_half_to_float_offset_table[i] = 1024;
  80
  81         /* negative zero or denormals */
  82         util_half_to_float_offset_table[32] = 0;
  83
  84         /* negative normals */
  85         for(i = 33; i < 64; ++i)
  86                 util_half_to_float_offset_table[i] = 1024;
  87
  88
  89
  90         /* very small numbers mapping to zero */
  91         for(i = -127; i < -24; ++i) {
  92                 util_float_to_half_base_table[127 + i] = 0;
  93                 util_float_to_half_shift_table[127 + i] = 24;
  94         }
  95
  96         /* small numbers mapping to denormals */
  97         for(i = -24; i < -14; ++i) {
  98                 util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
  99                 util_float_to_half_shift_table[127 + i] = -i - 1;
 100         }
 101
 102         /* normal numbers */
 103         for(i = -14; i < 16; ++i) {
 104                 util_float_to_half_base_table[127 + i] = (i + 15) << 10;
 105                 util_float_to_half_shift_table[127 + i] = 13;
 106         }
 107
 108         /* large numbers mapping to infinity */
 109         for(i = 16; i < 128; ++i) {
 110                 util_float_to_half_base_table[127 + i] = 0x7c00;
 111                 util_float_to_half_shift_table[127 + i] = 24;
 112         }
 113
 114         /* infinity and NaNs */
 115         util_float_to_half_base_table[255] = 0x7c00;
 116         util_float_to_half_shift_table[255] = 13;
 117
 118         /* negative numbers */
 119         for(i = 0; i < 256; ++i) {
 120                 util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
 121                 util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
 122         }
 123 }