src/gallium/auxiliary/util/u_half.c

   1
   2 /*
   3  * Copyright 2010 Luca Barbieri
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sublicense, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the
  14  * next paragraph) shall be included in all copies or substantial
  15  * portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  *
  25  **************************************************************************/
  26
  27 /* The code is a reimplementation of the algorithm in
  28  *  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
  29  * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
  30  *
  31  * The table contents have been slightly changed so that the exponent
  32  * bias is now in the exponent table instead of the mantissa table (mostly
  33  * for cosmetic reasons, and because it theoretically allows a variant
  34  * that flushes denormal to zero but uses a mantissa table with 24-bit
  35  * entries).
  36  *
  37  * The tables are also constructed slightly differently.
  38  */
  39
  40 /* Note that using a 64K * 4 table is a terrible idea since it will not fit
  41  * in the L1 cache and will massively pollute the L2 cache as well
  42  *
  43  * These should instead fit in the L1 cache.
  44  *
  45  * TODO: we could use a denormal bias table instead of the mantissa/offset
  46  * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
  47  * but would involve more computation
  48  *
  49  * Note however that if denormals are never encountered, the L1 cache usage
  50  * is only about 4608 bytes anyway.
  51  */
  52
  53 #include "util/u_half.h"
  54 #include "util/u_init.h"
  55
  56 uint32_t util_half_to_float_mantissa_table[2048];
  57 uint32_t util_half_to_float_exponent_table[64];
  58 uint32_t util_half_to_float_offset_table[64];
  59 uint16_t util_float_to_half_base_table[512];
  60 uint8_t util_float_to_half_shift_table[512];
  61
  62 boolean util_half_inited;
  63
  64 void
  65 util_half_do_init(void)
  66 {
  67    int i;
  68
  69    /* zero */
  70    util_half_to_float_mantissa_table[0] = 0;
  71
  72    /* denormals */
  73    for(i = 1; i < 1024; ++i)
  74    {
  75       unsigned int m = i << 13;
  76       unsigned int e = 0;
  77
  78       /* Normalize number */
  79       while(!(m & 0x00800000))
  80       {
  81          e -= 0x00800000;
  82          m <<= 1;
  83       }
  84       m &= ~0x00800000;
  85       e += 0x38800000;
  86       util_half_to_float_mantissa_table[i] = m | e;
  87    }
  88
  89    /* normals */
  90    for(i = 1024; i < 2048; ++i)
  91       util_half_to_float_mantissa_table[i] = ((i - 1024) << 13);
  92
  93    /* positive zero or denormals */
  94    util_half_to_float_exponent_table[0] = 0;
  95
  96    /* positive numbers */
  97    for(i = 1; i <= 30; ++i)
  98       util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
  99
 100    /* positive infinity/NaN */
 101    util_half_to_float_exponent_table[31] = 0x7f800000;
 102
 103    /* negative zero or denormals */
 104    util_half_to_float_exponent_table[32] = 0x80000000;
 105
 106    /* negative numbers */
 107    for(i = 33; i <= 62; ++i)
 108       util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
 109
 110    /* negative infinity/NaN */
 111    util_half_to_float_exponent_table[63] = 0xff800000;
 112
 113    /* positive zero or denormals */
 114    util_half_to_float_offset_table[0] = 0;
 115
 116    /* positive normals */
 117    for(i = 1; i < 32; ++i)
 118       util_half_to_float_offset_table[i] = 1024;
 119
 120    /* negative zero or denormals */
 121    util_half_to_float_offset_table[32] = 0;
 122
 123    /* negative normals */
 124    for(i = 33; i < 64; ++i)
 125       util_half_to_float_offset_table[i] = 1024;
 126
 127    /* very small numbers mapping to zero */
 128    for(i = -127; i < -24; ++i)
 129    {
 130       util_float_to_half_base_table[127 + i] = 0;
 131       util_float_to_half_shift_table[127 + i] = 24;
 132    }
 133
 134    /* small numbers mapping to denormals */
 135    for(i = -24; i < -14; ++i)
 136    {
 137       util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
 138       util_float_to_half_shift_table[127 + i] = -i - 1;
 139    }
 140
 141    /* normal numbers */
 142    for(i = -14; i < 16; ++i)
 143    {
 144       util_float_to_half_base_table[127 + i] = (i + 15) << 10;
 145       util_float_to_half_shift_table[127 + i] = 13;
 146    }
 147
 148    /* large numbers mapping to infinity */
 149    for(i = 16; i < 128; ++i)
 150    {
 151       util_float_to_half_base_table[127 + i] = 0x7c00;
 152       util_float_to_half_shift_table[127 + i] = 24;
 153    }
 154
 155    /* infinity and NaNs */
 156    util_float_to_half_base_table[255] = 0x7c00;
 157    util_float_to_half_shift_table[255] = 13;
 158
 159    /* negative numbers */
 160    for(i = 0; i < 256; ++i)
 161    {
 162       util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
 163       util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
 164    }
 165 }