src/gallium/auxiliary/util/u_half.c

   1 /*
   2  * Copyright 2010 Luca Barbieri
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining
   5  * a copy of this software and associated documentation files (the
   6  * "Software"), to deal in the Software without restriction, including
   7  * without limitation the rights to use, copy, modify, merge, publish,
   8  * distribute, sublicense, and/or sell copies of the Software, and to
   9  * permit persons to whom the Software is furnished to do so, subject to
  10  * the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the
  13  * next paragraph) shall be included in all copies or substantial
  14  * portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  20  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  21  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  *
  24  **************************************************************************/
  25
  26 /* The code is a reimplementation of the algorithm in
  27  *  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
  28  * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
  29  *
  30  * The table contents have been slightly changed so that the exponent
  31  * bias is now in the exponent table instead of the mantissa table (mostly
  32  * for cosmetic reasons, and because it theoretically allows a variant
  33  * that flushes denormal to zero but uses a mantissa table with 24-bit
  34  * entries).
  35  *
  36  * The tables are also constructed slightly differently.
  37  */
  38
  39 /* Note that using a 64K * 4 table is a terrible idea since it will not fit
  40  * in the L1 cache and will massively pollute the L2 cache as well
  41  *
  42  * These should instead fit in the L1 cache.
  43  *
  44  * TODO: we could use a denormal bias table instead of the mantissa/offset
  45  * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
  46  * but would involve more computation
  47  *
  48  * Note however that if denormals are never encountered, the L1 cache usage
  49  * is only about 4608 bytes anyway.
  50  */
  51
  52 #include "util/u_half.h"
  53 #include "util/u_init.h"
  54
  55 uint32_t util_half_to_float_mantissa_table[2048];
  56 uint32_t util_half_to_float_exponent_table[64];
  57 uint32_t util_half_to_float_offset_table[64];
  58 uint16_t util_float_to_half_base_table[512];
  59 uint8_t util_float_to_half_shift_table[512];
  60
  61 static void util_half_init_tables(void)
  62 {
  63         int i;
  64
  65         /* zero */
  66         util_half_to_float_mantissa_table[0] = 0;
  67
  68         /* denormals */
  69         for(i = 1; i < 1024; ++i) {
  70                 unsigned int m = i << 13;
  71                 unsigned int e = 0;
  72
  73                 /* Normalize number */
  74                 while(!(m & 0x00800000)) {
  75                         e -= 0x00800000;
  76                         m<<=1;
  77                 }
  78                 m &= ~0x00800000;
  79                 e+= 0x38800000;
  80                 util_half_to_float_mantissa_table[i] = m | e;
  81         }
  82
  83         /* normals */
  84         for(i = 1024; i < 2048; ++i)
  85                 util_half_to_float_mantissa_table[i] = ((i-1024)<<13);
  86
  87         /* positive zero or denormals */
  88         util_half_to_float_exponent_table[0] = 0;
  89
  90         /* positive numbers */
  91         for(i = 1; i <= 30; ++i)
  92                 util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
  93
  94         /* positive infinity/NaN */
  95         util_half_to_float_exponent_table[31] = 0x7f800000;
  96
  97         /* negative zero or denormals */
  98         util_half_to_float_exponent_table[32] = 0x80000000;
  99
 100         /* negative numbers */
 101         for(i = 33; i <= 62; ++i)
 102                 util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
 103
 104         /* negative infinity/NaN */
 105         util_half_to_float_exponent_table[63] = 0xff800000;
 106
 107         /* positive zero or denormals */
 108         util_half_to_float_offset_table[0] = 0;
 109
 110         /* positive normals */
 111         for(i = 1; i < 32; ++i)
 112                 util_half_to_float_offset_table[i] = 1024;
 113
 114         /* negative zero or denormals */
 115         util_half_to_float_offset_table[32] = 0;
 116
 117         /* negative normals */
 118         for(i = 33; i < 64; ++i)
 119                 util_half_to_float_offset_table[i] = 1024;
 120
 121
 122
 123         /* very small numbers mapping to zero */
 124         for(i = -127; i < -24; ++i) {
 125                 util_float_to_half_base_table[127 + i] = 0;
 126                 util_float_to_half_shift_table[127 + i] = 24;
 127         }
 128
 129         /* small numbers mapping to denormals */
 130         for(i = -24; i < -14; ++i) {
 131                 util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
 132                 util_float_to_half_shift_table[127 + i] = -i - 1;
 133         }
 134
 135         /* normal numbers */
 136         for(i = -14; i < 16; ++i) {
 137                 util_float_to_half_base_table[127 + i] = (i + 15) << 10;
 138                 util_float_to_half_shift_table[127 + i] = 13;
 139         }
 140
 141         /* large numbers mapping to infinity */
 142         for(i = 16; i < 128; ++i) {
 143                 util_float_to_half_base_table[127 + i] = 0x7c00;
 144                 util_float_to_half_shift_table[127 + i] = 24;
 145         }
 146
 147         /* infinity and NaNs */
 148         util_float_to_half_base_table[255] = 0x7c00;
 149         util_float_to_half_shift_table[255] = 13;
 150
 151         /* negative numbers */
 152         for(i = 0; i < 256; ++i) {
 153                 util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
 154                 util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
 155         }
 156 }
 157
/* Hand the table initializer to the UTIL_INIT macro.  Presumably (the macro
 * comes from util/u_init.h, not visible here -- confirm) this arranges for
 * util_half_init_tables() to run once before any of the tables are read.
 */
UTIL_INIT(util_half_init_tables);