util: Reimplement half <-> float conversions.

author James Benton <jbenton@vmware.com>
Tue, 12 Jun 2012 11:11:14 +0000 (12:11 +0100)

committer José Fonseca <jfonseca@vmware.com>
Fri, 29 Jun 2012 11:21:02 +0000 (12:21 +0100)
diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk

index 0c37dd31ab618e6db1b518d20ec9eb9c744217fb..11fc2256a82ad6c8485b34eb626f0bc5dc130539 100644 (file)
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -44,8 +44,7 @@ $(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
  
  $(intermediates)/indices/u_indices_gen.c \
  $(intermediates)/indices/u_unfilled_gen.c \
-$(intermediates)/util/u_format_srgb.c \
-$(intermediates)/util/u_half.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py
+$(intermediates)/util/u_format_srgb.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py
         $(transform-generated-source)
  
  $(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile

index a70ae7384fbf6c2c7e9e33fd2dcaea56cfdcf406..3ba3f9c40b461c23b2018ad2058da78443326f06 100644 (file)
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -39,6 +39,4 @@ util/u_format_srgb.c: util/u_format_srgb.py
  util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_format_parse.py util/u_format.csv
         $(PYTHON2) util/u_format_table.py util/u_format.csv > $@
  
-util/u_half.c: util/u_half.py
-       $(PYTHON2) util/u_half.py > $@
  # DO NOT DELETE
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources

index 277428b38be5d7638618a0302e161bb5d90a4b4f..28a176d68fa3d163b63ec9a96a9a6a683cd13649 100644 (file)
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -155,8 +155,7 @@ GENERATED_SOURCES := \
         indices/u_indices_gen.c \
         indices/u_unfilled_gen.c \
         util/u_format_srgb.c \
-       util/u_format_table.c \
-       util/u_half.c
+       util/u_format_table.c
  
  GALLIVM_SOURCES := \
          gallivm/lp_bld_arit.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript

index 07c420e138d5d4676f769739533439aae4bb9d2f..bfd5ec34c04dfd35a74369015b7b2e9d6afb386a 100644 (file)
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -35,13 +35,6 @@ env.CodeGenerate(
      command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'
  )
  
-env.CodeGenerate(
-    target = 'util/u_half.c',
-    script = 'util/u_half.py',
-    source = [],
-    command = python_cmd + ' $SCRIPT > $TARGET'
-)
-
  env.Depends('util/u_format_table.c', [
      '#src/gallium/auxiliary/util/u_format_parse.py',
      'util/u_format_pack.py', 
diff --git a/src/gallium/auxiliary/util/.gitignore b/src/gallium/auxiliary/util/.gitignore

index 5dd0408effb54db4c8408ef2c30609e883a2d321..da74de623d73be041adcebafbcd639b19ef03a71 100644 (file)
--- a/src/gallium/auxiliary/util/.gitignore
+++ b/src/gallium/auxiliary/util/.gitignore
@@ -1,3 +1,2 @@
  u_format_srgb.c
  u_format_table.c
-u_half.c
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h

index ad030e90c67866bda07534df080662c9b174cdde..f7009f54844b140b9de5b57301291a7fda0d8dae 100644 (file)
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -35,51 +35,84 @@
  extern "C" {
  #endif
  
-extern const uint32_t util_half_to_float_mantissa_table[2048];
-extern const uint32_t util_half_to_float_exponent_table[64];
-extern const uint32_t util_half_to_float_offset_table[64];
-extern const uint16_t util_float_to_half_base_table[512];
-extern const uint8_t util_float_to_half_shift_table[512];
-
  /*
- * Note that if the half float is a signaling NaN, the x87 FPU will turn
- * it into a quiet NaN immediately upon loading into a float.
- *
- * Additionally, denormals may be flushed to zero.
+ * References for float <-> half conversions
   *
- * To avoid this, use the floatui functions instead of the float ones
- * when just doing conversion rather than computation on the resulting
- * floats.
+ *  http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
+ *  https://gist.github.com/2156668
+ *  https://gist.github.com/2144712
   */
  
-static INLINE uint32_t
-util_half_to_floatui(uint16_t h)
+static INLINE uint16_t
+util_float_to_half(float f)
  {
-   unsigned exp = h >> 10;
-   return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + util_half_to_float_exponent_table[exp];
+   uint32_t sign_mask  = 0x80000000;
+   uint32_t round_mask = ~0xfff;
+   uint32_t f32inf = 0xff << 23;
+   uint32_t f16inf = 0x1f << 23;
+   uint32_t sign;
+   union fi magic;
+   union fi f32;
+   uint16_t f16;
+
+   magic.ui = 0xf << 23;
+
+   f32.f = f;
+
+   /* Sign */
+   sign = f32.ui & sign_mask;
+   f32.ui ^= sign;
+
+   if (f32.ui == f32inf) {
+      /* Inf */
+      f16 = 0x7c00;
+   } else if (f32.ui > f32inf) {
+      /* NaN */
+      f16 = 0x7e00;
+   } else {
+      /* Number */
+      f32.ui &= round_mask;
+      f32.f  *= magic.f;
+      f32.ui -= round_mask;
+
+      /* Clamp to infinity if overflowed */
+      if (f32.ui > f16inf)
+         f32.ui = f16inf;
+
+      f16 = f32.ui >> 13;
+   }
+
+   /* Sign */
+   f16 |= sign >> 16;
+
+   return f16;
  }
  
  static INLINE float
-util_half_to_float(uint16_t h)
+util_half_to_float(uint16_t f16)
  {
-   union fi r;
-   r.ui = util_half_to_floatui(h);
-   return r.f;
-}
+   union fi infnan;
+   union fi magic;
+   union fi f32;
  
-static INLINE uint16_t
-util_floatui_to_half(uint32_t v)
-{
-   unsigned signexp = v >> 23;
-   return util_float_to_half_base_table[signexp] + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]);
-}
+   infnan.ui = 0x8f << 23;
+   infnan.f = 65536.0f;
+   magic.ui  = 0xef << 23;
  
-static INLINE uint16_t
-util_float_to_half(float f)
-{
-   union fi i;
-   i.f = f;
-   return util_floatui_to_half(i.ui);
+   /* Exponent / Mantissa */
+   f32.ui = (f16 & 0x7fff) << 13;
+
+   /* Adjust */
+   f32.f *= magic.f;
+
+   /* Inf / NaN */
+   if (f32.f >= infnan.f)
+      f32.ui |= 0xff << 23;
+
+   /* Sign */
+   f32.ui |= (f16 & 0x8000) << 16;
+
+   return f32.f;
  }
  
  #ifdef __cplusplus
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py

deleted file mode 100644 (file)

index 915cf3b..0000000
--- a/src/gallium/auxiliary/util/u_half.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Copyright 2010 Luca Barbieri
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial
-# portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# *************************************************************************
-
-# The code is a reimplementation of the algorithm in
-#  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
-# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
-#
-# The table contents have been slightly changed so that the exponent
-# bias is now in the exponent table instead of the mantissa table (mostly
-# for cosmetic reasons, and because it theoretically allows a variant
-# that flushes denormal to zero but uses a mantissa table with 24-bit
-# entries).
-#
-# The tables are also constructed slightly differently.
-#
-
-# Note that using a 64K * 4 table is a terrible idea since it will not fit
-# in the L1 cache and will massively pollute the L2 cache as well
-#
-# These should instead fit in the L1 cache.
-#
-# TODO: we could use a denormal bias table instead of the mantissa/offset
-# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
-# but would involve more computation
-#
-# Note however that if denormals are never encountered, the L1 cache usage
-# is only about 4608 bytes anyway.
-
-table_index = None
-table_length = None
-
-def begin(t, n, l):
-       global table_length
-       global table_index
-       table_index = 0
-       table_length = l
-       print
-       print "const " + t + " " + n + "[" + str(l) + "] = {"
-
-def value(v):
-       global table_index
-       table_index += 1
-       print "\t" + hex(v) + ","
-
-def end():
-       global table_length
-       global table_index
-       print "};"
-       assert table_index == table_length
-
-print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
-print "#include \"util/u_half.h\""
-
-begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
-# zero
-value(0)
-
-# denormals
-for i in xrange(1, 1024):
-       m = i << 13
-       e = 0
-
-       # normalize number
-       while (m & 0x00800000) == 0:
-               e -= 0x00800000
-               m <<= 1
-
-       m &= ~0x00800000
-       e += 0x38800000
-       value(m | e)
-
-# normals
-for i in xrange(1024, 2048):
-       value((i - 1024) << 13)
-end()
-
-begin("uint32_t", "util_half_to_float_exponent_table", 64)
-# positive zero or denormals
-value(0)
-
-# positive numbers
-for i in xrange(1, 31):
-       value(0x38000000 + (i << 23))
-
-# positive infinity/NaN
-value(0x7f800000)
-
-# negative zero or denormals
-value(0x80000000)
-
-# negative numbers
-for i in range(33, 63):
-       value(0xb8000000 + ((i - 32) << 23))
-
-# negative infinity/NaN
-value(0xff800000)
-end()
-
-begin("uint32_t", "util_half_to_float_offset_table", 64)
-# positive zero or denormals
-value(0)
-
-# positive normals
-for i in range(1, 32):
-       value(1024)
-
-# negative zero or denormals
-value(0)
-
-# negative normals
-for i in xrange(33, 64):
-       value(1024)
-end()
-
-begin("uint16_t", "util_float_to_half_base_table", 512)
-for sign in (0, 0x8000):
-       # very small numbers mapping to zero
-       for i in xrange(-127, -24):
-               value(sign | 0)
-
-       # small numbers mapping to denormals
-       for i in xrange(-24, -14):
-               value(sign | (0x400 >> (-14 -i)))
-
-       # normal numbers
-       for i in xrange(-14, 16):
-               value(sign | ((i + 15) << 10))
-
-       # large numbers mapping to infinity
-       for i in xrange(16, 128):
-               value(sign | 0x7c00)
-
-       # infinity and NaNs
-       value(sign | 0x7c00)
-end()
-
-begin("uint8_t", "util_float_to_half_shift_table", 512)
-for sign in (0, 0x8000):
-       # very small numbers mapping to zero
-       for i in xrange(-127, -24):
-               value(24)
-
-       # small numbers mapping to denormals
-       for i in xrange(-24, -14):
-               value(-1 - i)
-
-       # normal numbers
-       for i in xrange(-14, 16):
-               value(13)
-
-       # large numbers mapping to infinity
-       for i in xrange(16, 128):
-               value(24)
-
-       # infinity and NaNs
-       value(13)
-end()
-
author	James Benton <jbenton@vmware.com>
	Tue, 12 Jun 2012 11:11:14 +0000 (12:11 +0100)
committer	José Fonseca <jfonseca@vmware.com>
	Fri, 29 Jun 2012 11:21:02 +0000 (12:21 +0100)
src/gallium/auxiliary/Android.mk		patch | blob | history
src/gallium/auxiliary/Makefile		patch | blob | history
src/gallium/auxiliary/Makefile.sources		patch | blob | history
src/gallium/auxiliary/SConscript		patch | blob | history
src/gallium/auxiliary/util/.gitignore		patch | blob | history
src/gallium/auxiliary/util/u_half.h		patch | blob | history
src/gallium/auxiliary/util/u_half.py	[deleted file]	patch | blob | history