gallium/util: pregenerate half float tables

author Luca Barbieri <luca@luca-barbieri.com>

Fri, 2 Apr 2010 01:48:59 +0000 (03:48 +0200)

committer Luca Barbieri <luca@luca-barbieri.com>

Fri, 2 Apr 2010 04:16:18 +0000 (06:16 +0200)
author Luca Barbieri <luca@luca-barbieri.com>
Fri, 2 Apr 2010 01:48:59 +0000 (03:48 +0200)
committer Luca Barbieri <luca@luca-barbieri.com>
Fri, 2 Apr 2010 04:16:18 +0000 (06:16 +0200)
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile

index 1db4aaa43987e4fbd36d7eb5f87ff4ab4b266bd9..843778d81007d1658a2c033d981831f7783d1d02 100644 (file)
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -169,7 +169,8 @@ GALLIVM_CPP_SOURCES = \
  GENERATED_SOURCES = \
         indices/u_indices_gen.c \
         indices/u_unfilled_gen.c \
-       util/u_format_table.c
+       util/u_format_table.c \
+       util/u_half.c
  
  
  ifeq ($(MESA_LLVM),1)
@@ -198,3 +199,5 @@ util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_forma
  util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv
         python util/u_format_access.py util/u_format.csv > $@
  
+util/u_half.c: util/u_half.py
+       python util/u_half.py > $@
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript

index d0443db3f789085b2a1d63c7e9658f2c14604c6f..73d4150448faea679bec3726546510019cbd88ea 100644 (file)
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -29,6 +29,14 @@ env.CodeGenerate(
      source = ['util/u_format.csv'],
      command = 'python $SCRIPT $SOURCE > $TARGET'
  )
+
+env.CodeGenerate(
+    target = 'util/u_half.c',
+    script = 'util/u_half.py',
+    source = [],
+    command = 'python $SCRIPT > $TARGET'
+)
+
  env.Depends('util/u_format_table.c', [
      'util/u_format_parse.py', 
      'util/u_format_pack.py', 
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c

index d3ee1f033966ae0e7cebf223615676041a50f54e..fae0a462dcbab38faaef435e58be061607a641e6 100644 (file)
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -124,5 +124,4 @@ void
  util_format_do_init(void)
  {
     util_format_s3tc_init();
-   util_half_init();
  }
diff --git a/src/gallium/auxiliary/util/u_half.c b/src/gallium/auxiliary/util/u_half.c

deleted file mode 100644 (file)

index 4c8f8a5..0000000
--- a/src/gallium/auxiliary/util/u_half.c
+++ /dev/null
@@ -1,165 +0,0 @@
-
-/*
- * Copyright 2010 Luca Barbieri
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* The code is a reimplementation of the algorithm in
- *  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
- * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
- *
- * The table contents have been slightly changed so that the exponent
- * bias is now in the exponent table instead of the mantissa table (mostly
- * for cosmetic reasons, and because it theoretically allows a variant
- * that flushes denormal to zero but uses a mantissa table with 24-bit
- * entries).
- *
- * The tables are also constructed slightly differently.
- */
-
-/* Note that using a 64K * 4 table is a terrible idea since it will not fit
- * in the L1 cache and will massively pollute the L2 cache as well
- *
- * These should instead fit in the L1 cache.
- *
- * TODO: we could use a denormal bias table instead of the mantissa/offset
- * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
- * but would involve more computation
- *
- * Note however that if denormals are never encountered, the L1 cache usage
- * is only about 4608 bytes anyway.
- */
-
-#include "util/u_half.h"
-#include "util/u_init.h"
-
-uint32_t util_half_to_float_mantissa_table[2048];
-uint32_t util_half_to_float_exponent_table[64];
-uint32_t util_half_to_float_offset_table[64];
-uint16_t util_float_to_half_base_table[512];
-uint8_t util_float_to_half_shift_table[512];
-
-boolean util_half_inited;
-
-void
-util_half_do_init(void)
-{
-   int i;
-
-   /* zero */
-   util_half_to_float_mantissa_table[0] = 0;
-
-   /* denormals */
-   for(i = 1; i < 1024; ++i)
-   {
-      unsigned int m = i << 13;
-      unsigned int e = 0;
-
-      /* Normalize number */
-      while(!(m & 0x00800000))
-      {
-        e -= 0x00800000;
-        m <<= 1;
-      }
-      m &= ~0x00800000;
-      e += 0x38800000;
-      util_half_to_float_mantissa_table[i] = m | e;
-   }
-
-   /* normals */
-   for(i = 1024; i < 2048; ++i)
-      util_half_to_float_mantissa_table[i] = ((i - 1024) << 13);
-
-   /* positive zero or denormals */
-   util_half_to_float_exponent_table[0] = 0;
-
-   /* positive numbers */
-   for(i = 1; i <= 30; ++i)
-      util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
-
-   /* positive infinity/NaN */
-   util_half_to_float_exponent_table[31] = 0x7f800000;
-
-   /* negative zero or denormals */
-   util_half_to_float_exponent_table[32] = 0x80000000;
-
-   /* negative numbers */
-   for(i = 33; i <= 62; ++i)
-      util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
-
-   /* negative infinity/NaN */
-   util_half_to_float_exponent_table[63] = 0xff800000;
-
-   /* positive zero or denormals */
-   util_half_to_float_offset_table[0] = 0;
-
-   /* positive normals */
-   for(i = 1; i < 32; ++i)
-      util_half_to_float_offset_table[i] = 1024;
-
-   /* negative zero or denormals */
-   util_half_to_float_offset_table[32] = 0;
-
-   /* negative normals */
-   for(i = 33; i < 64; ++i)
-      util_half_to_float_offset_table[i] = 1024;
-
-   /* very small numbers mapping to zero */
-   for(i = -127; i < -24; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = 0;
-      util_float_to_half_shift_table[127 + i] = 24;
-   }
-
-   /* small numbers mapping to denormals */
-   for(i = -24; i < -14; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
-      util_float_to_half_shift_table[127 + i] = -i - 1;
-   }
-
-   /* normal numbers */
-   for(i = -14; i < 16; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = (i + 15) << 10;
-      util_float_to_half_shift_table[127 + i] = 13;
-   }
-
-   /* large numbers mapping to infinity */
-   for(i = 16; i < 128; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = 0x7c00;
-      util_float_to_half_shift_table[127 + i] = 24;
-   }
-
-   /* infinity and NaNs */
-   util_float_to_half_base_table[255] = 0x7c00;
-   util_float_to_half_shift_table[255] = 13;
-
-   /* negative numbers */
-   for(i = 0; i < 256; ++i)
-   {
-      util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
-      util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
-   }
-}
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h

index 02f0f2419364c7f061884c93823130ed92370db5..a28b1fd1d93625d6c02d75ccc7e511b4f791d87f 100644 (file)
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -3,7 +3,6 @@
  
  #include "pipe/p_compiler.h"
  #include "util/u_math.h"
-#include "util/u_inline_init.h"
  
  #ifdef __cplusplus
  extern "C" {
@@ -56,8 +55,6 @@ util_float_to_half(float f)
     return util_floatui_to_half(i.ui);
  }
  
-UTIL_INLINE_INIT(util_half);
-
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py

new file mode 100644 (file)

index 0000000..a92f758
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.py
@@ -0,0 +1,179 @@
+# Copyright 2010 Luca Barbieri
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial
+# portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# *************************************************************************
+
+# The code is a reimplementation of the algorithm in
+#  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
+# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
+#
+# The table contents have been slightly changed so that the exponent
+# bias is now in the exponent table instead of the mantissa table (mostly
+# for cosmetic reasons, and because it theoretically allows a variant
+# that flushes denormals to zero but uses a mantissa table with 24-bit
+# entries).
+#
+# The tables are also constructed slightly differently.
+#
+
+# Note that using a single 64K-entry * 4-byte table is a terrible idea since
+# it will not fit in the L1 cache and will massively pollute the L2 cache too.
+#
+# The smaller tables generated here should instead fit in the L1 cache.
+#
+# TODO: we could use a denormal bias table instead of the mantissa/offset
+# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
+# but would involve more computation
+#
+# Note however that if denormals are never encountered, the L1 cache usage
+# is only about 4608 bytes anyway.
+
+table_index = None
+table_length = None
+
+def begin(t, n, l):
+       global table_length
+       global table_index
+       table_index = 0
+       table_length = l
+       print
+       print t + " " + n + "[" + str(l) + "] = {"
+
+def value(v):
+       global table_index
+       table_index += 1
+       print "\t" + hex(v) + ","
+
+def end():
+       global table_length
+       global table_index
+       print "};"
+       assert table_index == table_length
+
+print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
+print "#include \"util/u_half.h\""
+
+begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
+# zero
+value(0)
+
+# denormals
+for i in xrange(1, 1024):
+       m = i << 13
+       e = 0
+
+       # normalize number
+       while (m & 0x00800000) == 0:
+               e -= 0x00800000;
+               m <<= 1;
+
+       m &= ~0x00800000;
+       e += 0x38800000;
+       value(m | e)
+
+# normals
+for i in xrange(1024, 2048):
+       value((i - 1024) << 13)
+end()
+
+begin("uint32_t", "util_half_to_float_exponent_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive numbers
+for i in xrange(1, 31):
+       value(0x38000000 + (i << 23))
+
+# positive infinity/NaN
+value(0x7f800000)
+
+# negative zero or denormals
+value(0x80000000)
+
+# negative numbers
+for i in range(33, 63):
+       value(0xb8000000 + ((i - 32) << 23))
+
+# negative infinity/NaN
+value(0xff800000)
+end()
+
+begin("uint32_t", "util_half_to_float_offset_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive normals
+for i in range(1, 32):
+       value(1024)
+
+# negative zero or denormals
+value(0)
+
+# negative normals
+for i in xrange(33, 64):
+       value(1024)
+end()
+
+begin("uint16_t", "util_float_to_half_base_table", 512)
+for sign in (0, 0x8000):
+       # very small numbers mapping to zero
+       for i in xrange(-127, -24):
+               value(sign | 0)
+
+       # small numbers mapping to denormals
+       for i in xrange(-24, -14):
+               value(sign | (0x400 >> (-14 -i)))
+
+       # normal numbers
+       for i in xrange(-14, 16):
+               value(sign | ((i + 15) << 10))
+
+       # large numbers mapping to infinity
+       for i in xrange(16, 128):
+               value(sign | 0x7c00)
+
+       # infinity and NaNs
+       value(sign | 0x7c00)
+end()
+
+begin("uint8_t", "util_float_to_half_shift_table", 512)
+for sign in (0, 0x8000):
+       # very small numbers mapping to zero
+       for i in xrange(-127, -24):
+               value(24)
+
+       # small numbers mapping to denormals
+       for i in xrange(-24, -14):
+               value(-1 - i)
+
+       # normal numbers
+       for i in xrange(-14, 16):
+               value(13)
+
+       # large numbers mapping to infinity
+       for i in xrange(16, 128):
+               value(24)
+
+       # infinity and NaNs
+       value(13)
+end()
+
author	Luca Barbieri <luca@luca-barbieri.com>
	Fri, 2 Apr 2010 01:48:59 +0000 (03:48 +0200)
committer	Luca Barbieri <luca@luca-barbieri.com>
	Fri, 2 Apr 2010 04:16:18 +0000 (06:16 +0200)
src/gallium/auxiliary/Makefile		patch \| blob \| history
src/gallium/auxiliary/SConscript		patch \| blob \| history
src/gallium/auxiliary/util/u_format.c		patch \| blob \| history
src/gallium/auxiliary/util/u_half.c	[deleted file]	patch \| blob \| history
src/gallium/auxiliary/util/u_half.h		patch \| blob \| history
src/gallium/auxiliary/util/u_half.py	[new file with mode: 0644]	patch \| blob