nir: Lower ldexp to arithmetic.

author Kenneth Graunke <kenneth@whitecape.org>

Wed, 20 Jan 2016 01:40:58 +0000 (17:40 -0800)

committer Kenneth Graunke <kenneth@whitecape.org>

Wed, 20 Jan 2016 02:10:30 +0000 (18:10 -0800)
author Kenneth Graunke <kenneth@whitecape.org>
Wed, 20 Jan 2016 01:40:58 +0000 (17:40 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Wed, 20 Jan 2016 02:10:30 +0000 (18:10 -0800)
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py

index 1e80ba718ed635ae027ce4a70ec0e5dea9aa635b..188c5b1b0432ec303b85225b4c9275838c69bab6 100644 (file)
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -1,4 +1,5 @@
  #! /usr/bin/env python
+# -*- encoding: utf-8 -*-
  #
  # Copyright (C) 2014 Intel Corporation
  #
@@ -267,6 +268,68 @@ for op in ['flt', 'fge', 'feq', 'fne',
         ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
     ]
  
+def ldexp_to_arith(x, exp):
+   """
+   Build the nested-tuple replacement expression that lowers a 32-bit float
+      ldexp x exp
+   (<x>, <exp> are operands such as the 'x' and 'exp' match variables) into
+
+      extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+      resulting_biased_exp = extracted_biased_exp + exp;
+
+      if (resulting_biased_exp < 1) {
+         return copysign(0.0f, x);
+      }
+
+      return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+                         lshift(i2u(resulting_biased_exp), exp_shift));
+
+   which we can't actually implement as such, since NIR doesn't have
+   vectorized if-statements. We actually implement it without branches
+   using conditional-select:
+
+      extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+      resulting_biased_exp = extracted_biased_exp + exp;
+
+      is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
+      x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
+      resulting_biased_exp = csel(is_not_zero_or_underflow,
+                                  resulting_biased_exp, 0);
+
+      return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+                         lshift(i2u(resulting_biased_exp), exp_shift));
+   """
+
+   sign_mask = 0x80000000
+   exp_shift = 23
+   exp_width = 8
+
+   # Extract the biased exponent from <x>; 'fabs' just clears the sign bit.
+   extracted_biased_exp = ('ushr', ('fabs', x), exp_shift)
+   resulting_biased_exp = ('iadd', extracted_biased_exp, exp)
+
+   # Test if result is ±0.0, subnormal, or underflow by checking if the
+   # resulting biased exponent would be less than 0x1. If so, the result is
+   # 0.0 with the sign of x. (Actually, invert the conditions so that
+   # immediate values are the second arguments, which is better for i965)
+   zero_sign_x = ('iand', x, sign_mask)
+
+   is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1)
+
+   # We could test for overflows by checking if the resulting biased exponent
+   # would be greater than 0xFE. Turns out we don't need to because the GLSL
+   # spec says:
+   #
+   #    "If this product is too large to be represented in the
+   #     floating-point type, the result is undefined."
+
+   return ('bitfield_insert',
+           ('bcsel', is_not_zero_or_underflow, x, zero_sign_x),
+           ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0),
+           exp_shift, exp_width)
+
+optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))]  # (search, replace) pair for ldexp
+
  # This section contains "late" optimizations that should be run after the
  # regular optimizations have finished.  Optimizations should go here if
  # they help code generation but do not necessarily produce code that is
author	Kenneth Graunke <kenneth@whitecape.org>
	Wed, 20 Jan 2016 01:40:58 +0000 (17:40 -0800)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Wed, 20 Jan 2016 02:10:30 +0000 (18:10 -0800)