nir/algebraic: support for power-of-two optimizations

author Rob Clark <robclark@freedesktop.org>

Sat, 7 May 2016 17:01:24 +0000 (13:01 -0400)

committer Rob Clark <robclark@freedesktop.org>

Fri, 3 Jun 2016 20:05:03 +0000 (16:05 -0400)
author Rob Clark <robclark@freedesktop.org>
Sat, 7 May 2016 17:01:24 +0000 (13:01 -0400)
committer Rob Clark <robclark@freedesktop.org>
Fri, 3 Jun 2016 20:05:03 +0000 (16:05 -0400)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index 20f652018903b79a403c93a80a3e45de9e7bce61..3f9309c04dd734cffeea83e4979b56efe9df9d5f 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1651,6 +1651,9 @@ typedef struct nir_shader_compiler_options {
     /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
     bool lower_scmp;
  
+   /** enables rules to lower idiv by power-of-two: */
+   bool lower_idiv;
+
     /* Does the native fdot instruction replicate its result for four
      * components?  If so, then opt_algebraic_late will turn all fdotN
      * instructions into fdot_replicatedN instructions.
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py

index 285f8534846c1fd7074b5d992a12d0266826c2fb..19ac6ee2ba4152fe3a276602e1c985c4b2ef14c8 100644 (file)
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -76,6 +76,7 @@ class Value(object):
           return Constant(val, name_base)
  
     __template = mako.template.Template("""
+#include "compiler/nir/nir_search_helpers.h"
  static const ${val.c_type} ${val.name} = {
     { ${val.type_enum}, ${val.bit_size} },
  % if isinstance(val, Constant):
@@ -84,6 +85,7 @@ static const ${val.c_type} ${val.name} = {
     ${val.index}, /* ${val.var_name} */
     ${'true' if val.is_constant else 'false'},
     ${val.type() or 'nir_type_invalid' },
+   ${val.cond if val.cond else 'NULL'},
  % elif isinstance(val, Expression):
     ${'true' if val.inexact else 'false'},
     nir_op_${val.opcode},
@@ -113,7 +115,7 @@ static const ${val.c_type} ${val.name} = {
                                      Variable=Variable,
                                      Expression=Expression)
  
-_constant_re = re.compile(r"(?P<value>[^@]+)(?:@(?P<bits>\d+))?")
+_constant_re = re.compile(r"(?P<value>[^@\(]+)(?:@(?P<bits>\d+))?")
  
  class Constant(Value):
     def __init__(self, val, name):
@@ -150,7 +152,8 @@ class Constant(Value):
           return "nir_type_float"
  
  _var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)"
-                          r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?")
+                          r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?"
+                          r"(?P<cond>\([^\)]+\))?")
  
  class Variable(Value):
     def __init__(self, val, name, varset):
@@ -161,6 +164,7 @@ class Variable(Value):
  
        self.var_name = m.group('name')
        self.is_constant = m.group('const') is not None
+      self.cond = m.group('cond')
        self.required_type = m.group('type')
        self.bit_size = int(m.group('bits')) if m.group('bits') else 0
  
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py

index f8db2b690ac2aefbb55c71b4bbf69c628eb9cb49..011263a14d7ab66f69f774b36b5e546ce85b8b59 100644 (file)
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -45,10 +45,11 @@ d = 'd'
  # however, be used for backend-requested lowering operations as those need to
  # happen regardless of precision.
  #
-# Variable names are specified as "[#]name[@type]" where "#" inicates that
-# the given variable will only match constants and the type indicates that
+# Variable names are specified as "[#]name[@type][(cond)]" where "#" indicates
+# that the given variable will only match constants and the type indicates that
  # the given variable will only match values from ALU instructions with the
-# given output type.
+# given output type, and (cond) specifies an additional condition function
+# (see nir_search_helpers.h).
  #
  # For constants, you have to be careful to make sure that it is the right
  # type because python is unaware of the source and destination types of the
@@ -62,6 +63,14 @@ d = 'd'
  # constructed value should have that bit-size.
  
  optimizations = [
+
+   (('imul', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b))),
+   (('imul', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b))))),
+   (('udiv', a, '#b@32(is_pos_power_of_two)'), ('ushr', a, ('find_lsb', b))),
+   (('idiv', a, '#b@32(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), 'options->lower_idiv'),
+   (('idiv', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), 'options->lower_idiv'),
+   (('umod', a, '#b(is_pos_power_of_two)'),    ('iand', a, ('isub', b, 1))),
+
     (('fneg', ('fneg', a)), a),
     (('ineg', ('ineg', a)), a),
     (('fabs', ('fabs', a)), ('fabs', a)),
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c

index 2c2fd9208c2bcc53fc5ffbd591e05b819bb9c64a..b21fb2c979ee034c05a6964207318d565647b6af 100644 (file)
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -127,6 +127,9 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
               instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
              return false;
  
+         if (var->cond && !var->cond(instr, src, num_components, new_swizzle))
+            return false;
+
           if (var->type != nir_type_invalid) {
              if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
                 return false;
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h

index a500febc9150bebd0ef1a9bab9e1e17d006d2ffd..f55d797e05e373808b8bb9e51f5b045029e8a0a9 100644 (file)
--- a/src/compiler/nir/nir_search.h
+++ b/src/compiler/nir/nir_search.h
@@ -68,6 +68,16 @@ typedef struct {
      * never match anything.
      */
     nir_alu_type type;
+
+   /** Optional condition function pointer
+    *
+    * This is only allowed in search expressions, and allows additional
+    * constraints to be placed on the match.  Typically used for 'is_constant'
+    * variables to require, for example, power-of-two in order for the search
+    * to match.
+    */
+   bool (*cond)(nir_alu_instr *instr, unsigned src,
+                unsigned num_components, const uint8_t *swizzle);
  } nir_search_variable;
  
  typedef struct {
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h

new file mode 100644 (file)

index 0000000..5037944
--- /dev/null
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef _NIR_SEARCH_HELPERS_
+#define _NIR_SEARCH_HELPERS_
+
+#include "nir.h"
+
+/**
+ * Returns true when exactly one bit of x is set, i.e. x is a non-zero
+ * power of two.
+ */
+static inline bool
+__is_power_of_two(unsigned int x)
+{
+   /* zero has no bits set and is not a power of two: */
+   if (x == 0)
+      return false;
+
+   /* clearing the lowest set bit leaves zero only if it was the only one: */
+   return (x & (x - 1)) == 0;
+}
+
+/**
+ * Search-condition helper: true when every inspected component of source
+ * 'src' of 'instr' is a positive power of two.
+ *
+ * 'swizzle' gives, for each of the 'num_components' channels, the component
+ * of the constant value that is checked.  The check is rejected when
+ * nir_src_as_const_value() yields no value for the source, or when the
+ * opcode declares that source as anything other than nir_type_int or
+ * nir_type_uint.
+ */
+static inline bool
+is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
+                    const uint8_t *swizzle)
+{
+   nir_const_value *cv = nir_src_as_const_value(instr->src[src].src);
+
+   /* nothing to inspect unless the source is a constant: */
+   if (!cv)
+      return false;
+
+   for (unsigned c = 0; c < num_components; c++) {
+      const unsigned chan = swizzle[c];
+
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_int:
+         /* signed: negative values never qualify */
+         if (cv->i32[chan] < 0 || !__is_power_of_two(cv->i32[chan]))
+            return false;
+         break;
+      case nir_type_uint:
+         if (!__is_power_of_two(cv->u32[chan]))
+            return false;
+         break;
+      default:
+         /* non-integer source types are not handled */
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Search-condition helper: true when every inspected component of source
+ * 'src' of 'instr' is a negative power of two.
+ *
+ * 'swizzle' gives, for each of the 'num_components' channels, the component
+ * of the constant value that is checked.  The check is rejected when
+ * nir_src_as_const_value() yields no value for the source, or when the
+ * opcode declares that source as anything other than nir_type_int.
+ */
+static inline bool
+is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
+                    const uint8_t *swizzle)
+{
+   nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
+
+   /* only constant src's: */
+   if (!val)
+      return false;
+
+   for (unsigned i = 0; i < num_components; i++) {
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_int:
+         if (val->i32[swizzle[i]] > 0)
+            return false;
+         /* Negate in unsigned arithmetic instead of calling abs(): abs()
+          * is undefined for the most negative int, whereas unsigned
+          * negation simply wraps and yields the magnitude.  Zero negates
+          * to zero and is rejected by __is_power_of_two().
+          */
+         if (!__is_power_of_two(0u - (unsigned int)val->i32[swizzle[i]]))
+            return false;
+         break;
+      default:
+         return false;
+      }
+   }
+
+   return true;
+}
+
+#endif /* _NIR_SEARCH_HELPERS_ */
author	Rob Clark <robclark@freedesktop.org>
	Sat, 7 May 2016 17:01:24 +0000 (13:01 -0400)
committer	Rob Clark <robclark@freedesktop.org>
	Fri, 3 Jun 2016 20:05:03 +0000 (16:05 -0400)
src/compiler/nir/nir.h		patch \| blob \| history
src/compiler/nir/nir_algebraic.py		patch \| blob \| history
src/compiler/nir/nir_opt_algebraic.py		patch \| blob \| history
src/compiler/nir/nir_search.c		patch \| blob \| history
src/compiler/nir/nir_search.h		patch \| blob \| history
src/compiler/nir/nir_search_helpers.h	[new file with mode: 0644]	patch \| blob