nir/algebraic: Fix up extract_[iu]8 after loop unrolling

author Ian Romanick <ian.d.romanick@intel.com>

Thu, 28 Feb 2019 03:52:12 +0000 (19:52 -0800)

committer Ian Romanick <ian.d.romanick@intel.com>

Sat, 9 Mar 2019 06:24:19 +0000 (22:24 -0800)
author Ian Romanick <ian.d.romanick@intel.com>
Thu, 28 Feb 2019 03:52:12 +0000 (19:52 -0800)
committer Ian Romanick <ian.d.romanick@intel.com>
Sat, 9 Mar 2019 06:24:19 +0000 (22:24 -0800)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py

index 5b2e7ee2405378896e8283641c9302a84b2651bd..ac6e5b99220be59e6844c5675c23446cd87011f4 100644 (file)
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -614,8 +614,26 @@ optimizations = [
     (('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
     (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'),
     (('iand', 0xff, ('ushr', a,  8)), ('extract_u8', a, 1), '!options->lower_extract_byte'),
-   (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
+   (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte')
+]
+
+# The ('extract_u8', a, 0) pattern, above, can trigger in cases where the
+# shift count is based on a loop induction variable.  Once the loop is
+# unrolled, constant folding will generate patterns like those below.
+for shift in ('ushr', 'ishr'):
+   # Byte i of an N-bit source sits 8 * i bits up; cover every byte above 0.
+   for bits in (16, 32, 64):
+      optimizations.extend([(('extract_u8', (shift, 'a@%d' % bits, 8 * i), 0), ('extract_u8', a, i)) for i in range(1, bits // 8)])
+
+optimizations.append((('extract_u8', ('extract_u16', a, 1), 0), ('extract_u8', a, 2)))
  
+# The ('extract_[iu]8', a, 3) patterns, above, can trigger when the shift count
+# is based on a loop induction variable.  After unrolling, constant folding
+# yields the patterns below.  op is reused so extract_i8 still sign-extends.
+for op in ('extract_u8', 'extract_i8'):
+   optimizations.extend([((op, ('ishl', 'a@32', 24 - 8 * i), 3), (op, a, i)) for i in range(2, -1, -1)])
+
+optimizations.extend([
      # Word extraction
     (('ushr', ('ishl', 'a@32', 16), 16), ('extract_u16', a, 0), '!options->lower_extract_word'),
     (('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
@@ -798,7 +816,7 @@ optimizations = [
       'options->lower_unpack_snorm_4x8'),
  
     (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
-]
+])
  
  # bit_size dependent lowerings
  for bit_size in [8, 16, 32, 64]:
author	Ian Romanick <ian.d.romanick@intel.com>
	Thu, 28 Feb 2019 03:52:12 +0000 (19:52 -0800)
committer	Ian Romanick <ian.d.romanick@intel.com>
	Sat, 9 Mar 2019 06:24:19 +0000 (22:24 -0800)