meson: inline `inc_common`

[mesa.git] / src / compiler / nir / nir_opt_algebraic.py
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py

index d65c8089211c0eb079148f9c20817da8ef05fd10..3302cd8d9e1ec9d9d86cb2cb0b434f295e581f7d 100644 (file)
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -271,6 +271,25 @@ for log2 in range(1, 7): # powers of two from 2 to 64
         (('iand@32', ('iadd@32', a, b_is_multiple), mask), ('iadd', ('iand', a, mask), b)),
     ])
  
+# To save space in the state tables, reduce to the set that is known to help.
+# Previously, this was range(1, 32).  In addition, a couple rules inside the
+# loop are commented out.  Revisit someday, probably after mesa/#2635 has some
+# resolution.
+for i in [1, 2, 16, 24]:
+    lo_mask = 0xffffffff >> i
+    hi_mask = (0xffffffff << i) & 0xffffffff
+
+    optimizations.extend([
+        # This pattern seems to only help in the soft-fp64 code.
+        (('ishl@32', ('iand', 'a@32', lo_mask), i), ('ishl', a, i)),
+#        (('ushr@32', ('iand', 'a@32', hi_mask), i), ('ushr', a, i)),
+#        (('ishr@32', ('iand', 'a@32', hi_mask), i), ('ishr', a, i)),
+
+        (('iand', ('ishl', 'a@32', i), hi_mask), ('ishl', a, i)),
+        (('iand', ('ushr', 'a@32', i), lo_mask), ('ushr', a, i)),
+#        (('iand', ('ishr', 'a@32', i), lo_mask), ('ushr', a, i)), # Yes, ushr is correct
+    ])
+
  optimizations.extend([
     # This is common for address calculations.  Reassociating may enable the
     # 'a<<c' to be CSE'd.  It also helps architectures that have an ISHLADD
@@ -607,7 +626,19 @@ optimizations.extend([
     (('ine', ('ineg', ('b2i32', 'a@1')), ('ineg', ('b2i32', 'b@1'))), ('ine', a, b)),
     (('b2i32', ('ine', 'a@1', 'b@1')), ('b2i32', ('ixor', a, b))),
  
-   (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', 'a@32', 'b@32'), 0), '!options->lower_bitops'),
+   (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', a, b), 0), '!options->lower_bitops'),
+   (('ior',  ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('ior', a, b), 0), '!options->lower_bitops'),
+
+   # This pattern occurs courtesy of __flt64_nonnan in the soft-fp64 code.
+   # The first part of the iand comes from the !__feq64_nonnan.
+   #
+   # The second pattern is a reformulation of the first based on the relation
+   # (a == 0 || y == 0) <=> umin(a, y) == 0, where b in the first equation
+   # happens to be y == 0.
+   (('iand', ('inot', ('iand', ('ior', ('ieq', a, 0),  b), c)), ('ilt', a, 0)),
+    ('iand', ('inot', ('iand',                         b , c)), ('ilt', a, 0))),
+   (('iand', ('inot', ('iand', ('ieq', ('umin', a, b), 0), c)), ('ilt', a, 0)),
+    ('iand', ('inot', ('iand', ('ieq',             b , 0), c)), ('ilt', a, 0))),
  
     # These patterns can result when (a < b || a < c) => (a < min(b, c))
     # transformations occur before constant propagation and loop-unrolling.
@@ -737,8 +768,6 @@ optimizations.extend([
     (('ishr', a, 0), a),
     (('ushr', 0, a), 0),
     (('ushr', a, 0), a),
-   (('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
-   (('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
     (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
     (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'),
     (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
@@ -924,6 +953,7 @@ optimizations.extend([
     (('ilt', 'a(is_lt_zero)',      'b(is_not_negative)'), True),
  
     (('ult', 0, 'a(is_gt_zero)'), True),
+   (('ult', a, 0), False),
  
     # Packing and then unpacking does nothing
     (('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a),
@@ -1018,6 +1048,9 @@ optimizations.extend([
     (('~fadd', '#a',          ('fadd', 'b(is_not_const)', '#c')),  ('fadd', ('fadd', a,          c),           b)),
     (('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
     (('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)),
+   (('iand', '#a', ('iand', 'b(is_not_const)', '#c')), ('iand', ('iand', a, c), b)),
+   (('ior',  '#a', ('ior',  'b(is_not_const)', '#c')), ('ior',  ('ior',  a, c), b)),
+   (('ixor', '#a', ('ixor', 'b(is_not_const)', '#c')), ('ixor', ('ixor', a, c), b)),
  
     # Drop mul-div by the same value when there's no wrapping.
     (('idiv', ('imul(no_signed_wrap)', a, b), b), a),
@@ -1705,6 +1738,9 @@ late_optimizations = [
     (('ior', a, a), a),
     (('iand', a, a), a),
  
+   (('iand', ('ine(is_used_once)', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umin', a, b), 0)),
+   (('ior',  ('ieq(is_used_once)', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umin', a, b), 0)),
+
     (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
  
     (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),