reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719]
authorJakub Jelinek <jakub@redhat.com>
Thu, 31 Dec 2020 09:19:06 +0000 (10:19 +0100)
committerJakub Jelinek <jakub@redhat.com>
Thu, 31 Dec 2020 09:19:06 +0000 (10:19 +0100)
The following patch adds an optimization mentioned in PR56719 #c8.
We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
and x != -1 && y != -1 && y != -1 into (x & y & z) != -1
optimizations, this patch just extends that to
x < C && y < C && z < C for power of two constants C into
(x | y | z) < C (for unsigned comparisons).

I didn't want to create too many buckets (there can be TYPE_PRECISION such
constants), so the patch instead just uses one buckets for all such
constants and loops over that bucket up to TYPE_PRECISION times.

2020-12-31  Jakub Jelinek  <jakub@redhat.com>

PR tree-optimization/56719
* tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
x < C && y < C && z < C when C is a power of two constant into
(x | y | z) < C.

* gcc.dg/tree-ssa/pr56719.c: New test.

gcc/testsuite/gcc.dg/tree-ssa/pr56719.c [new file with mode: 0644]
gcc/tree-ssa-reassoc.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr56719.c b/gcc/testsuite/gcc.dg/tree-ssa/pr56719.c
new file mode 100644 (file)
index 0000000..5030b69
--- /dev/null
@@ -0,0 +1,33 @@
+/* PR tree-optimization/56719 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */
+
+int
+f1 (int x, int y)
+{
+  return x > 0x3ffU || y > 0x3ffU;
+}
+
+int
+f2 (int x, int y, int z, unsigned w)
+{
+  return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU;
+}
+
+int
+f3 (int x, int y)
+{
+  return x <= 0x3ffU && y <= 0x3ffU;
+}
+
+int
+f4 (int x, int y, unsigned z, unsigned w)
+{
+  return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU;
+}
index e594230436dbca2b8c3ffe786857431e09d99460..4bc9004ded754fa95bb95d1d3679280fd0304146 100644 (file)
@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
 }
 
 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
-   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.  */
+   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
+   Also, handle x < C && y < C && z < C where C is power of two as
+   (x | y | z) < C.  */
 
 static bool
 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
 
   for (i = first; i < length; i++)
     {
+      int idx;
+
       if (ranges[i].exp == NULL_TREE
          || TREE_CODE (ranges[i].exp) != SSA_NAME
          || !ranges[i].in_p
          || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
-         || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
-         || ranges[i].low == NULL_TREE
-         || ranges[i].low != ranges[i].high)
+         || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
        continue;
 
-      bool zero_p = integer_zerop (ranges[i].low);
-      if (!zero_p && !integer_all_onesp (ranges[i].low))
+      if (ranges[i].low != NULL_TREE
+         && ranges[i].high != NULL_TREE
+         && tree_int_cst_equal (ranges[i].low, ranges[i].high))
+       {
+         idx = !integer_zerop (ranges[i].low);
+         if (idx && !integer_all_onesp (ranges[i].low))
+           continue;
+       }
+      else if (ranges[i].high != NULL_TREE
+              && TREE_CODE (ranges[i].high) == INTEGER_CST)
+       {
+         wide_int w = wi::to_wide (ranges[i].high);
+         int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
+         int l = wi::clz (w);
+         idx = 2;
+         if (l <= 0
+             || l >= prec
+             || w != wi::mask (prec - l, false, prec))
+           continue;
+         if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
+                && ranges[i].low == NULL_TREE)
+               || (ranges[i].low
+                   && integer_zerop (ranges[i].low))))
+           continue;
+       }
+      else
        continue;
 
-      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
+      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
       if (buckets.length () <= b)
        buckets.safe_grow_cleared (b + 1, true);
       if (chains.length () <= (unsigned) i)
@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
     if (i && chains[i - 1])
       {
        int j, k = i;
+       if ((b % 3) == 2)
+         {
+           /* When ranges[X - 1].high + 1 is a power of two,
+              we need to process the same bucket up to
+              precision - 1 times, each time split the entries
+              with the same high bound into one chain and the
+              rest into another one to be processed later.  */
+           int this_prev = i;
+           int other_prev = 0;
+           for (j = chains[i - 1]; j; j = chains[j - 1])
+             {
+               if (tree_int_cst_equal (ranges[i - 1].high,
+                                       ranges[j - 1].high))
+                 {
+                   chains[this_prev - 1] = j;
+                   this_prev = j;
+                 }
+               else if (other_prev == 0)
+                 {
+                   buckets[b] = j;
+                   other_prev = j;
+                 }
+               else
+                 {
+                   chains[other_prev - 1] = j;
+                   other_prev = j;
+                 }
+             }
+           chains[this_prev - 1] = 0;
+           if (other_prev)
+             chains[other_prev - 1] = 0;
+           if (chains[i - 1] == 0)
+             {
+               if (other_prev)
+                 b--;
+               continue;
+             }
+         }
        for (j = chains[i - 1]; j; j = chains[j - 1])
          {
            gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
                exp = gimple_assign_lhs (g);
              }
            g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
-                                    (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
-                                    op, exp);
+                                    (b % 3) == 1
+                                    ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
            gimple_seq_add_stmt_without_update (&seq, g);
            op = gimple_assign_lhs (g);
          }
@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
        if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
                               candidates.length (), opcode, ops, op,
                               seq, true, ranges[k - 1].low,
-                              ranges[k - 1].low, strict_overflow_p))
+                              ranges[k - 1].high, strict_overflow_p))
          any_changes = true;
        else
          gimple_seq_discard (seq);
+       if ((b % 3) == 2 && buckets[b] != i)
+         /* There is more work to do for this bucket.  */
+         b--;
       }
 
   return any_changes;