middle-end: Parity and popcount folding optimizations.
authorRoger Sayle <roger@nextmovesoftware.com>
Tue, 28 Jul 2020 21:55:12 +0000 (22:55 +0100)
committerRoger Sayle <roger@nextmovesoftware.com>
Tue, 28 Jul 2020 21:56:24 +0000 (22:56 +0100)
This patch implements several constant folding optimizations
for __builtin_parity and friends.  We canonicalize popcount(x)&1
as parity(x) in gimple, and potentially convert back again when
we expand to RTL.  parity(~x) is simplified to parity(x), which
is true for all integer modes with an even number of bits.
But probably most usefully, parity(x)^parity(y) can be simplified
to a parity(x^y), requiring only a single libcall or popcount.

This patch optimizes popcount and parity of an argument known to have
at most a single bit set, to be that single bit.  Hence, popcount(x&8)
is simplified to (x>>3)&1.   This generalizes the existing optimization
of popcount(x&1) being simplified to x&1, which is cleaned up with
this patch.

2020-07-28  Roger Sayle  <roger@nextmovesoftware.com>
    Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
* match.pd (popcount(x)&1 -> parity(x)): New simplification.
(parity(~x) -> parity(x)): New simplification.
(parity(x)^parity(y) -> parity(x^y)): New simplification.
(parity(x&1) -> x&1): New simplification.
(popcount(x) -> x>>C): New simplification.

gcc/testsuite/ChangeLog
* gcc.dg/fold-popcount-5.c: New test.
* gcc.dg/fold-parity-1.c: Likewise.
* gcc.dg/fold-parity-2.c: Likewise.
* gcc.dg/fold-parity-3.c: Likewise.
* gcc.dg/fold-parity-4.c: Likewise.
* gcc.dg/fold-parity-5.c: Likewise.

gcc/match.pd
gcc/testsuite/gcc.dg/fold-parity-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/fold-parity-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/fold-parity-3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/fold-parity-4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/fold-parity-5.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/fold-popcount-5.c [new file with mode: 0644]

index c6ae7a7db7aee88b8d42669133ddfe3f70f8761f..a052c9e3dbcc49e32df6f0c39f68f9b725d317c3 100644 (file)
@@ -5964,25 +5964,51 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (IFN_FMA @0 @1 @2))))
 
 /* POPCOUNT simplifications.  */
-(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL
-              BUILT_IN_POPCOUNTIMAX)
-  /* popcount(X&1) is nop_expr(X&1).  */
-  (simplify
-    (popcount @0)
-    (if (tree_nonzero_bits (@0) == 1)
-      (convert @0)))
-  /* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero.  */
-  (simplify
-    (plus (popcount:s @0) (popcount:s @1))
-    (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0)
-      (popcount (bit_ior @0 @1))))
-  /* popcount(X) == 0 is X == 0, and related (in)equalities.  */
+/* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero.  */
+(simplify
+  (plus (POPCOUNT:s @0) (POPCOUNT:s @1))
+  (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0)
+    (POPCOUNT (bit_ior @0 @1))))
+
+/* popcount(X) == 0 is X == 0, and related (in)equalities.  */
+(for popcount (POPCOUNT)
   (for cmp (le eq ne gt)
        rep (eq eq ne ne)
     (simplify
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* Canonicalize POPCOUNT(x)&1 as PARITY(X).  */
+(simplify
+  (bit_and (POPCOUNT @0) integer_onep)
+  (PARITY @0))
+
+/* PARITY simplifications.  */
+/* parity(~X) is parity(X).  */
+(simplify
+  (PARITY (bit_not @0))
+  (PARITY @0))
+
+/* parity(X)^parity(Y) is parity(X^Y).  */
+(simplify
+  (bit_xor (PARITY:s @0) (PARITY:s @1))
+  (PARITY (bit_xor @0 @1)))
+
+/* Common POPCOUNT/PARITY simplifications.  */
+/* popcount(X&C1) is (X>>C2)&1 when C1 == 1<<C2.  Same for parity(X&C1).  */
+(for pfun (POPCOUNT PARITY)
+  (simplify
+    (pfun @0)
+    (with { wide_int nz = tree_nonzero_bits (@0); }
+      (switch
+       (if (nz == 1)
+         (convert @0))
+       (if (wi::popcount (nz) == 1)
+         (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+           (convert (rshift:utype (convert:utype @0)
+                                  { build_int_cst (integer_type_node,
+                                                   wi::ctz (nz)); }))))))))
+
 #if GIMPLE
 /* 64- and 32-bits branchless implementations of popcount are detected:
 
diff --git a/gcc/testsuite/gcc.dg/fold-parity-1.c b/gcc/testsuite/gcc.dg/fold-parity-1.c
new file mode 100644 (file)
index 0000000..3ba56c7
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+
+int foo(unsigned int x)
+{
+  return __builtin_popcount(x) & 1;
+}
+
+int fool(unsigned long x)
+{
+  return __builtin_popcountl(x) & 1;
+}
+
+int fooll(unsigned long long x)
+{
+  return __builtin_popcountll(x) & 1;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "original" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "original" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-parity-2.c b/gcc/testsuite/gcc.dg/fold-parity-2.c
new file mode 100644 (file)
index 0000000..8c7acbf
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+  return __builtin_parity(~x);
+}
+
+int fool(unsigned long x)
+{
+  return __builtin_parityl(~x);
+}
+
+int fooll(unsigned long long x)
+{
+  return __builtin_parityll(~x);
+}
+
+/* { dg-final { scan-tree-dump-times "~" 0 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-parity-3.c b/gcc/testsuite/gcc.dg/fold-parity-3.c
new file mode 100644 (file)
index 0000000..e0355cc
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+  return __builtin_parity(x&1);
+}
+
+int fool(unsigned long x)
+{
+  return __builtin_parityl(x&1);
+}
+
+int fooll(unsigned long long x)
+{
+  return __builtin_parityll(x&1);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_parity" 0 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-parity-4.c b/gcc/testsuite/gcc.dg/fold-parity-4.c
new file mode 100644 (file)
index 0000000..5dfedab
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x, unsigned int y)
+{
+  return __builtin_parity(x) ^ __builtin_parity(y);
+}
+
+int fool(unsigned long x, unsigned long y)
+{
+  return __builtin_parityl(x) ^ __builtin_parityl(y);
+}
+
+int fooll(unsigned long long x, unsigned long long y)
+{
+  return __builtin_parityll(x) ^ __builtin_parityll(y);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-parity-5.c b/gcc/testsuite/gcc.dg/fold-parity-5.c
new file mode 100644 (file)
index 0000000..69d3a6a
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int test_and4(unsigned int a)
+{
+  return __builtin_parity(a&4);
+}
+
+int test_and4l(unsigned long b)
+{
+  return __builtin_parityl(b&4);
+}
+
+int test_and4ll(unsigned long long c)
+{
+  return __builtin_parityll(c&4);
+}
+
+int test_shift(unsigned int d)
+{
+  int bits = 8*sizeof(unsigned int)-1;
+  return __builtin_parity(d<<31);
+}
+
+int test_shiftl(unsigned long e)
+{
+  int bits = 8*sizeof(unsigned long)-1;
+  return __builtin_parityl(e<<bits);
+}
+
+int test_shiftll(unsigned long long f)
+{
+  int bits = 8*sizeof(unsigned long long)-1;
+  return __builtin_parityll(f<<bits);
+}
+
+/* { dg-final { scan-tree-dump-times "parity" 0 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-5.c b/gcc/testsuite/gcc.dg/fold-popcount-5.c
new file mode 100644 (file)
index 0000000..943726f
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int test_and4(unsigned int a)
+{
+  return __builtin_popcount(a&4);
+}
+
+int test_and4l(unsigned long b)
+{
+  return __builtin_popcountl(b&4);
+}
+
+int test_and4ll(unsigned long long c)
+{
+  return __builtin_popcountll(c&4);
+}
+
+int test_shift(unsigned int d)
+{
+  int bits = 8*sizeof(unsigned int)-1;
+  return __builtin_popcount(d<<31);
+}
+
+int test_shiftl(unsigned long e)
+{
+  int bits = 8*sizeof(unsigned long)-1;
+  return __builtin_popcountl(e<<bits);
+}
+
+int test_shiftll(unsigned long long f)
+{
+  int bits = 8*sizeof(unsigned long long)-1;
+  return __builtin_popcountll(f<<bits);
+}
+
+/* { dg-final { scan-tree-dump-times "popcount" 0 "optimized" } } */
+