combine: Fix up simplify_shift_const_1 for nested ROTATEs [PR97386]
author Jakub Jelinek <jakub@redhat.com>
Tue, 13 Oct 2020 17:13:26 +0000 (19:13 +0200)
committer Jakub Jelinek <jakub@redhat.com>
Tue, 13 Oct 2020 17:13:26 +0000 (19:13 +0200)
The following testcases are miscompiled (the first one since my improvements
to rotate discovery on GIMPLE, the other one for many years) because the
combiner optimizes nested ROTATEs with a narrowing SUBREG in between (i.e.
the outer rotate is performed in a shorter precision than the inner one) into
just one ROTATE of the rotated constant.  While that (under certain
conditions) can work for shifts, it can't work for rotates: two nested
rotates can only be merged when both have the same precision.

2020-10-13  Jakub Jelinek  <jakub@redhat.com>

PR rtl-optimization/97386
* combine.c (simplify_shift_const_1): Don't optimize nested ROTATEs if
they have different modes.

* gcc.c-torture/execute/pr97386-1.c: New test.
* gcc.c-torture/execute/pr97386-2.c: New test.

gcc/combine.c
gcc/testsuite/gcc.c-torture/execute/pr97386-1.c [new file with mode: 0644]
gcc/testsuite/gcc.c-torture/execute/pr97386-2.c [new file with mode: 0644]

diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3bdf1a1801f1837f244451922879ef..4782e1d9dccb0aa21d75a6aff4d2769c60242e78 100644 (file)
@@ -11003,8 +11003,11 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
                break;
              /* For ((int) (cstLL >> count)) >> cst2 just give up.  Queuing
                 up outer sign extension (often left and right shift) is
-                hardly more efficient than the original.  See PR70429.  */
-             if (code == ASHIFTRT && int_mode != int_result_mode)
+                hardly more efficient than the original.  See PR70429.
+                Similarly punt for rotates with different modes.
+                See PR97386.  */
+             if ((code == ASHIFTRT || code == ROTATE)
+                 && int_mode != int_result_mode)
                break;
 
              rtx count_rtx = gen_int_shift_amount (int_result_mode, count);
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97386-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97386-1.c
new file mode 100644 (file)
index 0000000..c50e038
--- /dev/null
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/97386 */
+
+__attribute__((noipa)) unsigned char
+foo (unsigned int c)
+{
+  return __builtin_bswap16 ((unsigned long long) (0xccccLLU << c | 0xccccLLU >> ((-c) & 63)));
+}
+
+int
+main ()
+{
+  unsigned char x = foo (0);
+  if (__CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 && x != 0xcc)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97386-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97386-2.c
new file mode 100644 (file)
index 0000000..e61829d
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR rtl-optimization/97386 */
+
+__attribute__((noipa)) unsigned
+foo (int x)
+{
+  unsigned long long a = (0x800000000000ccccULL << x) | (0x800000000000ccccULL >> (64 - x));
+  unsigned int b = a;
+  return (b << 24) | (b >> 8);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ == 8
+      && __SIZEOF_INT__ == 4
+      &&  __SIZEOF_LONG_LONG__ == 8
+      && foo (1) != 0x99000199U)
+    __builtin_abort ();
+  return 0;
+}