re PR target/82498 (Missed optimization for x86 rotate instruction)
author Jakub Jelinek <jakub@redhat.com>
Thu, 12 Oct 2017 19:10:34 +0000 (21:10 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Thu, 12 Oct 2017 19:10:34 +0000 (21:10 +0200)
PR target/82498
* config/i386/i386.md (*ashl<mode>3_mask_1,
*<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
*<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
patterns.

* gcc.target/i386/pr82498-1.c: New test.
* gcc.target/i386/pr82498-2.c: New test.

From-SVN: r253695

gcc/ChangeLog
gcc/config/i386/i386.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr82498-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr82498-2.c [new file with mode: 0644]

index 92cdc5f0c9a9942aa9aee420c458aa1c7d3c695b..a9dbfce45c34a3869730a04c5393d17e38feeebd 100644 (file)
@@ -1,3 +1,11 @@
+2017-10-12  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/82498
+       * config/i386/i386.md (*ashl<mode>3_mask_1,
+       *<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
+       *<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
+       patterns.
+
 2017-10-12  Jan Hubicka  <hubicka@ucw.cz>
 
        * profile-count.h (safe_scale_64bit): Fix GCC4.x path.
index 9e1f85f2993b93ef101295c4ad8773bbe77b8256..2fa982c3b65564174a2caeae262c1d3e7c2e17ac 100644 (file)
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+;; Left shift of a SWI48 value whose QImode count is a register ANDed
+;; with a constant.  The insn condition accepts the constant only when
+;; it has every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set, and only
+;; while new pseudos can still be created.  The output template is "#"
+;; and the split condition is "&& 1", so the insn is always rewritten
+;; into the same shift taking operand 2 directly, i.e. without the AND.
+;; NOTE(review): dropping the AND presumably relies on the hardware
+;; shift ignoring the higher count bits -- confirm against the ISA manual.
+(define_insn_and_split "*ashl<mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+       (ashift:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand")
+         (and:QI
+           (match_operand:QI 2 "register_operand")
+           (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+          (ashift:SWI48 (match_dup 1)
+                        (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*bmi2_ashl<mode>3_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+;; Right-shift counterpart (iterated over any_shiftrt) of a SWI48 shift
+;; whose QImode count is a register ANDed with a constant.  The insn
+;; condition accepts the constant only when it has every bit of
+;; GET_MODE_BITSIZE (<MODE>mode)-1 set, and only while new pseudos can
+;; still be created.  The output template is "#" and the split condition
+;; is "&& 1", so the insn is always rewritten into the same shift taking
+;; operand 2 directly, i.e. without the AND.
+(define_insn_and_split "*<shift_insn><mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+       (any_shiftrt:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand")
+         (and:QI
+           (match_operand:QI 2 "register_operand")
+           (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+          (any_shiftrt:SWI48 (match_dup 1)
+                             (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn_and_split "*<shift_insn><mode>3_doubleword"
   [(set (match_operand:DWI 0 "register_operand" "=&r")
        (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+;; Rotate (iterated over any_rotate) of a SWI48 value whose QImode count
+;; is a register ANDed with a constant.  The insn condition accepts the
+;; constant only when it has every bit of GET_MODE_BITSIZE (<MODE>mode)-1
+;; set, and only while new pseudos can still be created.  The output
+;; template is "#" and the split condition is "&& 1", so the insn is
+;; always rewritten into the same rotate taking operand 2 directly,
+;; i.e. without the AND.
+(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+       (any_rotate:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand")
+         (and:QI
+           (match_operand:QI 2 "register_operand")
+           (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+          (any_rotate:SWI48 (match_dup 1)
+                            (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 ;; Implement rotation using two double-precision
 ;; shift instructions and a scratch register.
 
       (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = gen_lowpart (QImode, operands[1]);")
 
+;; OR/XOR (iterated over any_or) of register operand 3 with the single
+;; bit (1 << count), where the QImode count is register operand 1 ANDed
+;; with constant operand 2.  Enabled only under TARGET_USE_BT, when the
+;; constant has every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set, and
+;; while new pseudos can still be created.  The output template is "#"
+;; and the split condition is "&& 1", so the insn is always rewritten
+;; into the same expression using operand 1 directly, without the AND.
+(define_insn_and_split "*<btsc><mode>_mask_1"
+  [(set (match_operand:SWI48 0 "register_operand")
+       (any_or:SWI48
+         (ashift:SWI48
+           (const_int 1)
+           (and:QI
+             (match_operand:QI 1 "register_operand")
+             (match_operand:QI 2 "const_int_operand")))
+         (match_operand:SWI48 3 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT
+   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+          (any_or:SWI48
+            (ashift:SWI48 (const_int 1)
+                          (match_dup 1))
+            (match_dup 3)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*btr<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
       (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = gen_lowpart (QImode, operands[1]);")
 
+;; AND of register operand 3 with the constant -2 rotated by a count
+;; (a mask with every bit set except the one selected by the count),
+;; where the QImode count is register operand 1 ANDed with constant
+;; operand 2.  Enabled only under TARGET_USE_BT, when the constant has
+;; every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set, and while new
+;; pseudos can still be created.  The output template is "#" and the
+;; split condition is "&& 1", so the insn is always rewritten into the
+;; same expression using operand 1 directly, without the AND.
+(define_insn_and_split "*btr<mode>_mask_1"
+  [(set (match_operand:SWI48 0 "register_operand")
+       (and:SWI48
+         (rotate:SWI48
+           (const_int -2)
+           (and:QI
+             (match_operand:QI 1 "register_operand")
+             (match_operand:QI 2 "const_int_operand")))
+         (match_operand:SWI48 3 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT
+   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+          (and:SWI48
+            (rotate:SWI48 (const_int -2)
+                          (match_dup 1))
+            (match_dup 3)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 ;; These instructions are never faster than the corresponding
 ;; and/ior/xor operations when using immediate operand, so with
 ;; 32-bit there's no point.  But in 64-bit, we can't hold the
index c8043641a4b02efe0e015b09c6e5c68faabe3431..9e77fa669e50a60f24bfd7f0ab0afa62bfd40827 100644 (file)
@@ -1,3 +1,9 @@
+2017-10-12  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/82498
+       * gcc.target/i386/pr82498-1.c: New test.
+       * gcc.target/i386/pr82498-2.c: New test.
+
 2017-10-12  Jan Hubicka  <hubicka@ucw.cz>
 
        * gcc.dg/predict-13.c: Update template for probaility change.
diff --git a/gcc/testsuite/gcc.target/i386/pr82498-1.c b/gcc/testsuite/gcc.target/i386/pr82498-1.c
new file mode 100644 (file)
index 0000000..78a6698
--- /dev/null
@@ -0,0 +1,52 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+unsigned
+f1 (unsigned x, unsigned char y)  /* Rotate-left idiom; count is an unsigned char.  */
+{
+  if (y == 0)  /* A zero count returns X unchanged.  */
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));  /* Right shift uses width - count.  */
+}
+
+unsigned
+f2 (unsigned x, unsigned y)  /* Rotate-left idiom; count is an unsigned int.  */
+{
+  if (y == 0)  /* A zero count returns X unchanged.  */
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));  /* Right shift uses width - count.  */
+}
+
+unsigned
+f3 (unsigned x, unsigned short y)  /* Rotate-left idiom; count is an unsigned short.  */
+{
+  if (y == 0)  /* A zero count returns X unchanged.  */
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));  /* Right shift uses width - count.  */
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)  /* Rotate-left idiom without a zero check; count is an unsigned char.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));  /* Right count is the masked negation.  */
+}
+
+unsigned
+f5 (unsigned x, unsigned int y)  /* Rotate-left idiom without a zero check; count is an unsigned int.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));  /* Right count is the masked negation.  */
+}
+
+unsigned
+f6 (unsigned x, unsigned short y)  /* Rotate-left idiom without a zero check; count is an unsigned short.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));  /* Right count is the masked negation.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr82498-2.c b/gcc/testsuite/gcc.target/i386/pr82498-2.c
new file mode 100644 (file)
index 0000000..9e065ee
--- /dev/null
@@ -0,0 +1,46 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+int
+f1 (int x, unsigned char y)  /* Right shift of a signed int by a masked count.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return x >> y;
+}
+
+unsigned
+f2 (unsigned x, unsigned char y)  /* Right shift of an unsigned int by a masked count.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return x >> y;
+}
+
+unsigned
+f3 (unsigned x, unsigned char y)  /* Left shift of an unsigned int by a masked count.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the count with (bit width of int) - 1.  */
+  return x << y;
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)  /* Set the bit of X selected by a masked index.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the bit index with (bit width of int) - 1.  */
+  return x | (1U << y);
+}
+
+unsigned
+f5 (unsigned x, unsigned char y)  /* Toggle the bit of X selected by a masked index.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the bit index with (bit width of int) - 1.  */
+  return x ^ (1U << y);
+}
+
+unsigned
+f6 (unsigned x, unsigned char y)  /* Clear the bit of X + 2 selected by a masked index.  */
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;  /* Mask the bit index with (bit width of int) - 1.  */
+  return (x + 2) & ~(1U << y);
+}