re PR target/58762 ([missed optimization] Vectorizing abs(int).)
author Cong Hou <congh@google.com>
Wed, 30 Oct 2013 20:01:47 +0000 (16:01 -0400)
committer Cong Hou <congh@gcc.gnu.org>
Wed, 30 Oct 2013 20:01:47 +0000 (16:01 -0400)
2013-10-30  Cong Hou  <congh@google.com>

    PR target/58762
    * config/i386/i386-protos.h (ix86_expand_sse2_abs): New function.
    * config/i386/i386.c (ix86_expand_sse2_abs): New function.
    * config/i386/sse.md: Add SSE2 support to abs (8/16/32-bit-int).

2013-10-30  Cong Hou  <congh@google.com>

    * gcc.target/i386/vect-abs-s8.c: New test.
    * gcc.target/i386/vect-abs-s16.c: New test.
    * gcc.target/i386/vect-abs-s32.c: New test.

From-SVN: r204229

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/vect-abs-s16.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-abs-s32.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-abs-s8.c [new file with mode: 0644]

index e3d2abda205e59a7ccb3f0f3a9ceb84ed2073870..0bf4cbc56d5b478628c2e1cb3d9ce9a3e2f153a8 100644 (file)
@@ -1,3 +1,10 @@
+2013-10-30  Cong Hou  <congh@google.com>
+
+       PR target/58762
+       * config/i386/i386-protos.h (ix86_expand_sse2_abs): New function.
+       * config/i386/i386.c (ix86_expand_sse2_abs): New function.
+       * config/i386/sse.md: Add SSE2 support to abs (8/16/32-bit-int).
+
 2013-10-18  Mikael Pettersson  <mikpelinux@gmail.com>
 
        PR rtl-optimization/58369
index b1d014a1a27f558716968c92beee10848956d95b..5799251404bed6e2b0f003f9f5f260f604d2b3f3 100644 (file)
@@ -240,6 +240,7 @@ extern void ix86_expand_mul_widen_evenodd (rtx, rtx, rtx, bool, bool);
 extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
 extern void ix86_expand_sse2_mulv4si3 (rtx, rtx, rtx);
 extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
+extern void ix86_expand_sse2_abs (rtx, rtx);
 
 extern bool ix86_bnd_prefixed_insn_p (rtx);
 
index 93a8b288c05dfc0ba29b7744c69571f428dfd510..3131efd2b5ccf7f8ebdd78d43395468a54365a5b 100644 (file)
@@ -42020,6 +42020,67 @@ ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED)
   return false;
 }
 
+/* Expand into OP0 the element-wise absolute value of the integer vector
+   OP1 using only SSE2 operations.  The vector mode is taken from OP0;
+   V4SImode, V8HImode and V16QImode are handled, and any other mode is
+   treated as a caller error.  */
+
+void
+ix86_expand_sse2_abs (rtx op0, rtx op1)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx tmp0;
+  rtx tmp1 = NULL_RTX;
+  rtx x = NULL_RTX;
+
+  switch (mode)
+    {
+      /* For 32-bit signed integer X, the best way to calculate the absolute
+        value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
+      case V4SImode:
+       tmp0 = expand_simple_binop (mode, ASHIFTRT, op1,
+                                   GEN_INT (GET_MODE_BITSIZE
+                                                (GET_MODE_INNER (mode)) - 1),
+                                   NULL, 0, OPTAB_DIRECT);
+       if (tmp0)
+         tmp1 = expand_simple_binop (mode, XOR, op1, tmp0,
+                                     NULL, 0, OPTAB_DIRECT);
+       if (tmp1)
+         x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
+                                  op0, 0, OPTAB_DIRECT);
+       break;
+
+      /* For 16-bit signed integer X, the best way to calculate the absolute
+        value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
+      case V8HImode:
+       tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+       if (tmp0)
+         x = expand_simple_binop (mode, SMAX, op1, tmp0, op0, 0,
+                                  OPTAB_DIRECT);
+       break;
+
+      /* For 8-bit signed integer X, the best way to calculate the absolute
+        value of X is min ((unsigned char) X, (unsigned char) (-X)),
+        as SSE2 provides the PMINUB insn.  */
+      case V16QImode:
+       tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+       if (tmp0)
+         x = expand_simple_binop (V16QImode, UMIN, op1, tmp0, op0, 0,
+                                  OPTAB_DIRECT);
+       break;
+
+      default:
+       gcc_unreachable ();
+    }
+
+  /* OP0 is only a suggested target for the expanders above; the value may
+     have been computed into a different rtx, so copy it over.  A failed
+     expansion must not be ignored, since the caller emits nothing else.  */
+  gcc_assert (x);
+  if (x != op0)
+    emit_move_insn (op0, x);
+}
+
 /* Expand an insert into a vector register through pinsr insn.
    Return true if successful.  */
 
index 041ca64749328179a21e95c1e82b88ebe6c70856..584a01116e6b35e9eb1fdf6530bdcfc4ab879b8a 100644 (file)
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])
 
-(define_insn "abs<mode>2"
+(define_insn "*abs<mode>2"
   [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
        (abs:VI124_AVX2_48_AVX512F
          (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Expander for the vector integer abs pattern.  When SSSE3 is not
+;; available the whole expansion is delegated to ix86_expand_sse2_abs and
+;; the expander finishes with DONE; otherwise the preparation code does
+;; nothing and the RTL template below is used as written.
+(define_expand "abs<mode>2"
+  [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
+       (abs:VI124_AVX2_48_AVX512F
+         (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
+  "TARGET_SSE2"
+{
+  if (!TARGET_SSSE3)
+    {
+      ix86_expand_sse2_abs (operands[0], operands[1]);
+      DONE;
+    }
+})
+
 (define_insn "abs<mode>2"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
index 690c6fc29291c18537fd5badb1e9f61fe0bd998c..2b56fbf8b90b7d5a5fbb2684003f6b3bafaffb22 100644 (file)
@@ -1,3 +1,9 @@
+2013-10-30  Cong Hou  <congh@google.com>
+
+       * gcc.target/i386/vect-abs-s8.c: New test.
+       * gcc.target/i386/vect-abs-s16.c: New test.
+       * gcc.target/i386/vect-abs-s32.c: New test.
+
 2013-10-30  Tobias Burnus  <burnus@net-b.de>
 
        * gcc.dg/cilk-plus/cilk-plus.exp: Add the libcilkrts library
diff --git a/gcc/testsuite/gcc.target/i386/vect-abs-s16.c b/gcc/testsuite/gcc.target/i386/vect-abs-s16.c
new file mode 100644 (file)
index 0000000..191ae34
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+
+/* abs over short elements; the vect dump scan below expects 1 vectorized loop.  */
+void test (short* a, short* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-abs-s32.c b/gcc/testsuite/gcc.target/i386/vect-abs-s32.c
new file mode 100644 (file)
index 0000000..575e8ef
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+
+/* abs over int elements; the vect dump scan below expects 1 vectorized loop.  */
+void test (int* a, int* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-abs-s8.c b/gcc/testsuite/gcc.target/i386/vect-abs-s8.c
new file mode 100644 (file)
index 0000000..3f3f3fa
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+
+/* abs over char elements; the vect dump scan below expects 1 vectorized loop.  */
+void test (char* a, char* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */