re PR target/85572 (faster code for absolute value of __v2di)
author Jakub Jelinek <jakub@redhat.com>
Tue, 8 May 2018 12:16:19 +0000 (14:16 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 8 May 2018 12:16:19 +0000 (14:16 +0200)
PR target/85572
* config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and
E_V4DImode.
* config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of
VI1248_AVX512VL_AVX512BW.  Handle V2DImode and V4DImode if not
TARGET_AVX512VL using ix86_expand_sse2_abs.  Formatting fixes.

* g++.dg/other/sse2-pr85572-1.C: New test.
* g++.dg/other/sse2-pr85572-2.C: New test.
* g++.dg/other/sse4-pr85572-1.C: New test.
* g++.dg/other/avx2-pr85572-1.C: New test.

From-SVN: r260041

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/g++.dg/other/avx2-pr85572-1.C [new file with mode: 0644]
gcc/testsuite/g++.dg/other/sse2-pr85572-1.C [new file with mode: 0644]
gcc/testsuite/g++.dg/other/sse2-pr85572-2.C [new file with mode: 0644]
gcc/testsuite/g++.dg/other/sse4-pr85572-1.C [new file with mode: 0644]

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0ff143bf60309bb5684a250d035c3b846b52b910..8ab847b8f21bacb393e9c6d85fd8ef972e543031 100644 (file)
@@ -1,5 +1,12 @@
 2018-05-08  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/85572
+       * config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and
+       E_V4DImode.
+       * config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of
+       VI1248_AVX512VL_AVX512BW.  Handle V2DImode and V4DImode if not
+       TARGET_AVX512VL using ix86_expand_sse2_abs.  Formatting fixes.
+
        PR target/85317
        * config/i386/i386.c (ix86_fold_builtin): Handle
        IX86_BUILTIN_{,P}MOVMSK{PS,PD,B}{,128,256}.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d99ff968c904e6fed82dd6863a93eab6baf4c143..ab15ac39ae61bf506cd7240e4fb2fb96e685cd6b 100644 (file)
@@ -49837,39 +49837,70 @@ ix86_expand_sse2_abs (rtx target, rtx input)
 
   switch (mode)
     {
+    case E_V2DImode:
+    case E_V4DImode:
+      /* For 64-bit signed integer X, with SSE4.2 use
+        pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X.
+        Otherwise handle it similarly to V4SImode, except use 64 as W instead of
+        32 and, as a 64-bit arithmetic right shift is unimplemented, build the
+        sign mask with a logical right shift followed by negation.  */
+      if (TARGET_SSE4_2)
+       {
+         tmp0 = gen_reg_rtx (mode);
+         tmp1 = gen_reg_rtx (mode);
+         emit_move_insn (tmp1, CONST0_RTX (mode));
+         if (mode == E_V2DImode)
+           emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input));
+         else
+           emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input));
+       }
+      else
+       {
+         tmp0 = expand_simple_binop (mode, LSHIFTRT, input,
+                                     GEN_INT (GET_MODE_UNIT_BITSIZE (mode)
+                                              - 1), NULL, 0, OPTAB_DIRECT);
+         tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false);
+       }
+
+      tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
+                                 NULL, 0, OPTAB_DIRECT);
+      x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
+                              target, 0, OPTAB_DIRECT);
+      break;
+
+    case E_V4SImode:
       /* For 32-bit signed integer X, the best way to calculate the absolute
         value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
-      case E_V4SImode:
-       tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
-                                   GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
-                                   NULL, 0, OPTAB_DIRECT);
-       tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
-                                   NULL, 0, OPTAB_DIRECT);
-       x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
-                                target, 0, OPTAB_DIRECT);
-       break;
+      tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
+                                 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
+                                 NULL, 0, OPTAB_DIRECT);
+      tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
+                                 NULL, 0, OPTAB_DIRECT);
+      x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
+                              target, 0, OPTAB_DIRECT);
+      break;
 
+    case E_V8HImode:
       /* For 16-bit signed integer X, the best way to calculate the absolute
         value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
-      case E_V8HImode:
-       tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
+      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
 
-       x = expand_simple_binop (mode, SMAX, tmp0, input,
-                                target, 0, OPTAB_DIRECT);
-       break;
+      x = expand_simple_binop (mode, SMAX, tmp0, input,
+                              target, 0, OPTAB_DIRECT);
+      break;
 
+    case E_V16QImode:
       /* For 8-bit signed integer X, the best way to calculate the absolute
         value of X is min ((unsigned char) X, (unsigned char) (-X)),
         as SSE2 provides the PMINUB insn.  */
-      case E_V16QImode:
-       tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
+      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
 
-       x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
-                                target, 0, OPTAB_DIRECT);
-       break;
+      x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
+                              target, 0, OPTAB_DIRECT);
+      break;
 
-      default:
-       gcc_unreachable ();
+    default:
+      gcc_unreachable ();
     }
 
   if (x != target)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index aab4261343e68bd16d7b351cdb10e764bc2ac429..ae6294e559cbb65b1e970975c2b0248a4f5a5f8d 100644 (file)
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "abs<mode>2"
-  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
-       (abs:VI1248_AVX512VL_AVX512BW
-         (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
+  [(set (match_operand:VI_AVX2 0 "register_operand")
+       (abs:VI_AVX2
+         (match_operand:VI_AVX2 1 "vector_operand")))]
   "TARGET_SSE2"
 {
-  if (!TARGET_SSSE3)
+  if (!TARGET_SSSE3
+      || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
+         && !TARGET_AVX512VL))
     {
       ix86_expand_sse2_abs (operands[0], operands[1]);
       DONE;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c6d35858e4c009424255940723824264075fd8bd..96a373ca9435b8b104110fca5decca8d1ef9a070 100644 (file)
@@ -1,5 +1,11 @@
 2018-05-08  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/85572
+       * g++.dg/other/sse2-pr85572-1.C: New test.
+       * g++.dg/other/sse2-pr85572-2.C: New test.
+       * g++.dg/other/sse4-pr85572-1.C: New test.
+       * g++.dg/other/avx2-pr85572-1.C: New test.
+
        PR target/85317
        * gcc.target/i386/pr85317.c: New test.
        * gcc.target/i386/avx2-vpmovmskb-2.c (avx2_test): Add asm volatile
diff --git a/gcc/testsuite/g++.dg/other/avx2-pr85572-1.C b/gcc/testsuite/g++.dg/other/avx2-pr85572-1.C
new file mode 100644 (file)
index 0000000..adde2ea
--- /dev/null
@@ -0,0 +1,21 @@
+// PR target/85572
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-O2 -mavx2 -mno-avx512f" }
+// { dg-final { scan-assembler-times {\mvpxor\M} 4 } }
+// { dg-final { scan-assembler-times {\mvpcmpgtq\M} 2 } }
+// { dg-final { scan-assembler-times {\mvpsubq\M} 2 } }
+
+typedef long long V __attribute__((vector_size (16)));
+typedef long long W __attribute__((vector_size (32)));
+
+V
+foo (V x)
+{
+  return x < 0 ? -x : x;
+}
+
+W
+bar (W x)
+{
+  return x < 0 ? -x : x;
+}
diff --git a/gcc/testsuite/g++.dg/other/sse2-pr85572-1.C b/gcc/testsuite/g++.dg/other/sse2-pr85572-1.C
new file mode 100644 (file)
index 0000000..e4c4423
--- /dev/null
@@ -0,0 +1,14 @@
+// PR target/85572
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-O2 -msse2 -mno-sse3" }
+// { dg-final { scan-assembler-times {\mpxor\M} 2 } }
+// { dg-final { scan-assembler-times {\mpsubq\M} 2 } }
+// { dg-final { scan-assembler-times {\mpsrlq\M} 1 } }
+
+typedef long long V __attribute__((vector_size (16)));
+
+V
+foo (V x)
+{
+  return x < 0 ? -x : x;
+}
diff --git a/gcc/testsuite/g++.dg/other/sse2-pr85572-2.C b/gcc/testsuite/g++.dg/other/sse2-pr85572-2.C
new file mode 100644 (file)
index 0000000..8eb399b
--- /dev/null
@@ -0,0 +1,45 @@
+// PR target/85572
+// { dg-do run { target i?86-*-* x86_64-*-* } }
+// { dg-options "-O2 -msse2" }
+// { dg-require-effective-target sse2_runtime }
+
+typedef long long V __attribute__((vector_size (16)));
+typedef long long W __attribute__((vector_size (32)));
+
+__attribute__((noipa)) V
+foo (V x)
+{
+  return x < 0 ? -x : x;
+}
+
+__attribute__((noipa)) void
+bar (W *x, W *y)
+{
+  *y = *x < 0 ? -*x : *x;
+}
+
+int
+main ()
+{
+  V a = { 11LL, -15LL };
+  V b = foo (a);
+  if (b[0] != 11LL || b[1] != 15LL)
+    __builtin_abort ();
+  V c = { -123456789123456LL, 654321654321654LL };
+  V d = foo (c);
+  if (d[0] != 123456789123456LL || d[1] != 654321654321654LL)
+    __builtin_abort ();
+  V e = { 0, 1 };
+  V f = foo (e);
+  if (f[0] != 0 || f[1] != 1)
+    __builtin_abort ();
+  W g = { 17LL, -32LL, -123456789123456LL, 654321654321654LL }, h;
+  bar (&g, &h);
+  if (h[0] != 17LL || h[1] != 32LL
+      || h[2] != 123456789123456LL || h[3] != 654321654321654LL)
+    __builtin_abort ();
+  W i = { 0, 1, -1, 0 }, j;
+  bar (&i, &j);
+  if (j[0] != 0 || j[1] != 1 || j[2] != 1 || j[3] != 0)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/other/sse4-pr85572-1.C b/gcc/testsuite/g++.dg/other/sse4-pr85572-1.C
new file mode 100644 (file)
index 0000000..fe0adae
--- /dev/null
@@ -0,0 +1,14 @@
+// PR target/85572
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-O2 -msse4 -mno-avx" }
+// { dg-final { scan-assembler-times {\mpxor\M} 2 } }
+// { dg-final { scan-assembler-times {\mpcmpgtq\M} 1 } }
+// { dg-final { scan-assembler-times {\mpsubq\M} 1 } }
+
+typedef long long V __attribute__((vector_size (16)));
+
+V
+foo (V x)
+{
+  return x < 0 ? -x : x;
+}