re PR target/88547 (missed optimization for vector comparisons)
authorJakub Jelinek <jakub@redhat.com>
Fri, 21 Dec 2018 10:37:11 +0000 (11:37 +0100)
committerJakub Jelinek <jakub@gcc.gnu.org>
Fri, 21 Dec 2018 10:37:11 +0000 (11:37 +0100)
PR target/88547
* config/i386/i386.c (ix86_expand_int_sse_cmp): Optimize
x > y ? 0 : -1 into min (x, y) == x ? -1 : 0.

* gcc.target/i386/pr88547-1.c: Expect only 2 knotb and 2 knotw
insns instead of 4, check for vpminud, vpminuq and no vpsubd or
vpsubq.
* gcc.target/i386/sse2-pr88547-1.c: New test.
* gcc.target/i386/sse2-pr88547-2.c: New test.
* gcc.target/i386/sse4_1-pr88547-1.c: New test.
* gcc.target/i386/sse4_1-pr88547-2.c: New test.
* gcc.target/i386/avx2-pr88547-1.c: New test.
* gcc.target/i386/avx2-pr88547-2.c: New test.
* gcc.target/i386/avx512f-pr88547-2.c: New test.
* gcc.target/i386/avx512vl-pr88547-1.c: New test.
* gcc.target/i386/avx512vl-pr88547-2.c: New test.
* gcc.target/i386/avx512vl-pr88547-3.c: New test.
* gcc.target/i386/avx512f_cond_move.c (y): Change from unsigned int
array to int array.

From-SVN: r267322

15 files changed:
gcc/ChangeLog
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f_cond_move.c
gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88547-1.c
gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c [new file with mode: 0644]

index c6641033e5e9d78783998ab592c90291c0cf9162..7f09e0066fb362408a14146c0c3b2b0914389cc7 100644 (file)
@@ -1,3 +1,9 @@
+2018-12-21  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/88547
+       * config/i386/i386.c (ix86_expand_int_sse_cmp): Optimize
+       x > y ? 0 : -1 into min (x, y) == x ? -1 : 0.
+
 2018-12-21  Jan Beulich  <jbeulich@suse.com>
 
        * config/i386/sse.md (vaesdec_<mode>, vaesdeclast_<mode>): Allow
index 0ea3bb3a4bce81d1a942053f8c3c49e9d1e145c1..02e24bc5796dc4d96eacc07fc6699bd6a012cedf 100644 (file)
@@ -24126,6 +24126,104 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
            }
        }
 
+      rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode);
+      rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode);
+      if (*negate)
+       std::swap (optrue, opfalse);
+
+      /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when
+        not using integer masks into min (x, y) == x ? -1 : 0 (i.e.
+        min (x, y) == x).  While we add one instruction (the minimum),
+        we remove the need for two instructions in the negation, as the
+        EQ comparison already yields the result in the required form.
+        When using masks, do it for SI/DImode element types, as it is shorter
+        than the two subtractions.  */
+      if ((code != EQ
+          && GET_MODE_SIZE (mode) != 64
+          && vector_all_ones_operand (opfalse, data_mode)
+          && optrue == CONST0_RTX (data_mode))
+         || (code == GTU
+             && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
+             /* Don't do it if not using integer masks and we'd end up with
+                the right values in the registers though.  */
+             && (GET_MODE_SIZE (mode) == 64
+                 || !vector_all_ones_operand (optrue, data_mode)
+                 || opfalse != CONST0_RTX (data_mode))))
+       {
+         rtx (*gen) (rtx, rtx, rtx) = NULL;
+
+         switch (mode)
+           {
+           case E_V16SImode:
+             gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3;
+             break;
+           case E_V8DImode:
+             gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3;
+             cop0 = force_reg (mode, cop0);
+             cop1 = force_reg (mode, cop1);
+             break;
+           case E_V32QImode:
+             if (TARGET_AVX2)
+               gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3;
+             break;
+           case E_V16HImode:
+             if (TARGET_AVX2)
+               gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3;
+             break;
+           case E_V8SImode:
+             if (TARGET_AVX2)
+               gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3;
+             break;
+           case E_V4DImode:
+             if (TARGET_AVX512VL)
+               {
+                 gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3;
+                 cop0 = force_reg (mode, cop0);
+                 cop1 = force_reg (mode, cop1);
+               }
+             break;
+           case E_V16QImode:
+             if (code == GTU && TARGET_SSE2)
+               gen = gen_uminv16qi3;
+             else if (code == GT && TARGET_SSE4_1)
+               gen = gen_sminv16qi3;
+             break;
+           case E_V8HImode:
+             if (code == GTU && TARGET_SSE4_1)
+               gen = gen_uminv8hi3;
+             else if (code == GT && TARGET_SSE2)
+               gen = gen_sminv8hi3;
+             break;
+           case E_V4SImode:
+             if (TARGET_SSE4_1)
+               gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3;
+             break;
+           case E_V2DImode:
+             if (TARGET_AVX512VL)
+               {
+                 gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3;
+                 cop0 = force_reg (mode, cop0);
+                 cop1 = force_reg (mode, cop1);
+               }
+             break;
+           default:
+             break;
+           }
+
+         if (gen)
+           {
+             rtx tem = gen_reg_rtx (mode);
+             if (!vector_operand (cop0, mode))
+               cop0 = force_reg (mode, cop0);
+             if (!vector_operand (cop1, mode))
+               cop1 = force_reg (mode, cop1);
+             *negate = !*negate;
+             emit_insn (gen (tem, cop0, cop1));
+             cop1 = tem;
+             code = EQ;
+           }
+       }
+
       /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
index d814641a966159b746d5bd314e6c010533240e7a..dcac93bb27507a3c432cb335c40528b19f9b408a 100644 (file)
@@ -1,3 +1,22 @@
+2018-12-21  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/88547
+       * gcc.target/i386/pr88547-1.c: Expect only 2 knotb and 2 knotw
+       insns instead of 4, check for vpminud, vpminuq and no vpsubd or
+       vpsubq.
+       * gcc.target/i386/sse2-pr88547-1.c: New test.
+       * gcc.target/i386/sse2-pr88547-2.c: New test.
+       * gcc.target/i386/sse4_1-pr88547-1.c: New test.
+       * gcc.target/i386/sse4_1-pr88547-2.c: New test.
+       * gcc.target/i386/avx2-pr88547-1.c: New test.
+       * gcc.target/i386/avx2-pr88547-2.c: New test.
+       * gcc.target/i386/avx512f-pr88547-2.c: New test.
+       * gcc.target/i386/avx512vl-pr88547-1.c: New test.
+       * gcc.target/i386/avx512vl-pr88547-2.c: New test.
+       * gcc.target/i386/avx512vl-pr88547-3.c: New test.
+       * gcc.target/i386/avx512f_cond_move.c (y): Change from unsigned int
+       array to int array.
+
 2018-12-20  Marek Polacek  <polacek@redhat.com>
 
        PR c++/88196 - ICE with class non-type template parameter.
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c
new file mode 100644 (file)
index 0000000..7da657f
--- /dev/null
@@ -0,0 +1,115 @@
+/* PR target/88547 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -mno-xop -mno-avx512f" } */
+/* { dg-final { scan-assembler-not "vpcmpgt\[bwd]\[\t ]" } } */
+/* { dg-final { scan-assembler-times "vpminub\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */
+
+typedef signed char v32qi __attribute__((vector_size(32)));
+typedef unsigned char v32uqi __attribute__((vector_size(32)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef unsigned short v16uhi __attribute__((vector_size(32)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef unsigned v8usi __attribute__((vector_size(32)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef unsigned long long v4udi __attribute__((vector_size(32)));
+
+__attribute__((noipa)) v32qi
+f1 (v32qi x, v32qi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v32uqi
+f2 (v32uqi x, v32uqi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v32qi
+f3 (v32qi x, v32qi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v32uqi
+f4 (v32uqi x, v32uqi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v16hi
+f5 (v16hi x, v16hi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v16uhi
+f6 (v16uhi x, v16uhi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v16hi
+f7 (v16hi x, v16hi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v16uhi
+f8 (v16uhi x, v16uhi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v8si
+f9 (v8si x, v8si y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v8usi
+f10 (v8usi x, v8usi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v8si
+f11 (v8si x, v8si y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v8usi
+f12 (v8usi x, v8usi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v4di
+f13 (v4di x, v4di y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v4udi
+f14 (v4udi x, v4udi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v4di
+f15 (v4di x, v4di y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v4udi
+f16 (v4udi x, v4udi y)
+{
+  return x >= y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c b/gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c
new file mode 100644 (file)
index 0000000..6450ab0
--- /dev/null
@@ -0,0 +1,90 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O2 -mavx2" } */
+
+#ifndef CHECK
+#define CHECK "avx2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx2_test
+#endif
+
+#include CHECK
+
+#include "avx2-pr88547-1.c"
+
+#define NUM 256
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do                                                             \
+  {                                                            \
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;  \
+    int i, sign = 1;                                           \
+    type res;                                                  \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       src1.i[i] = i * i * sign;                               \
+       src2.i[i] = (i + 20) * sign;                            \
+       sign = -sign;                                           \
+      }                                                                \
+    for (i = 0; i < NUM; i += N)                               \
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);                \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       res = src1.i[i] op src2.i[i] ? -1 : 0;                  \
+       if (res != dst.i[i])                                    \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do                                                             \
+  {                                                            \
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;  \
+    int i;                                                     \
+    type res;                                                  \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       src1.i[i] = i * i;                                      \
+       src2.i[i] = i + 20;                                     \
+       if ((i % 4))                                            \
+         src2.i[i] |= (1ULL << (sizeof (type)                  \
+                                * __CHAR_BIT__ - 1));          \
+      }                                                                \
+                                                               \
+    for (i = 0; i < NUM; i += N)                               \
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);                \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       res = src1.i[i] op src2.i[i] ? -1 : 0;                  \
+       if (res != dst.i[i])                                    \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+TEST (void)
+{
+  TEST_SIGNED (v32qi, signed char, 32, f1, <=);
+  TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, <=);
+  TEST_SIGNED (v32qi, signed char, 32, f3, >=);
+  TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, >=);
+  TEST_SIGNED (v16hi, short int, 16, f5, <=);
+  TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, <=);
+  TEST_SIGNED (v16hi, short int, 16, f7, >=);
+  TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, >=);
+  TEST_SIGNED (v8si, int, 8, f9, <=);
+  TEST_UNSIGNED (v8usi, unsigned int, 8, f10, <=);
+  TEST_SIGNED (v8si, int, 8, f11, >=);
+  TEST_UNSIGNED (v8usi, unsigned int, 8, f12, >=);
+  TEST_SIGNED (v4di, long long int, 4, f13, <=);
+  TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, <=);
+  TEST_SIGNED (v4di, long long int, 4, f15, >=);
+  TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, >=);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c
new file mode 100644 (file)
index 0000000..25be500
--- /dev/null
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include "avx512-check.h"
+
+#include "pr88547-1.c"
+
+#define NUM 512
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do                                                             \
+  {                                                            \
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;  \
+    int i, sign = 1;                                           \
+    type res;                                                  \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       src1.i[i] = i * i * sign;                               \
+       src2.i[i] = (i + 20) * sign;                            \
+       sign = -sign;                                           \
+      }                                                                \
+    for (i = 0; i < NUM; i += N)                               \
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);                \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       res = src1.i[i] op src2.i[i] ? -1 : 0;                  \
+       if (res != dst.i[i])                                    \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do                                                             \
+  {                                                            \
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;  \
+    int i;                                                     \
+    type res;                                                  \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       src1.i[i] = i * i;                                      \
+       src2.i[i] = i + 20;                                     \
+       if ((i % 4))                                            \
+         src2.i[i] |= (1ULL << (sizeof (type)                  \
+                                * __CHAR_BIT__ - 1));          \
+      }                                                                \
+                                                               \
+    for (i = 0; i < NUM; i += N)                               \
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);                \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       res = src1.i[i] op src2.i[i] ? -1 : 0;                  \
+       if (res != dst.i[i])                                    \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+test_512 (void)
+{
+  TEST_SIGNED (v64qi, signed char, 64, f1, <=);
+  TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, <=);
+  TEST_SIGNED (v64qi, signed char, 64, f3, >=);
+  TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, >=);
+  TEST_SIGNED (v32hi, short int, 32, f5, <=);
+  TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, <=);
+  TEST_SIGNED (v32hi, short int, 32, f7, >=);
+  TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, >=);
+  TEST_SIGNED (v16si, int, 16, f9, <=);
+  TEST_UNSIGNED (v16usi, unsigned int, 16, f10, <=);
+  TEST_SIGNED (v16si, int, 16, f11, >=);
+  TEST_UNSIGNED (v16usi, unsigned int, 16, f12, >=);
+  TEST_SIGNED (v8di, long long int, 8, f13, <=);
+  TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, <=);
+  TEST_SIGNED (v8di, long long int, 8, f15, >=);
+  TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, >=);
+}
index 10e470db5ca2d6d89ff91736a9652e841a088710..99a89f51202cc692d4797761fa5d61875beed5f2 100644 (file)
@@ -3,7 +3,7 @@
 /* { dg-final { scan-assembler-times "(?:vpblendmd|vmovdqa32)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 8 } } */
 
 unsigned int x[128];
-unsigned int y[128];
+int y[128];
 
 void
 foo () 
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
new file mode 100644 (file)
index 0000000..aa64dc2
--- /dev/null
@@ -0,0 +1,14 @@
+/* PR target/88547 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-xop -mavx512vl -mno-avx512bw -mno-avx512dq" } */
+/* { dg-final { scan-assembler-not "vpcmpgt\[bwdq]\[\t ]" } } */
+/* { dg-final { scan-assembler-times "vpminub\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */
+
+#include "avx2-pr88547-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c
new file mode 100644 (file)
index 0000000..8631915
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */
+
+#define AVX512VL
+#define AVX512BW
+#define AVX512DQ
+
+#include "avx512f-pr88547-2.c"
+
+static void
+test_256 (void)
+{
+  test_512 ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c
new file mode 100644 (file)
index 0000000..cf6f3a4
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */
+
+#define AVX512VL
+#define AVX512BW
+#define AVX512DQ
+#define CHECK "avx512-check.h"
+#define TEST test_512
+
+#include "avx2-pr88547-2.c"
+
+static void
+test_256 (void)
+{
+  return test_512 ();
+}
+
+static void
+test_128 (void)
+{
+}
index 1a248854ccd1d7fc39f883d4e38fd1d757923fa5..b6c82157bba6605c2ae2859f328157bebf8f87b2 100644 (file)
@@ -6,10 +6,14 @@
 /* { dg-final { scan-assembler-times "vpmovm2w\[\t  ]" 4 } } */
 /* { dg-final { scan-assembler-times "vpmovm2d\[\t  ]" 4 } } */
 /* { dg-final { scan-assembler-times "vpmovm2q\[\t  ]" 4 } } */
-/* { dg-final { scan-assembler-times "knotb\[\t  ]" 4 } } */
-/* { dg-final { scan-assembler-times "knotw\[\t  ]" 4 } } */
+/* { dg-final { scan-assembler-times "knotb\[\t  ]" 2 } } */
+/* { dg-final { scan-assembler-times "knotw\[\t  ]" 2 } } */
 /* { dg-final { scan-assembler-times "knotd\[\t  ]" 2 } } */
 /* { dg-final { scan-assembler-times "knotq\[\t  ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminud\[\t  ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[\t  ]" 2 } } */
+/* { dg-final { scan-assembler-not "vpsubd\[\t  ]" } } */
+/* { dg-final { scan-assembler-not "vpsubq\[\t  ]" } } */
 
 typedef signed char v64qi __attribute__((vector_size(64)));
 typedef unsigned char v64uqi __attribute__((vector_size(64)));
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c b/gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c
new file mode 100644 (file)
index 0000000..957b21f
--- /dev/null
@@ -0,0 +1,115 @@
+/* PR target/88547 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler-not "pcmpgtw\[\t ]" } } */
+/* { dg-final { scan-assembler-times "pminub\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "pminsw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-not "pminsb\[\t ]" } } */
+/* { dg-final { scan-assembler-not "pminuw\[\t ]" } } */
+/* { dg-final { scan-assembler-not "pminud\[\t ]" } } */
+/* { dg-final { scan-assembler-not "pminuq\[\t ]" } } */
+
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef unsigned char v16uqi __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef unsigned short v8uhi __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef unsigned v4usi __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef unsigned long long v2udi __attribute__((vector_size(16)));
+
+v16qi
+f1 (v16qi x, v16qi y)
+{
+  return x <= y;
+}
+
+v16uqi
+f2 (v16uqi x, v16uqi y)
+{
+  return x <= y;
+}
+
+v16qi
+f3 (v16qi x, v16qi y)
+{
+  return x >= y;
+}
+
+v16uqi
+f4 (v16uqi x, v16uqi y)
+{
+  return x >= y;
+}
+
+v8hi
+f5 (v8hi x, v8hi y)
+{
+  return x <= y;
+}
+
+v8uhi
+f6 (v8uhi x, v8uhi y)
+{
+  return x <= y;
+}
+
+v8hi
+f7 (v8hi x, v8hi y)
+{
+  return x >= y;
+}
+
+v8uhi
+f8 (v8uhi x, v8uhi y)
+{
+  return x >= y;
+}
+
+v4si
+f9 (v4si x, v4si y)
+{
+  return x <= y;
+}
+
+v4usi
+f10 (v4usi x, v4usi y)
+{
+  return x <= y;
+}
+
+v4si
+f11 (v4si x, v4si y)
+{
+  return x >= y;
+}
+
+v4usi
+f12 (v4usi x, v4usi y)
+{
+  return x >= y;
+}
+
+v2di
+f13 (v2di x, v2di y)
+{
+  return x <= y;
+}
+
+v2udi
+f14 (v2udi x, v2udi y)
+{
+  return x <= y;
+}
+
+v2di
+f15 (v2di x, v2di y)
+{
+  return x >= y;
+}
+
+v2udi
+f16 (v2udi x, v2udi y)
+{
+  return x >= y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c b/gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c
new file mode 100644 (file)
index 0000000..efdcee0
--- /dev/null
@@ -0,0 +1,90 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include "sse2-pr88547-1.c"
+
+#define NUM 256
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do                                                             \
+  {                                                            \
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;  \
+    int i, sign = 1;                                           \
+    type res;                                                  \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       src1.i[i] = i * i * sign;                               \
+       src2.i[i] = (i + 20) * sign;                            \
+       sign = -sign;                                           \
+      }                                                                \
+    for (i = 0; i < NUM; i += N)                               \
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);                \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       res = src1.i[i] op src2.i[i] ? -1 : 0;                  \
+       if (res != dst.i[i])                                    \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do                                                             \
+  {                                                            \
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;  \
+    int i;                                                     \
+    type res;                                                  \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       src1.i[i] = i * i;                                      \
+       src2.i[i] = i + 20;                                     \
+       if ((i % 4))                                            \
+         src2.i[i] |= (1ULL << (sizeof (type)                  \
+                                * __CHAR_BIT__ - 1));          \
+      }                                                                \
+                                                               \
+    for (i = 0; i < NUM; i += N)                               \
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);                \
+                                                               \
+    for (i = 0; i < NUM; i++)                                  \
+      {                                                                \
+       res = src1.i[i] op src2.i[i] ? -1 : 0;                  \
+       if (res != dst.i[i])                                    \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+TEST (void)
+{
+  TEST_SIGNED (v16qi, signed char, 16, f1, <=);
+  TEST_UNSIGNED (v16uqi, unsigned char, 16, f2, <=);
+  TEST_SIGNED (v16qi, signed char, 16, f3, >=);
+  TEST_UNSIGNED (v16uqi, unsigned char, 16, f4, >=);
+  TEST_SIGNED (v8hi, short int, 8, f5, <=);
+  TEST_UNSIGNED (v8uhi, unsigned short int, 8, f6, <=);
+  TEST_SIGNED (v8hi, short int, 8, f7, >=);
+  TEST_UNSIGNED (v8uhi, unsigned short int, 8, f8, >=);
+  TEST_SIGNED (v4si, int, 4, f9, <=);
+  TEST_UNSIGNED (v4usi, unsigned int, 4, f10, <=);
+  TEST_SIGNED (v4si, int, 4, f11, >=);
+  TEST_UNSIGNED (v4usi, unsigned int, 4, f12, >=);
+  TEST_SIGNED (v2di, long long int, 2, f13, <=);
+  TEST_UNSIGNED (v2udi, unsigned long long int, 2, f14, <=);
+  TEST_SIGNED (v2di, long long int, 2, f15, >=);
+  TEST_UNSIGNED (v2udi, unsigned long long int, 2, f16, >=);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c
new file mode 100644 (file)
index 0000000..362b962
--- /dev/null
@@ -0,0 +1,12 @@
+/* PR target/88547 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1 -mno-sse4.2" } */
+/* { dg-final { scan-assembler-not "pcmpgt\[bwd]\[\t ]" } } */
+/* { dg-final { scan-assembler-times "pminub\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "pminsb\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "pminuw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "pminsw\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "pminud\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "pminsd\[\t ]" 2 } } */
+
+#include "sse2-pr88547-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c
new file mode 100644 (file)
index 0000000..f903155
--- /dev/null
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "sse2-pr88547-2.c"