From d6b612a76f77e7c9ae7712df9312bbcf4a92f048 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 21 Dec 2018 11:37:11 +0100 Subject: [PATCH] re PR target/88547 (missed optimization for vector comparisons) PR target/88547 * config/i386/i386.c (ix86_expand_int_sse_cmp): Optimize x > y ? 0 : -1 into min (x, y) == x ? -1 : 0. * gcc.target/i386/pr88547-1.c: Expect only 2 knotb and 2 knotw insns instead of 4, check for vpminud, vpminuq and no vpsubd or vpsubq. * gcc.target/i386/sse2-pr88547-1.c: New test. * gcc.target/i386/sse2-pr88547-2.c: New test. * gcc.target/i386/sse4_1-pr88547-1.c: New test. * gcc.target/i386/sse4_1-pr88547-2.c: New test. * gcc.target/i386/avx2-pr88547-1.c: New test. * gcc.target/i386/avx2-pr88547-2.c: New test. * gcc.target/i386/avx512f-pr88547-2.c: New test. * gcc.target/i386/avx512vl-pr88547-1.c: New test. * gcc.target/i386/avx512vl-pr88547-2.c: New test. * gcc.target/i386/avx512vl-pr88547-3.c: New test. * gcc.target/i386/avx512f_cond_move.c (y): Change from unsigned int array to int array. 
From-SVN: r267322 --- gcc/ChangeLog | 6 + gcc/config/i386/i386.c | 98 +++++++++++++++ gcc/testsuite/ChangeLog | 19 +++ .../gcc.target/i386/avx2-pr88547-1.c | 115 ++++++++++++++++++ .../gcc.target/i386/avx2-pr88547-2.c | 90 ++++++++++++++ .../gcc.target/i386/avx512f-pr88547-2.c | 82 +++++++++++++ .../gcc.target/i386/avx512f_cond_move.c | 2 +- .../gcc.target/i386/avx512vl-pr88547-1.c | 14 +++ .../gcc.target/i386/avx512vl-pr88547-2.c | 22 ++++ .../gcc.target/i386/avx512vl-pr88547-3.c | 24 ++++ gcc/testsuite/gcc.target/i386/pr88547-1.c | 8 +- .../gcc.target/i386/sse2-pr88547-1.c | 115 ++++++++++++++++++ .../gcc.target/i386/sse2-pr88547-2.c | 90 ++++++++++++++ .../gcc.target/i386/sse4_1-pr88547-1.c | 12 ++ .../gcc.target/i386/sse4_1-pr88547-2.c | 8 ++ 15 files changed, 702 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c6641033e5e..7f09e0066fb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2018-12-21 Jakub Jelinek + + PR target/88547 + * config/i386/i386.c (ix86_expand_int_sse_cmp): Optimize + x > y ? 0 : -1 into min (x, y) == x ? -1 : 0. 
+ 2018-12-21 Jan Beulich * config/i386/sse.md (vaesdec_, vaesdeclast_): Allow diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0ea3bb3a4bc..02e24bc5796 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24126,6 +24126,104 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, } } + rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode); + rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode); + if (*negate) + std::swap (optrue, opfalse); + + /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when + not using integer masks into min (x, y) == x ? -1 : 0 (i.e. + min (x, y) == x). While we add one instruction (the minimum), + we remove the need for two instructions in the negation, as the + result is done this way. + When using masks, do it for SI/DImode element types, as it is shorter + than the two subtractions. */ + if ((code != EQ + && GET_MODE_SIZE (mode) != 64 + && vector_all_ones_operand (opfalse, data_mode) + && optrue == CONST0_RTX (data_mode)) + || (code == GTU + && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 + /* Don't do it if not using integer masks and we'd end up with + the right values in the registers though. */ + && (GET_MODE_SIZE (mode) == 64 + || !vector_all_ones_operand (optrue, data_mode) + || opfalse != CONST0_RTX (data_mode)))) + { + rtx (*gen) (rtx, rtx, rtx) = NULL; + + switch (mode) + { + case E_V16SImode: + gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3; + break; + case E_V8DImode: + gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3; + cop0 = force_reg (mode, cop0); + cop1 = force_reg (mode, cop1); + break; + case E_V32QImode: + if (TARGET_AVX2) + gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3; + break; + case E_V16HImode: + if (TARGET_AVX2) + gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3; + break; + case E_V8SImode: + if (TARGET_AVX2) + gen = (code == GTU) ? 
gen_uminv8si3 : gen_sminv8si3; + break; + case E_V4DImode: + if (TARGET_AVX512VL) + { + gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3; + cop0 = force_reg (mode, cop0); + cop1 = force_reg (mode, cop1); + } + break; + case E_V16QImode: + if (code == GTU && TARGET_SSE2) + gen = gen_uminv16qi3; + else if (code == GT && TARGET_SSE4_1) + gen = gen_sminv16qi3; + break; + case E_V8HImode: + if (code == GTU && TARGET_SSE4_1) + gen = gen_uminv8hi3; + else if (code == GT && TARGET_SSE2) + gen = gen_sminv8hi3; + break; + case E_V4SImode: + if (TARGET_SSE4_1) + gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; + break; + case E_V2DImode: + if (TARGET_AVX512VL) + { + gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3; + cop0 = force_reg (mode, cop0); + cop1 = force_reg (mode, cop1); + } + break; + default: + break; + } + + if (gen) + { + rtx tem = gen_reg_rtx (mode); + if (!vector_operand (cop0, mode)) + cop0 = force_reg (mode, cop0); + if (!vector_operand (cop1, mode)) + cop1 = force_reg (mode, cop1); + *negate = !*negate; + emit_insn (gen (tem, cop0, cop1)); + cop1 = tem; + code = EQ; + } + } + /* Unsigned parallel compare is not supported by the hardware. Play some tricks to turn this into a signed comparison against 0. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d814641a966..dcac93bb275 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2018-12-21 Jakub Jelinek + + PR target/88547 + * gcc.target/i386/pr88547-1.c: Expect only 2 knotb and 2 knotw + insns instead of 4, check for vpminud, vpminuq and no vpsubd or + vpsubq. + * gcc.target/i386/sse2-pr88547-1.c: New test. + * gcc.target/i386/sse2-pr88547-2.c: New test. + * gcc.target/i386/sse4_1-pr88547-1.c: New test. + * gcc.target/i386/sse4_1-pr88547-2.c: New test. + * gcc.target/i386/avx2-pr88547-1.c: New test. + * gcc.target/i386/avx2-pr88547-2.c: New test. + * gcc.target/i386/avx512f-pr88547-2.c: New test. 
+ * gcc.target/i386/avx512vl-pr88547-1.c: New test. + * gcc.target/i386/avx512vl-pr88547-2.c: New test. + * gcc.target/i386/avx512vl-pr88547-3.c: New test. + * gcc.target/i386/avx512f_cond_move.c (y): Change from unsigned int + array to int array. + 2018-12-20 Marek Polacek PR c++/88196 - ICE with class non-type template parameter. diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c new file mode 100644 index 00000000000..7da657f7848 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr88547-1.c @@ -0,0 +1,115 @@ +/* PR target/88547 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -mno-xop -mno-avx512f" } */ +/* { dg-final { scan-assembler-not "vpcmpgt\[bwd]\[\t ]" } } */ +/* { dg-final { scan-assembler-times "vpminub\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */ + +typedef signed char v32qi __attribute__((vector_size(32))); +typedef unsigned char v32uqi __attribute__((vector_size(32))); +typedef short v16hi __attribute__((vector_size(32))); +typedef unsigned short v16uhi __attribute__((vector_size(32))); +typedef int v8si __attribute__((vector_size(32))); +typedef unsigned v8usi __attribute__((vector_size(32))); +typedef long long v4di __attribute__((vector_size(32))); +typedef unsigned long long v4udi __attribute__((vector_size(32))); + +__attribute__((noipa)) v32qi +f1 (v32qi x, v32qi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32uqi +f2 (v32uqi x, v32uqi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32qi +f3 (v32qi x, v32qi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32uqi +f4 (v32uqi x, v32uqi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16hi +f5 (v16hi x, v16hi y) +{ + return x 
<= y; +} + +__attribute__((noipa)) v16uhi +f6 (v16uhi x, v16uhi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16hi +f7 (v16hi x, v16hi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16uhi +f8 (v16uhi x, v16uhi y) +{ + return x >= y; +} + +__attribute__((noipa)) v8si +f9 (v8si x, v8si y) +{ + return x <= y; +} + +__attribute__((noipa)) v8usi +f10 (v8usi x, v8usi y) +{ + return x <= y; +} + +__attribute__((noipa)) v8si +f11 (v8si x, v8si y) +{ + return x >= y; +} + +__attribute__((noipa)) v8usi +f12 (v8usi x, v8usi y) +{ + return x >= y; +} + +__attribute__((noipa)) v4di +f13 (v4di x, v4di y) +{ + return x <= y; +} + +__attribute__((noipa)) v4udi +f14 (v4udi x, v4udi y) +{ + return x <= y; +} + +__attribute__((noipa)) v4di +f15 (v4di x, v4di y) +{ + return x >= y; +} + +__attribute__((noipa)) v4udi +f16 (v4udi x, v4udi y) +{ + return x >= y; +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c b/gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c new file mode 100644 index 00000000000..6450ab02e23 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr88547-2.c @@ -0,0 +1,90 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O2 -mavx2" } */ + +#ifndef CHECK +#define CHECK "avx2-check.h" +#endif + +#ifndef TEST +#define TEST avx2_test +#endif + +#include CHECK + +#include "avx2-pr88547-1.c" + +#define NUM 256 + +#define TEST_SIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i, sign = 1; \ + type res; \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i * sign; \ + src2.i[i] = (i + 20) * sign; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? 
-1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i; \ + type res; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i; \ + src2.i[i] = i + 20; \ + if ((i % 4)) \ + src2.i[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? -1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + TEST_SIGNED (v32qi, signed char, 32, f1, <=); + TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, <=); + TEST_SIGNED (v32qi, signed char, 32, f3, >=); + TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, >=); + TEST_SIGNED (v16hi, short int, 16, f5, <=); + TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, <=); + TEST_SIGNED (v16hi, short int, 16, f7, >=); + TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, >=); + TEST_SIGNED (v8si, int, 8, f9, <=); + TEST_UNSIGNED (v8usi, unsigned int, 8, f10, <=); + TEST_SIGNED (v8si, int, 8, f11, >=); + TEST_UNSIGNED (v8usi, unsigned int, 8, f12, >=); + TEST_SIGNED (v4di, long long int, 4, f13, <=); + TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, <=); + TEST_SIGNED (v4di, long long int, 4, f15, >=); + TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, >=); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c new file mode 100644 index 00000000000..25be5004f35 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88547-2.c @@ -0,0 +1,82 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include "avx512-check.h" + +#include "pr88547-1.c" + +#define NUM 512 + +#define TEST_SIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / 
N]; type i[NUM]; } dst, src1, src2; \ + int i, sign = 1; \ + type res; \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i * sign; \ + src2.i[i] = (i + 20) * sign; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? -1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i; \ + type res; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i; \ + src2.i[i] = i + 20; \ + if ((i % 4)) \ + src2.i[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? -1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +test_512 (void) +{ + TEST_SIGNED (v64qi, signed char, 64, f1, <=); + TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, <=); + TEST_SIGNED (v64qi, signed char, 64, f3, >=); + TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, >=); + TEST_SIGNED (v32hi, short int, 32, f5, <=); + TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, <=); + TEST_SIGNED (v32hi, short int, 32, f7, >=); + TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, >=); + TEST_SIGNED (v16si, int, 16, f9, <=); + TEST_UNSIGNED (v16usi, unsigned int, 16, f10, <=); + TEST_SIGNED (v16si, int, 16, f11, >=); + TEST_UNSIGNED (v16usi, unsigned int, 16, f12, >=); + TEST_SIGNED (v8di, long long int, 8, f13, <=); + TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, <=); + TEST_SIGNED (v8di, long long int, 8, f15, >=); + TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, >=); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c b/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c index 10e470db5ca..99a89f51202 100644 
--- a/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c +++ b/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c @@ -3,7 +3,7 @@ /* { dg-final { scan-assembler-times "(?:vpblendmd|vmovdqa32)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 8 } } */ unsigned int x[128]; -unsigned int y[128]; +int y[128]; void foo () diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c new file mode 100644 index 00000000000..aa64dc2f06e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c @@ -0,0 +1,14 @@ +/* PR target/88547 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-xop -mavx512vl -mno-avx512bw -mno-avx512dq" } */ +/* { dg-final { scan-assembler-not "vpcmpgt\[bwdq]\[\t ]" } } */ +/* { dg-final { scan-assembler-times "vpminub\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */ + +#include "avx2-pr88547-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c new file mode 100644 index 00000000000..86319152b9e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-2.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512dq } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */ + +#define AVX512VL +#define AVX512BW +#define AVX512DQ + +#include "avx512f-pr88547-2.c" + +static void +test_256 (void) +{ + test_512 (); +} + +static void +test_128 (void) +{ +} diff --git 
a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c new file mode 100644 index 00000000000..cf6f3a49533 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-3.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512dq } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */ + +#define AVX512VL +#define AVX512BW +#define AVX512DQ +#define CHECK "avx512-check.h" +#define TEST test_512 + +#include "avx2-pr88547-2.c" + +static void +test_256 (void) +{ + return test_512 (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/pr88547-1.c b/gcc/testsuite/gcc.target/i386/pr88547-1.c index 1a248854ccd..b6c82157bba 100644 --- a/gcc/testsuite/gcc.target/i386/pr88547-1.c +++ b/gcc/testsuite/gcc.target/i386/pr88547-1.c @@ -6,10 +6,14 @@ /* { dg-final { scan-assembler-times "vpmovm2w\[\t ]" 4 } } */ /* { dg-final { scan-assembler-times "vpmovm2d\[\t ]" 4 } } */ /* { dg-final { scan-assembler-times "vpmovm2q\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "knotb\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "knotw\[\t ]" 4 } } */ +/* { dg-final { scan-assembler-times "knotb\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "knotw\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "knotd\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "knotq\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-not "vpsubd\[\t ]" } } */ +/* { dg-final { scan-assembler-not "vpsubq\[\t ]" } } */ typedef signed char v64qi __attribute__((vector_size(64))); typedef unsigned char v64uqi __attribute__((vector_size(64))); diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c b/gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c new file 
mode 100644 index 00000000000..957b21f2fd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-pr88547-1.c @@ -0,0 +1,115 @@ +/* PR target/88547 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-sse3" } */ +/* { dg-final { scan-assembler-not "pcmpgtw\[\t ]" } } */ +/* { dg-final { scan-assembler-times "pminub\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "pminsw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-not "pminsb\[\t ]" } } */ +/* { dg-final { scan-assembler-not "pminuw\[\t ]" } } */ +/* { dg-final { scan-assembler-not "pminud\[\t ]" } } */ +/* { dg-final { scan-assembler-not "pminuq\[\t ]" } } */ + +typedef signed char v16qi __attribute__((vector_size(16))); +typedef unsigned char v16uqi __attribute__((vector_size(16))); +typedef short v8hi __attribute__((vector_size(16))); +typedef unsigned short v8uhi __attribute__((vector_size(16))); +typedef int v4si __attribute__((vector_size(16))); +typedef unsigned v4usi __attribute__((vector_size(16))); +typedef long long v2di __attribute__((vector_size(16))); +typedef unsigned long long v2udi __attribute__((vector_size(16))); + +v16qi +f1 (v16qi x, v16qi y) +{ + return x <= y; +} + +v16uqi +f2 (v16uqi x, v16uqi y) +{ + return x <= y; +} + +v16qi +f3 (v16qi x, v16qi y) +{ + return x >= y; +} + +v16uqi +f4 (v16uqi x, v16uqi y) +{ + return x >= y; +} + +v8hi +f5 (v8hi x, v8hi y) +{ + return x <= y; +} + +v8uhi +f6 (v8uhi x, v8uhi y) +{ + return x <= y; +} + +v8hi +f7 (v8hi x, v8hi y) +{ + return x >= y; +} + +v8uhi +f8 (v8uhi x, v8uhi y) +{ + return x >= y; +} + +v4si +f9 (v4si x, v4si y) +{ + return x <= y; +} + +v4usi +f10 (v4usi x, v4usi y) +{ + return x <= y; +} + +v4si +f11 (v4si x, v4si y) +{ + return x >= y; +} + +v4usi +f12 (v4usi x, v4usi y) +{ + return x >= y; +} + +v2di +f13 (v2di x, v2di y) +{ + return x <= y; +} + +v2udi +f14 (v2udi x, v2udi y) +{ + return x <= y; +} + +v2di +f15 (v2di x, v2di y) +{ + return x >= y; +} + +v2udi +f16 (v2udi x, v2udi y) +{ + return x >= y; +} diff 
--git a/gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c b/gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c new file mode 100644 index 00000000000..efdcee0df01 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-pr88547-2.c @@ -0,0 +1,90 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-options "-O2 -msse2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +#include "sse2-pr88547-1.c" + +#define NUM 256 + +#define TEST_SIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i, sign = 1; \ + type res; \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i * sign; \ + src2.i[i] = (i + 20) * sign; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? -1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i; \ + type res; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i; \ + src2.i[i] = i + 20; \ + if ((i % 4)) \ + src2.i[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? 
-1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + TEST_SIGNED (v16qi, signed char, 16, f1, <=); + TEST_UNSIGNED (v16uqi, unsigned char, 16, f2, <=); + TEST_SIGNED (v16qi, signed char, 16, f3, >=); + TEST_UNSIGNED (v16uqi, unsigned char, 16, f4, >=); + TEST_SIGNED (v8hi, short int, 8, f5, <=); + TEST_UNSIGNED (v8uhi, unsigned short int, 8, f6, <=); + TEST_SIGNED (v8hi, short int, 8, f7, >=); + TEST_UNSIGNED (v8uhi, unsigned short int, 8, f8, >=); + TEST_SIGNED (v4si, int, 4, f9, <=); + TEST_UNSIGNED (v4usi, unsigned int, 4, f10, <=); + TEST_SIGNED (v4si, int, 4, f11, >=); + TEST_UNSIGNED (v4usi, unsigned int, 4, f12, >=); + TEST_SIGNED (v2di, long long int, 2, f13, <=); + TEST_UNSIGNED (v2udi, unsigned long long int, 2, f14, <=); + TEST_SIGNED (v2di, long long int, 2, f15, >=); + TEST_UNSIGNED (v2udi, unsigned long long int, 2, f16, >=); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c new file mode 100644 index 00000000000..362b962ee67 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-1.c @@ -0,0 +1,12 @@ +/* PR target/88547 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1 -mno-sse4.2" } */ +/* { dg-final { scan-assembler-not "pcmpgt\[bwd]\[\t ]" } } */ +/* { dg-final { scan-assembler-times "pminub\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "pminsb\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "pminuw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "pminsw\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "pminud\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "pminsd\[\t ]" 2 } } */ + +#include "sse2-pr88547-1.c" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c new file mode 100644 index 00000000000..f903155113f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr88547-2.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { 
dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "sse2-pr88547-2.c" -- 2.30.2