From 4d4015db89b3e93dac9f8e08ce9127390d475f11 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 8 May 2018 14:16:19 +0200 Subject: [PATCH] re PR target/85572 (faster code for absolute value of __v2di) PR target/85572 * config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and E_V4DImode. * config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of VI1248_AVX512VL_AVX512BW. Handle V2DImode and V4DImode if not TARGET_AVX512VL using ix86_expand_sse2_abs. Formatting fixes. * g++.dg/other/sse2-pr85572-1.C: New test. * g++.dg/other/sse2-pr85572-2.C: New test. * g++.dg/other/sse4-pr85572-1.C: New test. * g++.dg/other/avx2-pr85572-1.C: New test. From-SVN: r260041 --- gcc/ChangeLog | 7 ++ gcc/config/i386/i386.c | 73 +++++++++++++++------ gcc/config/i386/sse.md | 10 +-- gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/g++.dg/other/avx2-pr85572-1.C | 21 ++++++ gcc/testsuite/g++.dg/other/sse2-pr85572-1.C | 14 ++++ gcc/testsuite/g++.dg/other/sse2-pr85572-2.C | 45 +++++++++++++ gcc/testsuite/g++.dg/other/sse4-pr85572-1.C | 14 ++++ 8 files changed, 165 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/g++.dg/other/avx2-pr85572-1.C create mode 100644 gcc/testsuite/g++.dg/other/sse2-pr85572-1.C create mode 100644 gcc/testsuite/g++.dg/other/sse2-pr85572-2.C create mode 100644 gcc/testsuite/g++.dg/other/sse4-pr85572-1.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0ff143bf603..8ab847b8f21 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,12 @@ 2018-05-08 Jakub Jelinek + PR target/85572 + * config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and + E_V4DImode. + * config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of + VI1248_AVX512VL_AVX512BW. Handle V2DImode and V4DImode if not + TARGET_AVX512VL using ix86_expand_sse2_abs. Formatting fixes. + PR target/85317 * config/i386/i386.c (ix86_fold_builtin): Handle IX86_BUILTIN_{,P}MOVMSK{PS,PD,B}{,128,256}.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d99ff968c90..ab15ac39ae6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -49837,39 +49837,70 @@ ix86_expand_sse2_abs (rtx target, rtx input) switch (mode) { + case E_V2DImode: + case E_V4DImode: + /* For 64-bit signed integer X, with SSE4.2 use + pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X. + Otherwise handle it similarly to V4SImode, except use 64 as W instead of + 32 and use logical instead of arithmetic right shift (which is + unimplemented) and subtract. */ + if (TARGET_SSE4_2) + { + tmp0 = gen_reg_rtx (mode); + tmp1 = gen_reg_rtx (mode); + emit_move_insn (tmp1, CONST0_RTX (mode)); + if (mode == E_V2DImode) + emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input)); + else + emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input)); + } + else + { + tmp0 = expand_simple_binop (mode, LSHIFTRT, input, + GEN_INT (GET_MODE_UNIT_BITSIZE (mode) + - 1), NULL, 0, OPTAB_DIRECT); + tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false); + } + + tmp1 = expand_simple_binop (mode, XOR, tmp0, input, + NULL, 0, OPTAB_DIRECT); + x = expand_simple_binop (mode, MINUS, tmp1, tmp0, + target, 0, OPTAB_DIRECT); + break; + + case E_V4SImode: /* For 32-bit signed integer X, the best way to calculate the absolute value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). 
*/ - case E_V4SImode: - tmp0 = expand_simple_binop (mode, ASHIFTRT, input, - GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), - NULL, 0, OPTAB_DIRECT); - tmp1 = expand_simple_binop (mode, XOR, tmp0, input, - NULL, 0, OPTAB_DIRECT); - x = expand_simple_binop (mode, MINUS, tmp1, tmp0, - target, 0, OPTAB_DIRECT); - break; + tmp0 = expand_simple_binop (mode, ASHIFTRT, input, + GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), + NULL, 0, OPTAB_DIRECT); + tmp1 = expand_simple_binop (mode, XOR, tmp0, input, + NULL, 0, OPTAB_DIRECT); + x = expand_simple_binop (mode, MINUS, tmp1, tmp0, + target, 0, OPTAB_DIRECT); + break; + case E_V8HImode: /* For 16-bit signed integer X, the best way to calculate the absolute value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */ - case E_V8HImode: - tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); + tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); - x = expand_simple_binop (mode, SMAX, tmp0, input, - target, 0, OPTAB_DIRECT); - break; + x = expand_simple_binop (mode, SMAX, tmp0, input, + target, 0, OPTAB_DIRECT); + break; + case E_V16QImode: /* For 8-bit signed integer X, the best way to calculate the absolute value of X is min ((unsigned char) X, (unsigned char) (-X)), as SSE2 provides the PMINUB insn. 
*/ - case E_V16QImode: - tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); + tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); - x = expand_simple_binop (V16QImode, UMIN, tmp0, input, - target, 0, OPTAB_DIRECT); - break; + x = expand_simple_binop (V16QImode, UMIN, tmp0, input, + target, 0, OPTAB_DIRECT); + break; - default: - gcc_unreachable (); + default: + gcc_unreachable (); } if (x != target) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index aab4261343e..ae6294e559c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15211,12 +15211,14 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "abs<mode>2" - [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand") - (abs:VI1248_AVX512VL_AVX512BW - (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))] + [(set (match_operand:VI_AVX2 0 "register_operand") + (abs:VI_AVX2 + (match_operand:VI_AVX2 1 "vector_operand")))] "TARGET_SSE2" { - if (!TARGET_SSSE3) + if (!TARGET_SSSE3 + || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode) + && !TARGET_AVX512VL)) { ix86_expand_sse2_abs (operands[0], operands[1]); DONE; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c6d35858e4c..96a373ca943 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,11 @@ 2018-05-08 Jakub Jelinek + PR target/85572 + * g++.dg/other/sse2-pr85572-1.C: New test. + * g++.dg/other/sse2-pr85572-2.C: New test. + * g++.dg/other/sse4-pr85572-1.C: New test. + * g++.dg/other/avx2-pr85572-1.C: New test. + PR target/85317 * gcc.target/i386/pr85317.c: New test.
* gcc.target/i386/avx2-vpmovmskb-2.c (avx2_test): Add asm volatile diff --git a/gcc/testsuite/g++.dg/other/avx2-pr85572-1.C b/gcc/testsuite/g++.dg/other/avx2-pr85572-1.C new file mode 100644 index 00000000000..adde2eaadda --- /dev/null +++ b/gcc/testsuite/g++.dg/other/avx2-pr85572-1.C @@ -0,0 +1,21 @@ +// PR target/85572 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-O2 -mavx2 -mno-avx512f" } +// { dg-final { scan-assembler-times {\mvpxor\M} 4 } } +// { dg-final { scan-assembler-times {\mvpcmpgtq\M} 2 } } +// { dg-final { scan-assembler-times {\mvpsubq\M} 2 } } + +typedef long long V __attribute__((vector_size (16))); +typedef long long W __attribute__((vector_size (32))); + +V +foo (V x) +{ + return x < 0 ? -x : x; +} + +W +bar (W x) +{ + return x < 0 ? -x : x; +} diff --git a/gcc/testsuite/g++.dg/other/sse2-pr85572-1.C b/gcc/testsuite/g++.dg/other/sse2-pr85572-1.C new file mode 100644 index 00000000000..e4c44239424 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/sse2-pr85572-1.C @@ -0,0 +1,14 @@ +// PR target/85572 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-O2 -msse2 -mno-sse3" } +// { dg-final { scan-assembler-times {\mpxor\M} 2 } } +// { dg-final { scan-assembler-times {\mpsubq\M} 2 } } +// { dg-final { scan-assembler-times {\mpsrlq\M} 1 } } + +typedef long long V __attribute__((vector_size (16))); + +V +foo (V x) +{ + return x < 0 ? -x : x; +} diff --git a/gcc/testsuite/g++.dg/other/sse2-pr85572-2.C b/gcc/testsuite/g++.dg/other/sse2-pr85572-2.C new file mode 100644 index 00000000000..8eb399b36b4 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/sse2-pr85572-2.C @@ -0,0 +1,45 @@ +// PR target/85572 +// { dg-do run { target i?86-*-* x86_64-*-* } } +// { dg-options "-O2 -msse2" } +// { dg-require-effective-target sse2_runtime } + +typedef long long V __attribute__((vector_size (16))); +typedef long long W __attribute__((vector_size (32))); + +__attribute__((noipa)) V +foo (V x) +{ + return x < 0 ? 
-x : x; +} + +__attribute__((noipa)) void +bar (W *x, W *y) +{ + *y = *x < 0 ? -*x : *x; +} + +int +main () +{ + V a = { 11LL, -15LL }; + V b = foo (a); + if (b[0] != 11LL || b[1] != 15LL) + __builtin_abort (); + V c = { -123456789123456LL, 654321654321654LL }; + V d = foo (c); + if (d[0] != 123456789123456LL || d[1] != 654321654321654LL) + __builtin_abort (); + V e = { 0, 1 }; + V f = foo (e); + if (f[0] != 0 || f[1] != 1) + __builtin_abort (); + W g = { 17LL, -32LL, -123456789123456LL, 654321654321654LL }, h; + bar (&g, &h); + if (h[0] != 17LL || h[1] != 32LL + || h[2] != 123456789123456LL || h[3] != 654321654321654LL) + __builtin_abort (); + W i = { 0, 1, -1, 0 }, j; + bar (&i, &j); + if (j[0] != 0 || j[1] != 1 || j[2] != 1 || j[3] != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/g++.dg/other/sse4-pr85572-1.C b/gcc/testsuite/g++.dg/other/sse4-pr85572-1.C new file mode 100644 index 00000000000..fe0adaeae29 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/sse4-pr85572-1.C @@ -0,0 +1,14 @@ +// PR target/85572 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-O2 -msse4 -mno-avx" } +// { dg-final { scan-assembler-times {\mpxor\M} 2 } } +// { dg-final { scan-assembler-times {\mpcmpgtq\M} 1 } } +// { dg-final { scan-assembler-times {\mpsubq\M} 1 } } + +typedef long long V __attribute__((vector_size (16))); + +V +foo (V x) +{ + return x < 0 ? -x : x; +} -- 2.30.2