From 70310982492071f98eacdac0747521769b0f0328 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 30 Nov 2020 13:27:16 +0800 Subject: [PATCH] Optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnleuw [PR96906] For signed comparisons, it handles cases that are eq or neq to 0. For unsigned comparisons, it additionally handles cases that are le or gt to 0 (equivalent to eq or neq to 0). Transform case eq to leu, case neq to gtu. i.e. for -mavx512bw -mavx512vl transform eq case code from vpsubusw %xmm1, %xmm0, %xmm0 vpxor %xmm1, %xmm1, %xmm1 vpcmpeqw %xmm1, %xmm0, %k0 to vpcmpleuw %xmm1, %xmm0, %k0 i.e. for -mavx512bw -mavx512vl transform neq case code from vpsubusw %xmm1, %xmm0, %xmm0 vpxor %xmm1, %xmm1, %xmm1 vpcmpneqw %xmm1, %xmm0, %k0 to vpcmpnleuw %xmm1, %xmm0, %k0 gcc/ChangeLog PR target/96906 * config/i386/sse.md (_ucmp3): Add a new define_split after this insn. gcc/testsuite/ChangeLog * gcc.target/i386/avx512bw-pr96906-1.c: New test. * gcc.target/i386/pr96906-1.c: Add -mno-avx512f. --- gcc/config/i386/sse.md | 38 +++++++++++ .../gcc.target/i386/avx512bw-pr96906-1.c | 68 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr96906-1.c | 2 +- 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 78f73676e88..94bb4457e39 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3098,6 +3098,44 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_int_iterator UNSPEC_PCMP_ITER + [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) + +(define_int_attr pcmp_signed_mask + [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")]) + +;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw. +;; For signed comparison, handle EQ: 0, NEQ: 4; +;; for unsigned comparison additionally handle LE: 2, NLE: 6, equivalent to EQ and NEQ.
+ +(define_split + [(set (match_operand: 0 "register_operand") + (unspec: + [(us_minus:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "vector_operand") + (match_operand:VI12_AVX512VL 2 "vector_operand")) + (match_operand:VI12_AVX512VL 3 "const0_operand") + (match_operand:SI 4 "const_0_to_7_operand")] + UNSPEC_PCMP_ITER))] + "TARGET_AVX512BW + && ix86_binary_operator_ok (US_MINUS, mode, operands) + && (INTVAL (operands[4]) & ) == 0" + [(const_int 0)] + { + /* Predicate immediates used here: LT: 1, LE: 2, NLT (GE): 5, NLE (GT): 6. */ + int cmp_predicate = 2; /* LE */ + if (MEM_P (operands[1])) + { + std::swap (operands[1], operands[2]); + cmp_predicate = 5; /* NLT (GE), because the operands were swapped. */ + } + if ((INTVAL (operands[4]) & 4) != 0) + cmp_predicate ^= 4; /* Bit 2 set in operands[4]: flip bit 2 to invert LE into NLE (GT), or the swapped NLT into LT. */ + emit_insn (gen__ucmp3 (operands[0], operands[1],operands[2], + GEN_INT (cmp_predicate))); + DONE; + }) + (define_insn "avx512f_vmcmp3" [(set (match_operand: 0 "register_operand" "=k") (and: diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c new file mode 100644 index 00000000000..81d7e06b972 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c @@ -0,0 +1,68 @@ +/* PR target/96906 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -masm=att" } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$2} 9 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$6} 9 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$2} 9 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$6} 9 } } */ + + +#include + +#define FOO(LENGTH,SUFFIX,TYPE,UTYPE,RTYPE,PRED) \ + __mmask##RTYPE \ + foo_##LENGTH##_##TYPE##_##PRED (__m##LENGTH##i x, __m##LENGTH##i y) \ + { \ + return \ + _mm##SUFFIX##_cmp_##TYPE##_mask (_mm##SUFFIX##_subs_##UTYPE (x, y), \ + _mm##SUFFIX##_setzero_si##LENGTH (), \ + PRED); \ + } \ + +FOO (128,, epi16, epu16, 8, 0); +FOO (128,, epi16, epu16, 8, 4); + +FOO (128,, epu16, epu16, 8, 0); +FOO
(128,, epu16, epu16, 8, 2); +FOO (128,, epu16, epu16, 8, 4); +FOO (128,, epu16, epu16, 8, 6); + +FOO (256, 256, epi16, epu16, 16, 0); +FOO (256, 256, epi16, epu16, 16, 4); + +FOO (256, 256, epu16, epu16, 16, 0); +FOO (256, 256, epu16, epu16, 16, 2); +FOO (256, 256, epu16, epu16, 16, 4); +FOO (256, 256, epu16, epu16, 16, 6); + +FOO (512, 512, epi16, epu16, 32, 0); +FOO (512, 512, epi16, epu16, 32, 4); + +FOO (512, 512, epu16, epu16, 32, 0); +FOO (512, 512, epu16, epu16, 32, 2); +FOO (512, 512, epu16, epu16, 32, 4); +FOO (512, 512, epu16, epu16, 32, 6); + +FOO (128,, epi8, epu8, 16, 0); +FOO (128,, epi8, epu8, 16, 4); + +FOO (128,, epu8, epu8, 16, 0); +FOO (128,, epu8, epu8, 16, 2); +FOO (128,, epu8, epu8, 16, 4); +FOO (128,, epu8, epu8, 16, 6); + +FOO (256, 256, epi8, epu8, 32, 0); +FOO (256, 256, epi8, epu8, 32, 4); + +FOO (256, 256, epu8, epu8, 32, 0); +FOO (256, 256, epu8, epu8, 32, 2); +FOO (256, 256, epu8, epu8, 32, 4); +FOO (256, 256, epu8, epu8, 32, 6); + +FOO (512, 512, epi8, epu8, 64, 0); +FOO (512, 512, epi8, epu8, 64, 4); + +FOO (512, 512, epu8, epu8, 64, 0); +FOO (512, 512, epu8, epu8, 64, 2); +FOO (512, 512, epu8, epu8, 64, 4); +FOO (512, 512, epu8, epu8, 64, 6); diff --git a/gcc/testsuite/gcc.target/i386/pr96906-1.c b/gcc/testsuite/gcc.target/i386/pr96906-1.c index 9d836eb2bdd..b1b41bf522d 100644 --- a/gcc/testsuite/gcc.target/i386/pr96906-1.c +++ b/gcc/testsuite/gcc.target/i386/pr96906-1.c @@ -1,6 +1,6 @@ /* PR target/96906 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx2" } */ +/* { dg-options "-O2 -mavx2 -mno-avx512f" } */ /* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*xmm" 2 } } */ /* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*xmm" 2 } } */ /* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*ymm" 2 } } */ -- 2.30.2