From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 6 Feb 2020 10:08:59 +0000 (+0100)
Subject: i386: Improve avx* vector concatenation [PR93594]
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3f740c67dbb90177aa71d3c60ef9b0fd2f44dbd9;p=gcc.git

i386: Improve avx* vector concatenation [PR93594]

The following testcase shows that for _mm256_set*_m128i and similar
intrinsics, we sometimes generate bad code.  All 4 routines are expressing
the same thing, a 128-bit vector zero padded to 256-bit vector, but only
the 3rd one actually emits the desired vmovdqa %xmm0, %xmm0 insn, the
others vpxor %xmm1, %xmm1, %xmm1; vinserti128 $0x1, %xmm1, %ymm0, %ymm0
The problem is that the cast builtins use UNSPEC_CAST which is after reload
simplified using a splitter, but during combine it prevents optimizations.
We do have avx_vec_concat* patterns that generate efficient code, both for
this low part + zero concatenation special case and for other cases too, so
the following define_insn_and_split just recognizes avx_vec_concat made of a
low half of a cast and some other reg.

2020-02-06  Jakub Jelinek  <jakub@redhat.com>

	PR target/93594
	* config/i386/predicates.md (avx_identity_operand): New predicate.
	* config/i386/sse.md (*avx_vec_concat<mode>_1): New
	define_insn_and_split.

	* gcc.target/i386/avx2-pr93594.c: New test.
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b5b465a922d..382e31368dc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
 2020-02-06  Jakub Jelinek  <jakub@redhat.com>
 
+	PR target/93594
+	* config/i386/predicates.md (avx_identity_operand): New predicate.
+	* config/i386/sse.md (*avx_vec_concat<mode>_1): New
+	define_insn_and_split.
+
 	PR libgomp/93515
 	* omp-low.c (use_pointer_for_field): For nested constructs, also
 	look for map clauses on target construct.
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1119366d54e..3ab9da45ffb 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1584,6 +1584,19 @@
   return true;
 })
 
+;; Return true if OP is a parallel for identity permute.
+(define_predicate "avx_identity_operand"
+  (and (match_code "parallel")
+       (match_code "const_int" "a"))
+{
+  int i, nelt = XVECLEN (op, 0);
+
+  for (i = 0; i < nelt; ++i)
+    if (INTVAL (XVECEXP (op, 0, i)) != i)
+      return false;
+  return true;
+})
+
 ;; Return true if OP is a proper third operand to vpblendw256.
 (define_predicate "avx2_pblendw_operand"
   (match_code "const_int")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ac4cf5be686..cfd79a83544 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21358,6 +21358,24 @@
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*avx_vec_concat<mode>_1"
+  [(set (match_operand:V_256_512 0 "register_operand")
+	(vec_concat:V_256_512
+	  (vec_select:<ssehalfvecmode>
+	    (unspec:V_256_512
+	      [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand")]
+	      UNSPEC_CAST)
+	    (match_parallel 3 "avx_identity_operand"
+	      [(match_operand 4 "const_int_operand")]))
+	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand")))]
+  "TARGET_AVX
+   && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
+       || !MEM_P (operands[1]))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(vec_concat:V_256_512 (match_dup 1) (match_dup 2)))])
+
 (define_insn "vcvtph2ps<mask_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_select:V4SF
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5802f0d7c60..7b0b9c2c242 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-02-06  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/93594
+	* gcc.target/i386/avx2-pr93594.c: New test.
+
 2020-02-05  Martin Sebor  <msebor@redhat.com>
 
 	PR tree-optimization/92765
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr93594.c b/gcc/testsuite/gcc.target/i386/avx2-pr93594.c
new file mode 100644
index 00000000000..963c8deeb39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr93594.c
@@ -0,0 +1,32 @@
+/* PR target/93594 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -masm=att" } */
+/* { dg-final { scan-assembler-times "vmovdqa\t%xmm0, %xmm0" 4 } } */
+/* { dg-final { scan-assembler-not "vpxor\t%" } } */
+/* { dg-final { scan-assembler-not "vinserti128\t\\\$" } } */
+
+#include <x86intrin.h>
+
+__m256i
+foo (__m128i x)
+{
+  return _mm256_setr_m128i (x, _mm_setzero_si128 ());
+}
+
+__m256i
+bar (__m128i x)
+{
+  return _mm256_set_m128i (_mm_setzero_si128 (), x);
+}
+
+__m256i
+baz (__m128i x)
+{
+  return _mm256_insertf128_si256 (_mm256_setzero_si256 (), x, 0);
+}
+
+__m256i
+qux (__m128i x)
+{
+  return _mm256_insertf128_si256 (_mm256_castsi128_si256 (x), _mm_setzero_si128 (), 1);
+}