From 8a605c51cbb8f14cbd4d3bfd0d697924cb49b214 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 2 Oct 2014 09:29:49 +0200 Subject: [PATCH] re PR target/62128 (Use vpalignr for AVX2 rotation) PR target/62128 * config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr if it expands to a single insn only. (expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument. If true, fail unless in_order is true. Add forward declaration. (expand_vec_perm_vperm2f128): Fix up comment about which permutation is useful for one_operand_p. (ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller. From-SVN: r215796 --- gcc/ChangeLog | 11 +++++++++++ gcc/config/i386/i386.c | 23 +++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 860004054d4..24e007d28a1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2014-10-02 Jakub Jelinek + + PR target/62128 + * config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr + if it expands to a single insn only. + (expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument. If true, + fail unless in_order is true. Add forward declaration. + (expand_vec_perm_vperm2f128): Fix up comment about which permutation + is useful for one_operand_p. + (ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller. 
+ 2014-10-01 Jan Hubicka * cgraphclones.c (build_function_type_skip_args): Do not make new diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8b70f6fa59c..8fdc414114c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -39636,6 +39636,7 @@ struct expand_vec_perm_d static bool canonicalize_perm (struct expand_vec_perm_d *d); static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); +static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); /* Get a vector mode of the same size as the original but with elements twice as wide. This is only guaranteed to apply to integral vectors. */ @@ -43225,6 +43226,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_pshufb (d)) return true; + /* Try the AVX2 vpalignr instruction. */ + if (expand_vec_perm_palignr (d, true)) + return true; + /* Try the AVX512F vpermi2 instructions. */ rtx vec[64]; enum machine_mode mode = d->vmode; @@ -43286,10 +43291,11 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) the permutation using the SSSE3 palignr instruction. This succeeds when all of the elements in PERM fit within one vector and we merely need to shift them down so that a single vector permutation has a - chance to succeed. */ + chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if + the vpalignr instruction itself can perform the requested permutation. */ static bool -expand_vec_perm_palignr (struct expand_vec_perm_d *d) +expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) { unsigned i, nelt = d->nelt; unsigned min, max; @@ -43320,8 +43326,9 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d) /* Given that we have SSSE3, we know we'll be able to implement the single operand permutation after the palignr with pshufb for - 128-bit vectors. */ - if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16) + 128-bit vectors. 
If SINGLE_INSN_ONLY_P, in_order has to be computed + first. */ + if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) return true; dcopy = *d; @@ -43342,6 +43349,9 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d) } dcopy.one_operand_p = true; + if (single_insn_only_p && !in_order) + return false; + /* For AVX2, test whether we can permute the result in one instruction. */ if (d->testing_p) { @@ -43922,7 +43932,8 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) return true; } - /* For one operand, the only useful vperm2f128 permutation is 0x10. */ + /* For one operand, the only useful vperm2f128 permutation is 0x01 + aka lanes swap. */ if (d->one_operand_p) return false; } @@ -44811,7 +44822,7 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_pshuflw_pshufhw (d)) return true; - if (expand_vec_perm_palignr (d)) + if (expand_vec_perm_palignr (d, false)) return true; if (expand_vec_perm_interleave2 (d)) -- 2.30.2