From 9f9f61155a4836aa57e19405a95852172670b8ef Mon Sep 17 00:00:00 2001 From: Ilya Tocar Date: Mon, 8 Dec 2014 11:45:35 +0000 Subject: [PATCH] Enable const permutations for V64QImode. gcc/ * config/i386/i386.c (expand_vec_perm_broadcast_1): Handle v64qi. (expand_vec_perm_vpermi2_vpshub2): New. (ix86_expand_vec_perm_const_1): Use it. (ix86_vectorize_vec_perm_const_ok): Handle v64qi. * config/i386/sse.md (VEC_PERM_CONST): Add v64qi. From-SVN: r218482 --- gcc/ChangeLog | 8 +++++ gcc/config/i386/i386.c | 81 ++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/sse.md | 2 +- 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6f9096b4a65..15f5f26a52c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2014-12-08 Ilya Tocar + + * config/i386/i386.c (expand_vec_perm_broadcast_1): Handle v64qi. + (expand_vec_perm_vpermi2_vpshub2): New. + (ix86_expand_vec_perm_const_1): Use it. + (ix86_vectorize_vec_perm_const_ok): Handle v64qi. + * config/i386/sse.md (VEC_PERM_CONST): Add v64qi. + 2014-12-08 Ilya Enkovich * tree-chkp.c (chkp_build_returned_bound): Don't predict diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index aaf0b385c90..88dd9f4a9dd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -48882,6 +48882,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); return true; + case V64QImode: case V32QImode: case V16HImode: case V8SImode: @@ -48915,6 +48916,78 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d) return expand_vec_perm_broadcast_1 (d); } +/* Implement arbitrary permutations of two V64QImode operands + will 2 vpermi2w, 2 vpshufb and one vpor instruction. */ +static bool +expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d) +{ + if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) + return false; + + if (d->testing_p) + return true; + + struct expand_vec_perm_d ds[2]; + rtx rperm[128], vperm, target0, target1; + unsigned int i, nelt; + machine_mode vmode; + + nelt = d->nelt; + vmode = V64QImode; + + for (i = 0; i < 2; i++) + { + ds[i] = *d; + ds[i].vmode = V32HImode; + ds[i].nelt = 32; + ds[i].target = gen_reg_rtx (V32HImode); + ds[i].op0 = gen_lowpart (V32HImode, d->op0); + ds[i].op1 = gen_lowpart (V32HImode, d->op1); + } + + /* Prepare permutations such that the first one takes care of + putting the even bytes into the right positions or one higher + positions (ds[0]) and the second one takes care of + putting the odd bytes into the right positions or one below + (ds[1]). */ + + for (i = 0; i < nelt; i++) + { + ds[i & 1].perm[i / 2] = d->perm[i] / 2; + if (i & 1) + { + rperm[i] = constm1_rtx; + rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); + } + else + { + rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); + rperm[i + 64] = constm1_rtx; + } + } + + bool ok = expand_vec_perm_1 (&ds[0]); + gcc_assert (ok); + ds[0].target = gen_lowpart (V64QImode, ds[0].target); + + ok = expand_vec_perm_1 (&ds[1]); + gcc_assert (ok); + ds[1].target = gen_lowpart (V64QImode, ds[1].target); + + vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); + vperm = force_reg (vmode, vperm); + target0 = gen_reg_rtx (V64QImode); + emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); + + vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); + vperm = force_reg (vmode, vperm); + target1 = gen_reg_rtx (V64QImode); + emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); + + emit_insn (gen_iorv64qi3 (d->target, target0, target1)); + return true; +} + /* Implement arbitrary permutation of two V32QImode and V16QImode operands with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed all the shorter instruction sequences. */ @@ -49089,6 +49162,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) return true; + if (expand_vec_perm_vpermi2_vpshub2 (d)) + return true; + /* ??? Look for narrow permutations whose element orderings would allow the promotion to a wider mode. */ @@ -49233,6 +49309,11 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, /* All implementable with a single vpermi2 insn. */ return true; break; + case V64QImode: + if (TARGET_AVX512BW) + /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */ + return true; + break; case V8SImode: case V8SFmode: case V4DFmode: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c3aaea370bb..734e6b46b9a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10713,7 +10713,7 @@ (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") - (V32HI "TARGET_AVX512BW")]) + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) (define_expand "vec_perm_const" [(match_operand:VEC_PERM_CONST 0 "register_operand") -- 2.30.2