From: Richard Sandiford Date: Tue, 9 Jan 2018 14:31:55 +0000 (+0000) Subject: [AArch64] Use vec_perm_indices helper routines X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=326ac20ea30c776ccff82bf6f3cea8088e33d3b9;p=gcc.git [AArch64] Use vec_perm_indices helper routines This patch makes the AArch64 vec_perm_const code use the new vec_perm_indices routines, instead of checking each element individually. This means that they extend naturally to variable-length vectors. Also, aarch64_evpc_dup was the only function that generated rtl when testing_p is true, and that looked accidental. The patch adds the missing check and then replaces the gen_raw_REG/start_sequence/end_sequence stuff with an assert that no rtl is generated. 2018-01-09 Richard Sandiford gcc/ * config/aarch64/aarch64.c (aarch64_evpc_trn): Use d.perm.series_p instead of checking each element individually. (aarch64_evpc_uzp): Likewise. (aarch64_evpc_zip): Likewise. (aarch64_evpc_ext): Likewise. (aarch64_evpc_rev): Likewise. (aarch64_evpc_dup): Test the encoding for a single duplicated element, instead of checking each element individually. Return true without generating rtl if testing_p is true. (aarch64_vectorize_vec_perm_const): Use all_from_input_p to test whether all selected elements come from the same input, instead of checking each element individually. Remove calls to gen_raw_REG, start_sequence and end_sequence and instead assert that no rtl is generated. From-SVN: r256385 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5ffba6348bc..5c7b04604b4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2018-01-09 Richard Sandiford + + * config/aarch64/aarch64.c (aarch64_evpc_trn): Use d.perm.series_p + instead of checking each element individually. + (aarch64_evpc_uzp): Likewise. + (aarch64_evpc_zip): Likewise. + (aarch64_evpc_ext): Likewise. + (aarch64_evpc_rev): Likewise. + (aarch64_evpc_dup): Test the encoding for a single duplicated element, + instead of checking each element individually. 
Return true without + generating rtl if + (aarch64_vectorize_vec_perm_const): Use all_from_input_p to test + whether all selected elements come from the same input, instead of + checking each element individually. Remove calls to gen_rtx_REG, + start_sequence and end_sequence and instead assert that no rtl is + generated. + 2018-01-09 Richard Sandiford * config/aarch64/aarch64.c (aarch64_legitimate_constant_p): Fix diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index e129362ef66..0aea458c4a2 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -13317,7 +13317,7 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel, static bool aarch64_evpc_trn (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->perm.length (); + unsigned int odd, nelt = d->perm.length (); rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13326,21 +13326,11 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) /* Note that these are little-endian tests. We correct for big-endian later. */ - if (d->perm[0] == 0) - odd = 0; - else if (d->perm[0] == 1) - odd = 1; - else + odd = d->perm[0]; + if ((odd != 0 && odd != 1) + || !d->perm.series_p (0, 2, odd, 2) + || !d->perm.series_p (1, 2, nelt + odd, 2)) return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt; i += 2) - { - if (d->perm[i] != i + odd) - return false; - if (d->perm[i + 1] != ((i + nelt + odd) & mask)) - return false; - } /* Success! */ if (d->testing_p) @@ -13364,7 +13354,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->perm.length (); + unsigned int odd; rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13373,20 +13363,10 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) /* Note that these are little-endian tests. We correct for big-endian later. 
*/ - if (d->perm[0] == 0) - odd = 0; - else if (d->perm[0] == 1) - odd = 1; - else + odd = d->perm[0]; + if ((odd != 0 && odd != 1) + || !d->perm.series_p (0, 1, odd, 2)) return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt; i++) - { - unsigned elt = (i * 2 + odd) & mask; - if (d->perm[i] != elt) - return false; - } /* Success! */ if (d->testing_p) @@ -13410,7 +13390,7 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) static bool aarch64_evpc_zip (struct expand_vec_perm_d *d) { - unsigned int i, high, mask, nelt = d->perm.length (); + unsigned int high, nelt = d->perm.length (); rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13419,25 +13399,11 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) /* Note that these are little-endian tests. We correct for big-endian later. */ - high = nelt / 2; - if (d->perm[0] == high) - /* Do Nothing. */ - ; - else if (d->perm[0] == 0) - high = 0; - else + high = d->perm[0]; + if ((high != 0 && high * 2 != nelt) + || !d->perm.series_p (0, 2, high, 1) + || !d->perm.series_p (1, 2, high + nelt, 1)) return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt / 2; i++) - { - unsigned elt = (i + high) & mask; - if (d->perm[i * 2] != elt) - return false; - elt = (elt + nelt) & mask; - if (d->perm[i * 2 + 1] != elt) - return false; - } /* Success! */ if (d->testing_p) @@ -13462,23 +13428,14 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) static bool aarch64_evpc_ext (struct expand_vec_perm_d *d) { - unsigned int i, nelt = d->perm.length (); + unsigned int nelt = d->perm.length (); rtx offset; unsigned int location = d->perm[0]; /* Always < nelt. */ /* Check if the extracted indices are increasing by one. */ - for (i = 1; i < nelt; i++) - { - unsigned int required = location + i; - if (d->one_vector_p) - { - /* We'll pass the same vector in twice, so allow indices to wrap. 
*/ - required &= (nelt - 1); - } - if (d->perm[i] != required) - return false; - } + if (!d->perm.series_p (0, 1, location, 1)) + return false; /* Success! */ if (d->testing_p) @@ -13510,7 +13467,7 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d) static bool aarch64_evpc_rev (struct expand_vec_perm_d *d) { - unsigned int i, j, diff, size, unspec, nelt = d->perm.length (); + unsigned int i, diff, size, unspec; if (!d->one_vector_p) return false; @@ -13526,18 +13483,10 @@ aarch64_evpc_rev (struct expand_vec_perm_d *d) else return false; - for (i = 0; i < nelt ; i += diff + 1) - for (j = 0; j <= diff; j += 1) - { - /* This is guaranteed to be true as the value of diff - is 7, 3, 1 and we should have enough elements in the - queue to generate this. Getting a vector mask with a - value of diff other than these values implies that - something is wrong by the time we get here. */ - gcc_assert (i + j < nelt); - if (d->perm[i + j] != i + diff - j) - return false; - } + unsigned int step = diff + 1; + for (i = 0; i < step; ++i) + if (!d->perm.series_p (i, step, diff - i, step)) + return false; /* Success! */ if (d->testing_p) @@ -13554,15 +13503,17 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d) rtx out = d->target; rtx in0; machine_mode vmode = d->vmode; - unsigned int i, elt, nelt = d->perm.length (); + unsigned int elt; rtx lane; + if (d->perm.encoding ().encoded_nelts () != 1) + return false; + + /* Success! 
*/ + if (d->testing_p) + return true; + elt = d->perm[0]; - for (i = 1; i < nelt; i++) - { - if (elt != d->perm[i]) - return false; - } /* The generic preparation in aarch64_expand_vec_perm_const_1 swaps the operand order and the permute indices if it finds @@ -13650,61 +13601,37 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; - unsigned int i, which; - - d.vmode = vmode; - d.target = target; - d.op0 = op0; - d.op1 = op1; - d.testing_p = !target; - /* Calculate whether all elements are in one vector. */ - unsigned int nelt = sel.length (); - for (i = which = 0; i < nelt; ++i) + /* Check whether the mask can be applied to a single vector. */ + if (op0 && rtx_equal_p (op0, op1)) + d.one_vector_p = true; + else if (sel.all_from_input_p (0)) { - unsigned int ei = sel[i] & (2 * nelt - 1); - which |= (ei < nelt ? 1 : 2); + d.one_vector_p = true; + op1 = op0; } - - switch (which) + else if (sel.all_from_input_p (1)) { - default: - gcc_unreachable (); - - case 3: - d.one_vector_p = false; - if (d.testing_p || !rtx_equal_p (op0, op1)) - break; - - /* The elements of PERM do not suggest that only the first operand - is used, but both operands are identical. Allow easier matching - of the permutation by folding the permutation into the single - input vector. */ - /* Fall Through. */ - case 2: - d.op0 = op1; - d.one_vector_p = true; - break; - - case 1: - d.op1 = op0; d.one_vector_p = true; - break; + op0 = op1; } + else + d.one_vector_p = false; - d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt); + d.perm.new_vector (sel.encoding (), d.one_vector_p ? 
1 : 2, + sel.nelts_per_input ()); + d.vmode = vmode; + d.target = target; + d.op0 = op0; + d.op1 = op1; + d.testing_p = !target; if (!d.testing_p) return aarch64_expand_vec_perm_const_1 (&d); - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_vector_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); + rtx_insn *last = get_last_insn (); bool ret = aarch64_expand_vec_perm_const_1 (&d); - end_sequence (); + gcc_assert (last == get_last_insn ()); return ret; }