bool testing_p;
};
+/* Forward declaration: the permute-recognition helpers below retry the
+   full pattern suite through this entry point after rewriting a
+   permute constant into a different mode.  */
+static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
+
/* Generate a variable permutation. */
static void
return true;
}
+/* Try to re-encode the PERM constant so it combines odd and even elements.
+ This rewrites constants such as {0, 1, 4, 5}/V4SF to {0, 2}/V2DI.
+ We retry with this new constant with the full suite of patterns.
+
+ Returns true (and emits the permute unless D->testing_p) if the
+ re-encoded constant matched one of the other evpc patterns,
+ false otherwise.  */
+static bool
+aarch64_evpc_reencode (struct expand_vec_perm_d *d)
+{
+ expand_vec_perm_d newd;
+ unsigned HOST_WIDE_INT nelt;
+
+ /* Only fixed-length Advanced SIMD permutes are handled; this keeps
+ the to_constant () calls below safe (see comment further down).  */
+ if (d->vec_flags != VEC_ADVSIMD)
+ return false;
+
+ /* Get the new mode. Always twice the size of the inner
+ and half the elements. */
+ poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
+ unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
+ auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
+ machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
+
+ /* NOTE(review): word_mode appears to be the fallback result when no
+ Advanced SIMD container exists for the doubled element size — in
+ that case there is nothing to retry with, so give up.  Confirm
+ against aarch64_simd_container_mode.  */
+ if (new_mode == word_mode)
+ return false;
+
+ /* to_constant is safe since this routine is specific to Advanced SIMD
+ vectors. */
+ nelt = d->perm.length ().to_constant ();
+
+ /* The re-encoded constant has half as many elements and is always
+ fully listed (one pattern, no wrap-around stepping).  */
+ vec_perm_builder newpermconst;
+ newpermconst.new_vector (nelt / 2, nelt / 2, 1);
+
+ /* Convert the perm constant if we can. Require even, odd as the pairs. */
+ for (unsigned int i = 0; i += 2)
+ {
+ poly_int64 elt0 = d->perm[i];
+ poly_int64 elt1 = d->perm[i + 1];
+ poly_int64 newelt;
+ /* Each pair must be (2k, 2k+1) so that it selects one whole
+ double-width element; anything else cannot be re-encoded.  */
+ if (!multiple_p (elt0, 2, &newelt) || maybe_ne (elt0 + 1, elt1))
+ return false;
+ newpermconst.quick_push (newelt.to_constant ());
+ }
+ newpermconst.finalize ();
+
+ /* Build the widened permute description; the operands and target are
+ simply reinterpreted in the new mode via lowpart subregs.  */
+ newd.vmode = new_mode;
+ newd.vec_flags = VEC_ADVSIMD;
+ newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL;
+ newd.op0 = d->op0 ? gen_lowpart (new_mode, d->op0) : NULL;
+ newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL;
+ newd.testing_p = d->testing_p;
+ newd.one_vector_p = d->one_vector_p;
+
+ /* Retry the whole pattern suite on the half-length constant.  */
+ newd.perm.new_vector (newpermconst, newd.one_vector_p ? 1 : 2, nelt / 2);
+ return aarch64_expand_vec_perm_const_1 (&newd);
+}
+
/* Recognize patterns suitable for the UZP instructions. */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
return true;
else if (aarch64_evpc_sel (d))
return true;
+ else if (aarch64_evpc_reencode (d))
+ return true;
if (d->vec_flags == VEC_SVE_DATA)
return aarch64_evpc_sve_tbl (d);
else if (d->vec_flags == VEC_ADVSIMD)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+/* These are both dups: {0, 1, 0, 1}/V4SF re-encodes to {0, 0}/V2DI and
+   {2, 3, 2, 3}/V4SF to {1, 1}/V2DI, i.e. a dup of one 64-bit lane.  */
+vector float f(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, a, (vector int){0, 1, 0, 1});
+}
+vector float f1(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, a, (vector int){2, 3, 2, 3});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*dup[ \t]+v[0-9]+\.2d} 2 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(2*sizeof(float))))
+
+/* {0, 2} selects a[0], b[0]: an interleave of the low elements,
+   i.e. zip1 on 64-bit vectors.  */
+vector float f(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, b, (vector int){0, 2});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2s} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+ /* This is the same as zip1 v.2d as {0, 1, 4, 5} can be converted to {0, 2}. */
+ return __builtin_shuffle (a, b, (vector int){0, 1, 4, 5});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2d} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+ /* This is the same as zip1 v.2d with the operands swapped, as
+    {4, 5, 0, 1} can be converted to {2, 0}. */
+ return __builtin_shuffle (a, b, (vector int){4, 5, 0, 1});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2d} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+ /* This is the same as zip2 v.2d as {2, 3, 6, 7} can be converted to {1, 3},
+    i.e. an interleave of the high 64-bit halves. */
+ return __builtin_shuffle (a, b, (vector int){2, 3, 6, 7});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip2[ \t]+v[0-9]+\.2d} 1 } } */