[(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
-(define_insn "*aarch64_simd_vec_copy_lane<mode>"
+(define_insn "@aarch64_simd_vec_copy_lane<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
(vec_duplicate:VALL_F16
return true;
}
+/* Recognize patterns suitable for the INS instructions.  */
+static bool
+aarch64_evpc_ins (struct expand_vec_perm_d *d)
+{
+ machine_mode mode = d->vmode;
+ unsigned HOST_WIDE_INT nelt;
+
+ if (d->vec_flags != VEC_ADVSIMD)
+ return false;
+
+ /* to_constant is safe since this routine is specific to Advanced SIMD
+     vectors.  */
+ nelt = d->perm.length ().to_constant ();
+ rtx insv = d->op0;
+
+ HOST_WIDE_INT idx = -1;
+
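+  /* Check whether the permutation is the identity on op0 except at a
+     single lane IDX.  If two or more lanes differ, IDX is reset to -1
+     so the op1 check below can still apply.  */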
+ for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
+ {
+ HOST_WIDE_INT elt;
+ if (!d->perm[i].is_constant (&elt))
+ return false;
+ if (elt == (HOST_WIDE_INT) i)
+ continue;
+ if (idx != -1)
+ {
+ idx = -1;
+ break;
+ }
+ idx = i;
+ }
+
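+  /* No single differing lane relative to op0 was found; check whether
+     the permutation is instead the identity on op1 (indices NELT..2*NELT-1)
+     with a single lane IDX replaced.  */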
+ if (idx == -1)
+ {
+ insv = d->op1;
+ for (unsigned HOST_WIDE_INT i = 0; i < nelt; i++)
+ {
+ if (d->perm[i].to_constant () == (HOST_WIDE_INT) (i + nelt))
+ continue;
+ if (idx != -1)
+ return false;
+ idx = i;
+ }
+
+ if (idx == -1)
+ return false;
+ }
+
+ if (d->testing_p)
+ return true;
+
+ gcc_assert (idx != -1);
+
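+  /* The permute index at IDX may refer to either operand; map it to a
+     source vector and a lane number within that vector.  */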
+ unsigned extractindex = d->perm[idx].to_constant ();
+ rtx extractv = d->op0;
+ if (extractindex >= nelt)
+ {
+ extractv = d->op1;
+ extractindex -= nelt;
+ }
+ gcc_assert (extractindex < nelt);
+
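+  /* Copy the base vector into the target, then overwrite lane IDX with
+     lane EXTRACTINDEX of EXTRACTV via the vec_copy_lane pattern.  Operand 2
+     is the vec_merge mask, so the one-hot value (1 << idx) selects the
+     destination lane.  */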
+ emit_move_insn (d->target, insv);
+ insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
+ expand_operand ops[5];
+ create_output_operand (&ops[0], d->target, mode);
+ create_input_operand (&ops[1], d->target, mode);
+ create_integer_operand (&ops[2], 1 << idx);
+ create_input_operand (&ops[3], extractv, mode);
+ create_integer_operand (&ops[4], extractindex);
+ expand_insn (icode, 5, ops);
+
+ return true;
+}
+
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
return true;
else if (aarch64_evpc_sel (d))
return true;
+ else if (aarch64_evpc_ins (d))
+ return true;
else if (aarch64_evpc_reencode (d))
return true;
if (d->vec_flags == VEC_SVE_DATA)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
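+/* Each shuffle below is the identity on A except in one lane, which takes
+   its value from another lane of A, so each should be a single INS.  */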
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f0(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, a, (vector int){3, 1, 2, 3});
+}
+vector float f1(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, a, (vector int){0, 0, 2, 3});
+}
+vector float f2(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, a, (vector int){0, 1, 0, 3});
+}
+vector float f3(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, a, (vector int){0, 1, 2, 0});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*ins[ \t]+v[0-9]+\.s} 4 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
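+/* Each shuffle below is the identity on A except in one lane, which takes
+   its value from another lane of A, so each should be a single INS.  */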
+#define vector __attribute__((vector_size(8*sizeof(short))))
+
+vector short f0(vector short a, vector short b)
+{
+ return __builtin_shuffle (a, a, (vector short){6, 1, 2, 3, 4, 5, 6, 7});
+}
+vector short f2(vector short a, vector short b)
+{
+ return __builtin_shuffle (a, a, (vector short){0, 1, 2, 1, 4, 5, 6, 7});
+}
+vector short f4(vector short a, vector short b)
+{
+ return __builtin_shuffle (a, a, (vector short){0, 1, 2, 3, 0, 5, 6, 7});
+}
+vector short f6(vector short a, vector short b)
+{
+ return __builtin_shuffle (a, a, (vector short){0, 1, 2, 3, 4, 5, 6, 1});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*ins[ \t]+v[0-9]+\.h} 4 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
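+/* Each shuffle below keeps all lanes of B except one, which is filled from
+   lane 0 of A, so each should be a single INS.  */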
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f0(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, b, (vector int){0, 5, 6, 7});
+}
+vector float f1(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, b, (vector int){4, 0, 6, 7});
+}
+vector float f2(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, b, (vector int){4, 5, 0, 7});
+}
+vector float f3(vector float a, vector float b)
+{
+ return __builtin_shuffle (a, b, (vector int){4, 5, 6, 0});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*ins[ \t]+v[0-9]+\.s} 4 } } */