+2019-12-19  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_can_change_mode_class):
+	Don't allow changes between partial SVE modes and other kinds
+	of mode.  Don't allow changes between two partial SVE modes
+	if they have different container or element sizes.
+
 2019-12-19  Richard Sandiford  <richard.sandiford@arm.com>
 
 	* config/aarch64/aarch64.c (aarch64_function_value_1): New function,
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
 aarch64_can_change_mode_class (machine_mode from,
			        machine_mode to, reg_class_t)
 {
+  unsigned int from_flags = aarch64_classify_vector_mode (from);
+  unsigned int to_flags = aarch64_classify_vector_mode (to);
+
+  bool from_sve_p = (from_flags & VEC_ANY_SVE);
+  bool to_sve_p = (to_flags & VEC_ANY_SVE);
+
+  bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL);
+  bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL);
+
+  /* Don't allow changes between partial SVE modes and other modes.
+     The contents of partial SVE modes are distributed evenly across
+     the register, whereas GCC expects them to be clustered together.  */
+  if (from_partial_sve_p != to_partial_sve_p)
+    return false;
+
+  /* Similarly reject changes between partial SVE modes that have
+     different patterns of significant and insignificant bits.  */
+  if (from_partial_sve_p
+      && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to)
+	  || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to)))
+    return false;
+
   if (BYTES_BIG_ENDIAN)
     {
-      bool from_sve_p = aarch64_sve_data_mode_p (from);
-      bool to_sve_p = aarch64_sve_data_mode_p (to);
-
       /* Don't allow changes between SVE data modes and non-SVE modes.
	  See the comment at the head of aarch64-sve.md for details.  */
       if (from_sve_p != to_sve_p)
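
To make the comment in the hunk above concrete, here is a minimal standalone
sketch, not part of the patch: it models a 512-bit register as 64 bytes and
places eight 32-bit elements either contiguously (the clustered layout GCC's
subreg model expects) or in the low half of each 64-bit container (the even
distribution a partial mode such as VNx2SI uses at -msve-vector-bits=512).
The 512-bit width, the choice of mode and little-endian byte order are
illustrative assumptions.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  int32_t elt[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

  uint8_t clustered[64] = { 0 };	/* Subreg model: bytes 0-31.  */
  uint8_t partial[64] = { 0 };		/* Partial mode: low 4 bytes of
					   each 8-byte container.  */
  for (int i = 0; i < 8; i++)
    {
      memcpy (clustered + 4 * i, &elt[i], sizeof elt[i]);
      memcpy (partial + 8 * i, &elt[i], sizeof elt[i]);
    }

  /* The two layouts disagree from byte 4 onwards, which is why a mode
     change to or from a partial SVE mode cannot be described as a plain
     reinterpretation of the register bits.  */
  printf ("layouts %s\n",
	  memcmp (clustered, partial, sizeof clustered) ? "differ" : "match");
  return 0;
}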
+2019-12-19  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gcc.target/aarch64/sve/mixed_size_8.c: New test.
+
 2019-12-19  Richard Sandiford  <richard.sandiford@arm.com>
 
 	* gcc.target/aarch64/sve/pcs/gnu_vectors_3.c: New test.
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c
+/* { dg-options "-O2 -msve-vector-bits=512" } */
+
+typedef int int32x16_t __attribute__((vector_size(64)));
+typedef int int32x8_t __attribute__((vector_size(32)));
+
+int32x8_t
+f1 (int32x16_t x)
+{
+ union u { int32x16_t full; int32x8_t pair[2]; } u;
+ u.full = x | 2;
+ return u.pair[0] + (int32x8_t) { 1, 2, 3, 4, 5, 6, 7, 8 };
+}
+
+int32x8_t
+f2 (int32x16_t x)
+{
+ union u { int32x16_t full; int32x8_t pair[2]; } u;
+ u.full = x | 2;
+ return u.pair[1] + (int32x8_t) { 1, 2, 3, 4, 5, 6, 7, 8 };
+}
+
+/* We could do something more efficient than spill the int32x16_t and
+ reload the int32x8_t. The important thing is that we don't do
+ something like:
+
+ orr z0.s, z0.s, #2
+ index z1.d, #1, #1
+ add z0.s, z0.s, z1.s
+ st1w z0.d, p0, [x8]
+
+ We're supposed to add z1 to one half of the ORR result instead. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d} 2 } } */
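
For reference, a hypothetical harness for f1 and f2, separate from the
committed test: it uses only GNU C vector extensions, so it can be compiled
and run on any GCC target to check the intended semantics (f1 adds
{ 1, ..., 8 } to the low half of x | 2, f2 to the high half).

#include <stdio.h>

typedef int int32x16_t __attribute__((vector_size(64)));
typedef int int32x8_t __attribute__((vector_size(32)));

int32x8_t f1 (int32x16_t x);
int32x8_t f2 (int32x16_t x);

int
main (void)
{
  int32x16_t x;
  for (int i = 0; i < 16; i++)
    x[i] = i * 10;

  int32x8_t lo = f1 (x);
  int32x8_t hi = f2 (x);

  for (int i = 0; i < 8; i++)
    {
      int expect_lo = ((i * 10) | 2) + (i + 1);
      int expect_hi = (((i + 8) * 10) | 2) + (i + 1);
      if (lo[i] != expect_lo || hi[i] != expect_hi)
	{
	  printf ("mismatch in lane %d\n", i);
	  return 1;
	}
    }
  printf ("ok\n");
  return 0;
}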