From 76607e7e5f5ef67e4a469fdf2a320118ff2fa25a Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Thu, 19 Dec 2019 13:36:16 +0000
Subject: [PATCH] [AArch64] Reject invalid subregs involving partial SVE modes

When adding partial SVE modes, I'd remembered to handle reloads
in a similar way to full big-endian SVE vectors, but forgot the
just-as-important mode-change rules.

2019-12-19  Richard Sandiford

gcc/
	* config/aarch64/aarch64.c (aarch64_can_change_mode_class):
	Don't allow changes between partial SVE modes and other kinds
	of mode.  Don't allow changes between two partial SVE modes
	if they have different container or element sizes.

gcc/testsuite/
	* gcc.target/aarch64/sve/mixed_size_8.c: New test.

From-SVN: r279572
---
 gcc/ChangeLog                                 |  7 ++++
 gcc/config/aarch64/aarch64.c                  | 25 ++++++++++++--
 gcc/testsuite/ChangeLog                       |  4 +++
 .../gcc.target/aarch64/sve/mixed_size_8.c     | 34 +++++++++++++++++++
 4 files changed, 67 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 63a5ee430db..542bdf6e910 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-19  Richard Sandiford
+
+	* config/aarch64/aarch64.c (aarch64_can_change_mode_class):
+	Don't allow changes between partial SVE modes and other kinds
+	of mode.  Don't allow changes between two partial SVE modes
+	if they have different container or element sizes.
+
 2019-12-19  Richard Sandiford
 
 	* config/aarch64/aarch64.c (aarch64_function_value_1): New function,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cf6aa7e4c9f..88baf96efc2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21457,11 +21457,30 @@ static bool
 aarch64_can_change_mode_class (machine_mode from,
 			       machine_mode to, reg_class_t)
 {
+  unsigned int from_flags = aarch64_classify_vector_mode (from);
+  unsigned int to_flags = aarch64_classify_vector_mode (to);
+
+  bool from_sve_p = (from_flags & VEC_ANY_SVE);
+  bool to_sve_p = (to_flags & VEC_ANY_SVE);
+
+  bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL);
+  bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL);
+
+  /* Don't allow changes between partial SVE modes and other modes.
+     The contents of partial SVE modes are distributed evenly across
+     the register, whereas GCC expects them to be clustered together.  */
+  if (from_partial_sve_p != to_partial_sve_p)
+    return false;
+
+  /* Similarly reject changes between partial SVE modes that have
+     different patterns of significant and insignificant bits.  */
+  if (from_partial_sve_p
+      && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to)
+	  || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to)))
+    return false;
+
   if (BYTES_BIG_ENDIAN)
     {
-      bool from_sve_p = aarch64_sve_data_mode_p (from);
-      bool to_sve_p = aarch64_sve_data_mode_p (to);
-
       /* Don't allow changes between SVE data modes and non-SVE modes.
 	 See the comment at the head of aarch64-sve.md for details.  */
       if (from_sve_p != to_sve_p)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3385971dba8..7d474882c63 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-12-19  Richard Sandiford
+
+	* gcc.target/aarch64/sve/mixed_size_8.c: New test.
+
 2019-12-19  Richard Sandiford
 
 	* gcc.target/aarch64/sve/pcs/gnu_vectors_3.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c
new file mode 100644
index 00000000000..f9e95d34011
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c
@@ -0,0 +1,34 @@
+/* { dg-options "-O2 -msve-vector-bits=512" } */
+
+typedef int int32x16_t __attribute__((vector_size(64)));
+typedef int int32x8_t __attribute__((vector_size(32)));
+
+int32x8_t
+f1 (int32x16_t x)
+{
+  union u { int32x16_t full; int32x8_t pair[2]; } u;
+  u.full = x | 2;
+  return u.pair[0] + (int32x8_t) { 1, 2, 3, 4, 5, 6, 7, 8 };
+}
+
+int32x8_t
+f2 (int32x16_t x)
+{
+  union u { int32x16_t full; int32x8_t pair[2]; } u;
+  u.full = x | 2;
+  return u.pair[1] + (int32x8_t) { 1, 2, 3, 4, 5, 6, 7, 8 };
+}
+
+/* We could do something more efficient than spill the int32x16_t and
+   reload the int32x8_t.  The important thing is that we don't do
+   something like:
+
+	orr	z0.s, z0.s, #2
+	index	z1.d, #1, #1
+	add	z0.s, z0.s, z1.s
+	st1w	z0.d, p0, [x8]
+
+   We're supposed to add z1 to one half of the ORR result instead.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d} 2 } } */
-- 
2.30.2
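A note on the expected semantics, not part of the patch: the scan-assembler
patterns above guard against the index vector being added to the full 512-bit
ORR result rather than to a single 256-bit half.  The scalar reference model
below is a minimal sketch of what f1 and f2 must compute; it is plain C,
runnable on any target, and assumes the usual GNU vector layout in which
u.pair[0] covers elements 0-7 and u.pair[1] covers elements 8-15.

/* Scalar reference model for mixed_size_8.c (illustration only,
   not part of the patch).  A 512-bit int32x16_t is modelled as
   int[16] and each int32x8_t half as int[8].  */
#include <stdio.h>

int
main (void)
{
  int x[16], full[16], f1[8], f2[8];

  /* Arbitrary input values.  */
  for (int i = 0; i < 16; ++i)
    x[i] = i * 10;

  /* u.full = x | 2: the OR applies to all 16 elements.  */
  for (int i = 0; i < 16; ++i)
    full[i] = x[i] | 2;

  /* f1 adds { 1, ..., 8 } to the low half (u.pair[0]);
     f2 adds it to the high half (u.pair[1]).  */
  for (int i = 0; i < 8; ++i)
    {
      f1[i] = full[i] + (i + 1);
      f2[i] = full[i + 8] + (i + 1);
    }

  for (int i = 0; i < 8; ++i)
    printf ("f1[%d] = %d, f2[%d] = %d\n", i, f1[i], i, f2[i]);
  return 0;
}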