From 550a338052c374cb1f6c07ffd883c4046565fdd4 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 16 Oct 2019 10:53:40 +0000 Subject: [PATCH] [AArch64] Add partial SVE vector modes This patch adds extra vector modes that represent a half, quarter or eighth of what an SVE vector can hold. This is useful for describing the memory vector involved in an extending load or truncating store. It might also be useful in future for representing "unpacked" SVE registers, i.e. registers that contain values in the low bits of a wider containing element. The new modes could have the same width as an Advanced SIMD mode for certain -msve-vector-bits=N options, so we need to ensure that they come later in the mode list and that Advanced SIMD modes always "win". 2019-10-16 Richard Sandiford gcc/ * genmodes.c (mode_data::order): New field. (blank_mode): Update accordingly. (VECTOR_MODES_WITH_PREFIX): Add an order parameter. (make_vector_modes): Likewise. (VECTOR_MODES): Update use accordingly. (cmp_modes): Sort by the new order field ahead of sorting by size. * config/aarch64/aarch64-modes.def (VNx2QI, VNx2HI, VNx2SI) (VNx4QI, VNx4HI, VNx8QI): New partial vector modes. * config/aarch64/aarch64.c (VEC_PARTIAL): New flag value. (aarch64_classify_vector_mode): Handle the new partial modes. (aarch64_vl_bytes): New function. (aarch64_hard_regno_nregs): Use it instead of BYTES_PER_SVE_VECTOR when counting the number of registers in an SVE mode. (aarch64_class_max_nregs): Likewise. (aarch64_hard_regno_mode_ok): Don't allow partial vectors in registers yet. (aarch64_classify_address): Treat partial vectors analogously to full vectors. (aarch64_print_address_internal): Consolidate the printing of MUL VL addresses, using aarch64_vl_bytes as the number of bytes represented by "VL". (aarch64_vector_mode_supported_p): Reject partial vector modes. 
From-SVN: r277062 --- gcc/ChangeLog | 25 ++++++++ gcc/config/aarch64/aarch64-modes.def | 38 +++++++++++- gcc/config/aarch64/aarch64.c | 92 ++++++++++++++++++++-------- gcc/genmodes.c | 22 ++++--- 4 files changed, 144 insertions(+), 33 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c54f216a780..56ad96db752 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2019-10-16 Richard Sandiford + + * genmodes.c (mode_data::order): New field. + (blank_mode): Update accordingly. + (VECTOR_MODES_WITH_PREFIX): Add an order parameter. + (make_vector_modes): Likewise. + (VECTOR_MODES): Update use accordingly. + (cmp_modes): Sort by the new order field ahead of sorting by size. + * config/aarch64/aarch64-modes.def (VNx2QI, VNx2HI, VNx2SI) + (VNx4QI, VNx4HI, VNx8QI): New partial vector modes. + * config/aarch64/aarch64.c (VEC_PARTIAL): New flag value. + (aarch64_classify_vector_mode): Handle the new partial modes. + (aarch64_vl_bytes): New function. + (aarch64_hard_regno_nregs): Use it instead of BYTES_PER_SVE_VECTOR + when counting the number of registers in an SVE mode. + (aarch64_class_max_nregs): Likewise. + (aarch64_hard_regno_mode_ok): Don't allow partial vectors + in registers yet. + (aarch64_classify_address): Treat partial vectors analogously + to full vectors. + (aarch64_print_address_internal): Consolidate the printing of + MUL VL addresses, using aarch64_vl_bytes as the number of + bytes represented by "VL". + (aarch64_vector_mode_supported_p): Reject partial vector modes. + 2019-10-16 Richard Sandiford * config/aarch64/aarch64.c (aarch64_layout_frame): Use is_constant diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index e034ffc969b..a9b1bce1356 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -82,8 +82,8 @@ INT_MODE (XI, 64); strictly necessary to set the alignment here, since the default would be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer. 
*/ #define SVE_MODES(NVECS, VB, VH, VS, VD) \ - VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS); \ - VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS); \ + VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \ + VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \ \ ADJUST_NUNITS (VB##QI, aarch64_sve_vg * NVECS * 8); \ ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \ @@ -108,6 +108,40 @@ SVE_MODES (2, VNx32, VNx16, VNx8, VNx4) SVE_MODES (3, VNx48, VNx24, VNx12, VNx6) SVE_MODES (4, VNx64, VNx32, VNx16, VNx8) +/* Partial SVE vectors: + + VNx2QI VNx4QI VNx8QI + VNx2HI VNx4HI + VNx2SI + + In memory they occupy contiguous locations, in the same way as fixed-length + vectors. E.g. VNx8QImode is half the size of VNx16QImode. + + Passing 1 as the final argument ensures that the modes come after all + other modes in the GET_MODE_WIDER chain, so that we never pick them + in preference to a full vector mode. */ +VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1); +VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1); +VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1); + +ADJUST_NUNITS (VNx2QI, aarch64_sve_vg); +ADJUST_NUNITS (VNx2HI, aarch64_sve_vg); +ADJUST_NUNITS (VNx2SI, aarch64_sve_vg); + +ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2); +ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2); + +ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4); + +ADJUST_ALIGNMENT (VNx2QI, 1); +ADJUST_ALIGNMENT (VNx4QI, 1); +ADJUST_ALIGNMENT (VNx8QI, 1); + +ADJUST_ALIGNMENT (VNx2HI, 2); +ADJUST_ALIGNMENT (VNx4HI, 2); + +ADJUST_ALIGNMENT (VNx2SI, 4); + /* Quad float: 128-bit floating mode for long doubles. */ FLOAT_MODE (TF, 16, ieee_quad_format); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0537e7fe359..1f0e74ac1a4 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1538,6 +1538,9 @@ const unsigned int VEC_SVE_PRED = 4; /* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate a structure of 2, 3 or 4 vectors. 
*/ const unsigned int VEC_STRUCT = 8; +/* Can be used in combination with VEC_SVE_DATA to indicate that the + vector has fewer significant bytes than a full SVE vector. */ +const unsigned int VEC_PARTIAL = 16; /* Useful combinations of the above. */ const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED; const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA; @@ -1558,7 +1561,17 @@ aarch64_classify_vector_mode (machine_mode mode) of -msve-vector-bits. */ switch (mode) { - /* Single SVE vectors. */ + /* Partial SVE QI vectors. */ + case E_VNx2QImode: + case E_VNx4QImode: + case E_VNx8QImode: + /* Partial SVE HI vectors. */ + case E_VNx2HImode: + case E_VNx4HImode: + /* Partial SVE SI vector. */ + case E_VNx2SImode: + return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0; + case E_VNx16QImode: case E_VNx8HImode: case E_VNx4SImode: @@ -1641,6 +1654,24 @@ aarch64_sve_data_mode_p (machine_mode mode) return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; } +/* Return the number of defined bytes in one constituent vector of + SVE mode MODE, which has vector flags VEC_FLAGS. */ +static poly_int64 +aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags) +{ + if (vec_flags & VEC_PARTIAL) + /* A single partial vector. */ + return GET_MODE_SIZE (mode); + + if (vec_flags & VEC_SVE_DATA) + /* A single vector or a tuple. */ + return BYTES_PER_SVE_VECTOR; + + /* A single predicate. */ + gcc_assert (vec_flags & VEC_SVE_PRED); + return BYTES_PER_SVE_PRED; +} + /* Implement target hook TARGET_ARRAY_MODE. 
*/ static opt_machine_mode aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) @@ -1769,10 +1800,13 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) case FP_REGS: case FP_LO_REGS: case FP_LO8_REGS: - if (aarch64_sve_data_mode_p (mode)) - return exact_div (GET_MODE_SIZE (mode), - BYTES_PER_SVE_VECTOR).to_constant (); - return CEIL (lowest_size, UNITS_PER_VREG); + { + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + if (vec_flags & VEC_SVE_DATA) + return exact_div (GET_MODE_SIZE (mode), + aarch64_vl_bytes (mode, vec_flags)).to_constant (); + return CEIL (lowest_size, UNITS_PER_VREG); + } case PR_REGS: case PR_LO_REGS: case PR_HI_REGS: @@ -1796,6 +1830,11 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) return mode == DImode; unsigned int vec_flags = aarch64_classify_vector_mode (mode); + /* At the moment, partial vector modes are only useful for memory + references, but that could change in future. */ + if (vec_flags & VEC_PARTIAL) + return false; + if (vec_flags & VEC_SVE_PRED) return PR_REGNUM_P (regno); @@ -7441,9 +7480,15 @@ aarch64_classify_address (struct aarch64_address_info *info, HOST_WIDE_INT const_size; + /* Whether a vector mode is partial doesn't affect address legitimacy. + Partial vectors like VNx8QImode allow the same indexed addressing + mode and MUL VL addressing mode as full vectors like VNx16QImode; + in both cases, MUL VL counts multiples of GET_MODE_SIZE. */ + unsigned int vec_flags = aarch64_classify_vector_mode (mode); + vec_flags &= ~VEC_PARTIAL; + /* On BE, we use load/store pair for all large int mode load/stores. TI/TFmode may also use a load/store pair. 
*/ - unsigned int vec_flags = aarch64_classify_vector_mode (mode); bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP || type == ADDR_QUERY_LDP_STP_N @@ -8948,7 +8993,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, aarch64_addr_query_type type) { struct aarch64_address_info addr; - unsigned int size; + unsigned int size, vec_flags; /* Check all addresses are Pmode - including ILP32. */ if (GET_MODE (x) != Pmode @@ -8964,26 +9009,24 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, { case ADDRESS_REG_IMM: if (known_eq (addr.const_offset, 0)) - asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); - else if (aarch64_sve_data_mode_p (mode)) { - HOST_WIDE_INT vnum - = exact_div (addr.const_offset, - BYTES_PER_SVE_VECTOR).to_constant (); - asm_fprintf (f, "[%s, #%wd, mul vl]", - reg_names[REGNO (addr.base)], vnum); + asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]); + return true; } - else if (aarch64_sve_pred_mode_p (mode)) + + vec_flags = aarch64_classify_vector_mode (mode); + if (vec_flags & VEC_ANY_SVE) { HOST_WIDE_INT vnum = exact_div (addr.const_offset, - BYTES_PER_SVE_PRED).to_constant (); + aarch64_vl_bytes (mode, vec_flags)).to_constant (); asm_fprintf (f, "[%s, #%wd, mul vl]", reg_names[REGNO (addr.base)], vnum); + return true; } - else - asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); + + asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)], + INTVAL (addr.offset)); return true; case ADDRESS_REG_REG: @@ -9395,7 +9438,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) can hold MODE, but at the moment we need to handle all modes. Just ignore any runtime parts for registers that can't store them. 
*/ HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); - unsigned int nregs; + unsigned int nregs, vec_flags; switch (regclass) { case TAILCALL_ADDR_REGS: @@ -9406,11 +9449,12 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) case FP_REGS: case FP_LO_REGS: case FP_LO8_REGS: - if (aarch64_sve_data_mode_p (mode) + vec_flags = aarch64_classify_vector_mode (mode); + if ((vec_flags & VEC_SVE_DATA) && constant_multiple_p (GET_MODE_SIZE (mode), - BYTES_PER_SVE_VECTOR, &nregs)) + aarch64_vl_bytes (mode, vec_flags), &nregs)) return nregs; - return (aarch64_vector_data_mode_p (mode) + return (vec_flags & VEC_ADVSIMD ? CEIL (lowest_size, UNITS_PER_VREG) : CEIL (lowest_size, UNITS_PER_WORD)); case STACK_REG: @@ -15057,7 +15101,7 @@ static bool aarch64_vector_mode_supported_p (machine_mode mode) { unsigned int vec_flags = aarch64_classify_vector_mode (mode); - return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0; + return vec_flags != 0 && (vec_flags & (VEC_STRUCT | VEC_PARTIAL)) == 0; } /* Return the full-width SVE vector mode for element mode MODE, if one diff --git a/gcc/genmodes.c b/gcc/genmodes.c index f33eefa2494..95522d6b539 100644 --- a/gcc/genmodes.c +++ b/gcc/genmodes.c @@ -53,6 +53,7 @@ struct mode_data const char *name; /* printable mode name -- SI, not SImode */ enum mode_class cl; /* this mode class */ + unsigned int order; /* top-level sorting order */ unsigned int precision; /* size in bits, equiv to TYPE_PRECISION */ unsigned int bytesize; /* storage size in addressable units */ unsigned int ncomponents; /* number of subunits */ @@ -85,7 +86,7 @@ static struct mode_data *void_mode; static const struct mode_data blank_mode = { 0, "", MAX_MODE_CLASS, - -1U, -1U, -1U, -1U, + 0, -1U, -1U, -1U, -1U, 0, 0, 0, 0, 0, 0, "", 0, 0, 0, 0, false, false, 0 }; @@ -484,14 +485,15 @@ make_complex_modes (enum mode_class cl, } } -/* For all modes in class CL, construct vector modes of width - WIDTH, having as many components as 
necessary. */ -#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W) \ - make_vector_modes (MODE_##C, #PREFIX, W, __FILE__, __LINE__) -#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W) +/* For all modes in class CL, construct vector modes of width WIDTH, + having as many components as necessary. ORDER is the sorting order + of the mode, with smaller numbers indicating a higher priority. */ +#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W, ORDER) \ + make_vector_modes (MODE_##C, #PREFIX, W, ORDER, __FILE__, __LINE__) +#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W, 0) static void ATTRIBUTE_UNUSED make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width, - const char *file, unsigned int line) + unsigned int order, const char *file, unsigned int line) { struct mode_data *m; struct mode_data *v; @@ -530,6 +532,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width, } v = new_mode (vclass, xstrdup (buf), file, line); + v->order = order; v->component = m; v->ncomponents = ncomponents; } @@ -832,6 +835,11 @@ cmp_modes (const void *a, const void *b) const struct mode_data *const m = *(const struct mode_data *const*)a; const struct mode_data *const n = *(const struct mode_data *const*)b; + if (m->order > n->order) + return 1; + else if (m->order < n->order) + return -1; + if (m->bytesize > n->bytesize) return 1; else if (m->bytesize < n->bytesize) -- 2.30.2