[AArch64] Add truncation for partial SVE modes
author: Richard Sandiford <richard.sandiford@arm.com>
Sat, 16 Nov 2019 11:14:51 +0000 (11:14 +0000)
committer: Richard Sandiford <rsandifo@gcc.gnu.org>
Sat, 16 Nov 2019 11:14:51 +0000 (11:14 +0000)
This patch adds support for "truncating" to a partial SVE vector from
either a full SVE vector or a wider partial vector.  This truncation is
actually a no-op and so should have zero cost in the vector cost model.

2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64-sve.md
(trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
* config/aarch64/aarch64.c (aarch64_integer_truncation_p): New
function.
(aarch64_sve_adjust_stmt_cost): Call it.

gcc/testsuite/
* gcc.target/aarch64/sve/mask_struct_load_1.c: Add
--param aarch64-sve-compare-costs=0.
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
* gcc.target/aarch64/sve/pack_1.c: Likewise.
* gcc.target/aarch64/sve/truncate_1.c: New test.

From-SVN: r278344

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_1.c
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_2.c
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_3.c
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_4.c
gcc/testsuite/gcc.target/aarch64/sve/mask_struct_load_5.c
gcc/testsuite/gcc.target/aarch64/sve/pack_1.c
gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c [new file with mode: 0644]

index 83931c5643082689d17241d490d1eebd138a564d..7fe9a114cfe7a8576fb1e585fe73882bdcf0a768 100644 (file)
@@ -1,3 +1,11 @@
+2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64-sve.md
+       (trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
+       * config/aarch64/aarch64.c (aarch64_integer_truncation_p): New
+       function.
+       (aarch64_sve_adjust_stmt_cost): Call it.
+
 2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
 
        * config/aarch64/aarch64-sve.md
index ce1bd58c0b99c15fec9ad1fb1b9d9450a9f36e4c..158a1786a97f8664d9fd13d6eb27e26dbde4b01d 100644 (file)
@@ -72,6 +72,7 @@
 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
 ;; ---- [INT] General unary arithmetic corresponding to unspecs
 ;; ---- [INT] Sign and zero extension
+;; ---- [INT] Truncation
 ;; ---- [INT] Logical inverse
 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
 ;; ---- [FP] General unary arithmetic corresponding to unspecs
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Truncation
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Truncate to a partial SVE vector from either a full vector or a
+;; wider partial vector.  This is a no-op, because we can just ignore
+;; the unused upper bits of the source.
+(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
+  [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
+       (truncate:SVE_PARTIAL_I
+         (match_operand:SVE_HSDI 1 "register_operand" "w")))]
+  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" ;; NOTE(review): the mask test presumably limits the pattern to destinations narrower than the source -- confirm against the iterator attributes.
+  "#" ;; No assembly template of its own: the insn has to be split.
+  "&& reload_completed" ;; Split condition: the insn condition above, plus reload having completed.
+  [(set (match_dup 0) (match_dup 1))] ;; The truncate is replaced by a plain set of operand 0 from operand 1.
+  {
+    operands[1] = aarch64_replace_reg_mode (operands[1],
+                                           <SVE_PARTIAL_I:MODE>mode); /* Presumably re-expresses the source register in the destination mode; confirm against aarch64_replace_reg_mode.  */
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Logical inverse
 ;; -------------------------------------------------------------------------
index 305c6da2316ac1d6510b7b3bb753abbb37ec8ade..f710aa2e795c1c50d640c8f0bcbab7845a7e8529 100644 (file)
@@ -12901,6 +12901,21 @@ aarch64_extending_load_p (stmt_vec_info stmt_info)
          && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
 }
 
+/* Return true if STMT_INFO is an integer truncation, i.e. if its
+   statement is an assignment whose rhs code satisfies
+   CONVERT_EXPR_CODE_P and that converts a value of integral type
+   (the type of rhs1) to an integral type (the type of the lhs) of
+   strictly lower precision.  Return false for every other statement,
+   including non-assignments and conversions that keep or increase
+   the precision.  */
+static bool
+aarch64_integer_truncation_p (stmt_vec_info stmt_info)
+{
+  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+  if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+    return false;
+
+  tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
+  return (INTEGRAL_TYPE_P (lhs_type)
+         && INTEGRAL_TYPE_P (rhs_type)
+         && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
+}
+
 /* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
    for STMT_INFO, which has cost kind KIND.  Adjust the cost as necessary
    for SVE targets.  */
@@ -12919,6 +12934,11 @@ aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
   if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
     stmt_cost = 0;
 
+  /* For similar reasons, vector_stmt integer truncations are a no-op,
+     because we can just ignore the unused upper bits of the source.  */
+  if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info))
+    stmt_cost = 0;
+
   return stmt_cost;
 }
 
index e13aa9f65736d4bd5cfcd3920d9df7165ae195f6..28f99e034f190f907af3b47c2b7a7a82ea4595e2 100644 (file)
@@ -1,3 +1,14 @@
+2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/mask_struct_load_1.c: Add
+       --param aarch64-sve-compare-costs=0.
+       * gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
+       * gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
+       * gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
+       * gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
+       * gcc.target/aarch64/sve/pack_1.c: Likewise.
+       * gcc.target/aarch64/sve/truncate_1.c: New test.
+
 2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.target/aarch64/sve/load_extend_1.c: New test.
index d4503322d77fce28777c226bc8caf18b764bb52b..03b2b93df07b8ccb90ac394b3ae1ba89777cc916 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 812b96723cb64e6b3572f23dac3cc9d32fe1e1ce..87ac3178be02b9dd29c7aab2066e9824a29ac204 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 29702ab55f249c3ebd0baf44981870524098e1e4..54806f93ad9b86bdd39e5ea23ce3807dbd600623 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 436da08576005d2134e2cc94e4aca682981248c4..4c73004f68dfa980998fded7bc3a4e038affca1c 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 08e58247f00f6b77a4c1e75c7ab2b25776f4945c..da367e4fd79dc2b1ffde08d6ae0a8b63bf334f39 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index d9de9963d09d45ef3d9af3a0fefbb928921557be..7c7bfdd030c0686d0d1d854c09548c2b37154de5 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c
new file mode 100644 (file)
index 0000000..aeb0645
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+/* Define f_TYPE1_TYPE2, which for each I in [0, N) adds SRC1[I] and
+   SRC2[I], casts the sum to TYPE1, shifts it right by SHIFT and stores
+   the result in DST[I], whose element type is TYPE2.  */
+#define TEST_LOOP(TYPE1, TYPE2, SHIFT)                                 \
+  void                                                                 \
+  f_##TYPE1##_##TYPE2 (TYPE2 *restrict dst, TYPE1 *restrict src1,      \
+                      TYPE1 *restrict src2, int n)                     \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      dst[i] = (TYPE1) (src1[i] + src2[i]) >> SHIFT;                   \
+  }
+
+#define TEST_ALL(T) \
+  T (uint16_t, uint8_t, 2) \
+  T (uint32_t, uint8_t, 18) \
+  T (uint64_t, uint8_t, 34) \
+  T (uint32_t, uint16_t, 3) \
+  T (uint64_t, uint16_t, 19) \
+  T (uint64_t, uint32_t, 4)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #18\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #34\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #19\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #4\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 1 } } */