[AArch64] Make more use of SVE conditional constant moves
author: Richard Sandiford <richard.sandiford@arm.com>
Wed, 14 Aug 2019 09:18:14 +0000 (09:18 +0000)
committer: Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 14 Aug 2019 09:18:14 +0000 (09:18 +0000)
This patch extends the SVE UNSPEC_SEL patterns so that they can use:

(1) MOV /M of a duplicated integer constant
(2) MOV /M of a duplicated floating-point constant bitcast to an integer,
    accepting the same constants as (1)
(3) FMOV /M of a duplicated floating-point constant
(4) MOV /Z of a duplicated integer constant
(5) MOV /Z of a duplicated floating-point constant bitcast to an integer,
    accepting the same constants as (4)
(6) MOVPRFXed FMOV /M of a duplicated floating-point constant

We already handled (4) with a special pattern; the rest are new.

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
    Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

gcc/
* config/aarch64/aarch64.c (aarch64_bit_representation): New function.
(aarch64_print_vector_float_operand): Also handle 8-bit floats.
(aarch64_print_operand): Add support for %I.
(aarch64_sve_dup_immediate_p): Handle scalars as well as vectors.
Bitcast floating-point constants to the corresponding integer constant.
(aarch64_float_const_representable_p): Handle vectors as well
as scalars.
(aarch64_expand_sve_vcond): Make sure that the operands are valid
for the new vcond_mask_<mode><vpred> expander.
* config/aarch64/predicates.md (aarch64_sve_dup_immediate): Also
test aarch64_float_const_representable_p.
(aarch64_sve_reg_or_dup_imm): New predicate.
* config/aarch64/aarch64-sve.md (vec_extract<vpred><Vel>): Use
gen_vcond_mask_<mode><vpred> instead of
gen_aarch64_sve_dup<mode>_const.
(vcond_mask_<mode><vpred>): Turn into a define_expand that
accepts aarch64_sve_reg_or_dup_imm and aarch64_simd_reg_or_zero
for operands 1 and 2 respectively.  Force operand 2 into a
register if operand 1 is a register.  Fold old define_insn...
(aarch64_sve_dup<mode>_const): ...and this define_insn...
(*vcond_mask_<mode><vpred>): ...into this new pattern.  Handle
floating-point constants that can be moved as integers.  Add
alternatives for MOV /M and FMOV /M.
(vcond<mode><v_int_equiv>, vcondu<mode><v_int_equiv>)
(vcond<mode><v_fp_equiv>): Accept nonmemory_operand for both
operands 1 and 2.
* config/aarch64/constraints.md (Ufc): Handle vectors as well
as scalars.
(vss): New constraint.

gcc/testsuite/
* gcc.target/aarch64/sve/vcond_18.c: New test.
* gcc.target/aarch64/sve/vcond_18_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_19.c: Likewise.
* gcc.target/aarch64/sve/vcond_19_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_20.c: Likewise.
* gcc.target/aarch64/sve/vcond_20_run.c: Likewise.

Co-Authored-By: Kugan Vivekanandarajah <kuganv@linaro.org>
From-SVN: r274441

12 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/constraints.md
gcc/config/aarch64/predicates.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c [new file with mode: 0644]

index b597ad59d4c071827f971c54299a4b47caa38e1a..777f1c87874c3932e6e6bd8cb54ace7aa8b510df 100644 (file)
@@ -1,3 +1,36 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+           Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+       * config/aarch64/aarch64.c (aarch64_bit_representation): New function.
+       (aarch64_print_vector_float_operand): Also handle 8-bit floats.
+       (aarch64_print_operand): Add support for %I.
+       (aarch64_sve_dup_immediate_p): Handle scalars as well as vectors.
+       Bitcast floating-point constants to the corresponding integer constant.
+       (aarch64_float_const_representable_p): Handle vectors as well
+       as scalars.
+       (aarch64_expand_sve_vcond): Make sure that the operands are valid
+       for the new vcond_mask_<mode><vpred> expander.
+       * config/aarch64/predicates.md (aarch64_sve_dup_immediate): Also
+       test aarch64_float_const_representable_p.
+       (aarch64_sve_reg_or_dup_imm): New predicate.
+       * config/aarch64/aarch64-sve.md (vec_extract<vpred><Vel>): Use
+       gen_vcond_mask_<mode><vpred> instead of
+       gen_aarch64_sve_dup<mode>_const.
+       (vcond_mask_<mode><vpred>): Turn into a define_expand that
+       accepts aarch64_sve_reg_or_dup_imm and aarch64_simd_reg_or_zero
+       for operands 1 and 2 respectively.  Force operand 2 into a
+       register if operand 1 is a register.  Fold old define_insn...
+       (aarch64_sve_dup<mode>_const): ...and this define_insn...
+       (*vcond_mask_<mode><vpred>): ...into this new pattern.  Handle
+       floating-point constants that can be moved as integers.  Add
+       alternatives for MOV /M and FMOV /M.
+       (vcond<mode><v_int_equiv>, vcondu<mode><v_int_equiv>)
+       (vcond<mode><v_fp_equiv>): Accept nonmemory_operand for operands
+       1 and 2 respectively.
+       * config/aarch64/constraints.md (Ufc): Handle vectors as well
+       as scalars.
+       (vss): New constraint.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
 
        * config/aarch64/predicates.md (aarch64_sve_float_maxmin_immediate)
index 851b459cc483904541e50117f64dc9e78c2d48c1..1bdfc4706d14052c6345ba1af577c8102b2dde7a 100644 (file)
   "TARGET_SVE"
   {
     rtx tmp = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
-                                               CONST1_RTX (<MODE>mode),
-                                               CONST0_RTX (<MODE>mode)));
+    emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
+                                            CONST1_RTX (<MODE>mode),
+                                            CONST0_RTX (<MODE>mode)));
     emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
     DONE;
   }
 ;; ---- [INT,FP] Select based on predicates
 ;; -------------------------------------------------------------------------
 ;; Includes merging patterns for:
+;; - FMOV
 ;; - MOV
 ;; - SEL
 ;; -------------------------------------------------------------------------
 ;; vcond_mask operand order: true, false, mask
 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
 ;; SEL operand order:        mask, true, false
-(define_insn "vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+(define_expand "vcond_mask_<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
        (unspec:SVE_ALL
-         [(match_operand:<VPRED> 3 "register_operand" "Upa")
-          (match_operand:SVE_ALL 1 "register_operand" "w")
-          (match_operand:SVE_ALL 2 "register_operand" "w")]
+         [(match_operand:<VPRED> 3 "register_operand")
+          (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
+          (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE"
-  "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
+  {
+    if (register_operand (operands[1], <MODE>mode))
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+  }
 )
 
-;; Selects between a duplicated immediate and zero.
-(define_insn "aarch64_sve_dup<mode>_const"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w")
-       (unspec:SVE_I
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
-          (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
-          (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
+;; Selects between:
+;; - two registers
+;; - a duplicated immediate and a register
+;; - a duplicated immediate and zero
+(define_insn "*vcond_mask_<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
+       (unspec:SVE_ALL
+         [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
+          (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
+          (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
          UNSPEC_SEL))]
-  "TARGET_SVE"
-  "mov\t%0.<Vetype>, %1/z, #%2"
+  "TARGET_SVE
+   && (!register_operand (operands[1], <MODE>mode)
+       || register_operand (operands[2], <MODE>mode))"
+  "@
+   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
+   mov\t%0.<Vetype>, %3/m, #%I1
+   mov\t%0.<Vetype>, %3/z, #%I1
+   fmov\t%0.<Vetype>, %3/m, #%1
+   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
+   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
+   movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
+  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
 )
 
 ;; -------------------------------------------------------------------------
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
-         (match_operand:SVE_ALL 1 "register_operand")
-         (match_operand:SVE_ALL 2 "register_operand")))]
+         (match_operand:SVE_ALL 1 "nonmemory_operand")
+         (match_operand:SVE_ALL 2 "nonmemory_operand")))]
   "TARGET_SVE"
   {
     aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
-         (match_operand:SVE_ALL 1 "register_operand")
-         (match_operand:SVE_ALL 2 "register_operand")))]
+         (match_operand:SVE_ALL 1 "nonmemory_operand")
+         (match_operand:SVE_ALL 2 "nonmemory_operand")))]
   "TARGET_SVE"
   {
     aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_FP_EQUIV> 4 "register_operand")
             (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
-         (match_operand:SVE_HSD 1 "register_operand")
-         (match_operand:SVE_HSD 2 "register_operand")))]
+         (match_operand:SVE_HSD 1 "nonmemory_operand")
+         (match_operand:SVE_HSD 2 "nonmemory_operand")))]
   "TARGET_SVE"
   {
     aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
index 27a9a5fd28dc0a87fa44699da4a5aeed878d9a0f..aaf9e80aea5e8332b5bdccace63174a5912bf36a 100644 (file)
@@ -1482,6 +1482,16 @@ aarch64_dbx_register_number (unsigned regno)
    return DWARF_FRAME_REGISTERS;
 }
 
+/* If X is a CONST_DOUBLE, return its bit representation as a constant
+   integer, otherwise return X unmodified.  */
+static rtx
+aarch64_bit_representation (rtx x)
+{
+  if (CONST_DOUBLE_P (x))
+    x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+  return x;
+}
+
 /* Return true if MODE is any of the Advanced SIMD structure modes.  */
 static bool
 aarch64_advsimd_struct_mode_p (machine_mode mode)
@@ -8275,7 +8285,8 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
   if (negate)
     r = real_value_negate (&r);
 
-  /* We only handle the SVE single-bit immediates here.  */
+  /* Handle the SVE single-bit immediates specially, since they have a
+     fixed form in the assembly syntax.  */
   if (real_equal (&r, &dconst0))
     asm_fprintf (f, "0.0");
   else if (real_equal (&r, &dconst1))
@@ -8283,7 +8294,13 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
   else if (real_equal (&r, &dconsthalf))
     asm_fprintf (f, "0.5");
   else
-    return false;
+    {
+      const int buf_size = 20;
+      char float_buf[buf_size] = {'\0'};
+      real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
+                               1, GET_MODE (elt));
+      asm_fprintf (f, "%s", float_buf);
+    }
 
   return true;
 }
@@ -8312,6 +8329,11 @@ sizetochar (int size)
                        and print it as an unsigned integer, in decimal.
      'e':              Print the sign/zero-extend size as a character 8->b,
                        16->h, 32->w.
+     'I':              If the operand is a duplicated vector constant,
+                       replace it with the duplicated scalar.  If the
+                       operand is then a floating-point constant, replace
+                       it with the integer bit representation.  Print the
+                       transformed constant as a signed decimal number.
      'p':              Prints N such that 2^N == X (X must be power of 2 and
                        const int).
      'P':              Print the number of non-zero bits in X (a const_int).
@@ -8444,6 +8466,19 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
       break;
 
+    case 'I':
+      {
+       x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+       if (CONST_INT_P (x))
+         asm_fprintf (f, "%wd", INTVAL (x));
+       else
+         {
+           output_operand_lossage ("invalid operand for '%%%c'", code);
+           return;
+         }
+       break;
+      }
+
     case 'M':
     case 'm':
       {
@@ -15116,13 +15151,11 @@ aarch64_sve_bitmask_immediate_p (rtx x)
 bool
 aarch64_sve_dup_immediate_p (rtx x)
 {
-  rtx elt;
-
-  if (!const_vec_duplicate_p (x, &elt)
-      || !CONST_INT_P (elt))
+  x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+  if (!CONST_INT_P (x))
     return false;
 
-  HOST_WIDE_INT val = INTVAL (elt);
+  HOST_WIDE_INT val = INTVAL (x);
   if (val & 0xff)
     return IN_RANGE (val, -0x80, 0x7f);
   return IN_RANGE (val, -0x8000, 0x7f00);
@@ -16965,6 +16998,7 @@ aarch64_float_const_representable_p (rtx x)
   REAL_VALUE_TYPE r, m;
   bool fail;
 
+  x = unwrap_const_vec_duplicate (x);
   if (!CONST_DOUBLE_P (x))
     return false;
 
@@ -18086,6 +18120,13 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
   else
     aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
 
+  if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode))
+    ops[1] = force_reg (data_mode, ops[1]);
+  /* The "false" value can only be zero if the "true" value is a constant.  */
+  if (register_operand (ops[1], data_mode)
+      || !aarch64_simd_reg_or_zero (ops[2], data_mode))
+    ops[2] = force_reg (data_mode, ops[2]);
+
   rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
   emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
 }
index 61547e5ee08f0c196b10ce6bb8448209edf45145..28734b46009a09d60a0c34b58e1ac73471cc3f97 100644 (file)
 (define_constraint "Ufc"
   "A floating point constant which can be used with an\
    FMOV immediate operation."
-  (and (match_code "const_double")
+  (and (match_code "const_double,const_vector")
        (match_test "aarch64_float_const_representable_p (op)")))
 
 (define_constraint "Uvi"
    CMP instructions."
  (match_operand 0 "aarch64_sve_cmp_vsc_immediate"))
 
+(define_constraint "vss"
+  "@internal
+   A constraint that matches a signed immediate operand valid for SVE
+   DUP instructions."
+ (match_test "aarch64_sve_dup_immediate_p (op)"))
+
 (define_constraint "vsd"
   "@internal
    A constraint that matches an unsigned immediate operand valid for SVE
index b456cfffeb22801ce81bf951df76a56281ee0040..b7230ca32c61b7e875ba9f9ddc88ae108c19ad06 100644 (file)
 
 (define_predicate "aarch64_sve_dup_immediate"
   (and (match_code "const,const_vector")
-       (match_test "aarch64_sve_dup_immediate_p (op)")))
+       (ior (match_test "aarch64_sve_dup_immediate_p (op)")
+           (match_test "aarch64_float_const_representable_p (op)"))))
 
 (define_predicate "aarch64_sve_cmp_vsc_immediate"
   (and (match_code "const,const_vector")
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_sve_vsm_immediate")))
 
+(define_predicate "aarch64_sve_reg_or_dup_imm"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "aarch64_sve_dup_immediate")))
+
 (define_predicate "aarch64_sve_cmp_vsc_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_sve_cmp_vsc_immediate")))
index 0edfe1a20fecdeecc869d41f3b73873966352d8f..a2507f729c00de8a074a406a91c8a76f2ac3af46 100644 (file)
@@ -1,3 +1,13 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+           Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+       * gcc.target/aarch64/sve/vcond_18.c: New test.
+       * gcc.target/aarch64/sve/vcond_18_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_19.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_19_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_20.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_20_run.c: Likewise.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.target/aarch64/sve/fmaxnm_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c
new file mode 100644 (file)
index 0000000..a2590b9
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define DEF_LOOP(TYPE, NAME, CONST)                    \
+  void                                                 \
+  test_##TYPE##_##NAME (TYPE *restrict x,              \
+                       TYPE *restrict pred, int n)     \
+  {                                                    \
+    for (int i = 0; i < n; ++i)                                \
+      x[i] = pred[i] > 0 ? CONST : 0;                  \
+  }
+
+#define TEST_TYPE(T, TYPE)                     \
+  T (TYPE, 2, 2.0)                             \
+  T (TYPE, 1p25, 1.25)                         \
+  T (TYPE, 32p25, 32.25)                       \
+  T (TYPE, m4, -4.0)                           \
+  T (TYPE, m2p5, -2.5)                         \
+  T (TYPE, m64p5, -64.5)
+
+#define TEST_ALL(T)                            \
+  TEST_TYPE (T, _Float16)                      \
+  TEST_TYPE (T, float)                         \
+  TEST_TYPE (T, double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #16384\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #15616\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-16128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c
new file mode 100644 (file)
index 0000000..279b0a3
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vcond_18.c"
+
+#define N 97
+
+#define TEST_LOOP(TYPE, NAME, CONST)                           \
+  {                                                            \
+    TYPE x[N], pred[N];                                                \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       pred[i] = i % 5 <= i % 6;                               \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    test_##TYPE##_##NAME (x, pred, N);                         \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       if (x[i] != (TYPE) (pred[i] > 0 ? CONST : 0))           \
+         __builtin_abort ();                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c
new file mode 100644 (file)
index 0000000..2347b7f
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define DEF_LOOP(TYPE, NAME, CONST)                    \
+  void                                                 \
+  test_##TYPE##_##NAME (TYPE *restrict x,              \
+                       TYPE *restrict pred, int n)     \
+  {                                                    \
+    for (int i = 0; i < n; ++i)                                \
+      x[i] = pred[i] > 0 ? CONST : pred[i];            \
+  }
+
+#define TEST_TYPE(T, TYPE)                     \
+  T (TYPE, 2, 2.0)                             \
+  T (TYPE, 1p25, 1.25)                         \
+  T (TYPE, 32p25, 32.25)                       \
+  T (TYPE, m4, -4.0)                           \
+  T (TYPE, m2p5, -2.5)                         \
+  T (TYPE, m64p5, -64.5)
+
+#define TEST_ALL(T)                            \
+  TEST_TYPE (T, _Float16)                      \
+  TEST_TYPE (T, float)                         \
+  TEST_TYPE (T, double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c
new file mode 100644 (file)
index 0000000..d93d8aa
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vcond_19.c"
+
+#define N 97
+
+#define TEST_LOOP(TYPE, NAME, CONST)                           \
+  {                                                            \
+    TYPE x[N], pred[N];                                                \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       pred[i] = i % 5 <= i % 6 ? i : 0;                       \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    test_##TYPE##_##NAME (x, pred, N);                         \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       if (x[i] != (TYPE) (pred[i] > 0 ? CONST : pred[i]))     \
+         __builtin_abort ();                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c
new file mode 100644 (file)
index 0000000..bf2af1c
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define DEF_LOOP(TYPE, NAME, CONST)                    \
+  void                                                 \
+  test_##TYPE##_##NAME (TYPE *restrict x,              \
+                       TYPE *restrict pred, int n)     \
+  {                                                    \
+    for (int i = 0; i < n; ++i)                                \
+      x[i] = pred[i] > 0 ? CONST : 12.0;               \
+  }
+
+#define TEST_TYPE(T, TYPE)                     \
+  T (TYPE, 2, 2.0)                             \
+  T (TYPE, 1p25, 1.25)                         \
+  T (TYPE, 32p25, 32.25)                       \
+  T (TYPE, m4, -4.0)                           \
+  T (TYPE, m2p5, -2.5)                         \
+  T (TYPE, m64p5, -64.5)
+
+#define TEST_ALL(T)                            \
+  TEST_TYPE (T, _Float16)                      \
+  TEST_TYPE (T, float)                         \
+  TEST_TYPE (T, double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c
new file mode 100644 (file)
index 0000000..33c81de
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vcond_20.c"
+
+#define N 97
+
+#define TEST_LOOP(TYPE, NAME, CONST)                           \
+  {                                                            \
+    TYPE x[N], pred[N];                                                \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       pred[i] = i % 5 <= i % 6;                               \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    test_##TYPE##_##NAME (x, pred, N);                         \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       if (x[i] != (TYPE) (pred[i] > 0 ? CONST : 12.0))        \
+         __builtin_abort ();                                   \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}