Fold VEC_COND_EXPRs to IFN_COND_* where possible
author     Richard Sandiford <richard.sandiford@linaro.org>
           Fri, 25 May 2018 08:09:39 +0000 (08:09 +0000)
committer  Richard Sandiford <rsandifo@gcc.gnu.org>
           Fri, 25 May 2018 08:09:39 +0000 (08:09 +0000)
This patch adds the folds:

  (vec_cond COND (foo A B) C) -> (IFN_COND_FOO COND A B C)
  (vec_cond COND C (foo A B)) -> (IFN_COND_FOO (!COND) A B C)

with the usual implicit restriction that the target must support
the produced IFN_COND_FOO.
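
For example (an illustrative gimple sketch with made-up SSA names,
rather than an actual dump), with FOO = PLUS_EXPR the first fold turns:

  _5 = _2 + _3;
  _6 = VEC_COND_EXPR <_1, _5, _4>;

into:

  _6 = .COND_ADD (_1, _2, _3, _4);

assuming the target provides a cond_add optab for the vector mode.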

The results of these folds don't have identical semantics, since
the reverse transform would be invalid if (FOO A[i] B[i]) faults when
COND[i] is false.  But this direction is OK since we're simply dropping
faults for operations whose results aren't needed.

The new gimple_resimplify4 doesn't try to do any constant folding
on the IFN_COND_*s.  This is because a later patch will handle it
by folding the associated unconditional operation.

Doing this in gimple is better than doing it in .md patterns,
since the second form (with the inverted condition) is much more
common than the first, and it's better to fold away the inversion
in gimple and optimise the result before entering expand.
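
As a concrete example, hand-expanding the f_add case from the new
gcc.dg/vect/vect-cond-arith-1.c test (N is the iteration count defined
there) gives:

  void __attribute__ ((noipa))
  f_add (double *restrict a, double *restrict b, double x)
  {
    for (int i = 0; i < N; ++i)
      {
        double truev = b[i] + x;
        a[i] = b[i] < 100 ? truev : b[i];
      }
  }

The vectoriser produces an unconditional vector addition whose result
feeds a VEC_COND_EXPR selecting between it and b[i]; the new match.pd
rule folds that pair into a single .COND_ADD call (which the test's
scan-tree-dump checks for), and on SVE the cond_add<mode> pattern can
then expand it to a predicated FADD with no separate SEL, since the
"else" value b[i] is already one of the addition's inputs.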

2018-05-24  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
* doc/sourcebuild.texi (vect_double_cond_arith): Document.
* gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 4.
(gimple_match_op::gimple_match_op): Add an overload for 4 operands.
(gimple_match_op::set_op): Likewise.
(gimple_resimplify4): Declare.
* genmatch.c (get_operand_type): Handle CFN_COND_* functions.
(expr::gen_transform): Likewise.
(decision_tree::gen): Generate a simplification routine for 4 operands.
* gimple-match-head.c (gimple_simplify): Add an overload for
4 operands.  In the top-level function, handle up to 4 call
arguments and call gimple_resimplify4.
(gimple_resimplify4): New function.
(build_call_internal): Pass a fourth operand.
(maybe_push_to_seq): Likewise.
* match.pd (UNCOND_BINARY, COND_BINARY): New operator lists.
Fold VEC_COND_EXPRs of an operation and a default value into
an IFN_COND_* function if possible.
* config/aarch64/iterators.md (UNSPEC_COND_MAX, UNSPEC_COND_MIN):
New unspecs.
(SVE_COND_FP_BINARY): Include them.
(optab, sve_fp_op): Handle them.
(SVE_INT_BINARY_REV): New code iterator.
(SVE_COND_FP_BINARY_REV): New int iterator.
(commutative): New int attribute.
* config/aarch64/aarch64-protos.h (aarch64_sve_prepare_conditional_op):
Declare.
* config/aarch64/aarch64.c (aarch64_sve_prepare_conditional_op): New
function.
* config/aarch64/aarch64-sve.md (cond_<optab><mode>): Use it.
(*cond_<optab><mode>): New patterns for reversed operands.

gcc/testsuite/
* lib/target-supports.exp
(check_effective_target_vect_double_cond_arith): New proc.
* gcc.dg/vect/vect-cond-arith-1.c: New test.
* gcc.target/aarch64/sve/vcond_8.c: Likewise.
* gcc.target/aarch64/sve/vcond_8_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_9.c: Likewise.
* gcc.target/aarch64/sve/vcond_9_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_12.c: Likewise.
* gcc.target/aarch64/sve/vcond_12_run.c: Likewise.

From-SVN: r260710

19 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/iterators.md
gcc/doc/sourcebuild.texi
gcc/genmatch.c
gcc/gimple-match-head.c
gcc/gimple-match.h
gcc/match.pd
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c [new file with mode: 0644]
gcc/testsuite/lib/target-supports.exp

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 840bdf7c89e88d640df3633fa7e7c0d2dace42fa..fd187b92d391090064c1d03dd4693455bc28ea53 100644
@@ -1,3 +1,36 @@
+2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * doc/sourcebuild.texi (vect_double_cond_arith): Document.
+       * gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 4.
+       (gimple_match_op::gimple_match_op): Add an overload for 4 operands.
+       (gimple_match_op::set_op): Likewise.
+       (gimple_resimplify4): Declare.
+       * genmatch.c (get_operand_type): Handle CFN_COND_* functions.
+       (expr::gen_transform): Likewise.
+       (decision_tree::gen): Generate a simplification routine for 4 operands.
+       * gimple-match-head.c (gimple_simplify): Add an overload for
+       4 operands.  In the top-level function, handle up to 4 call
+       arguments and call gimple_resimplify4.
+       (gimple_resimplify4): New function.
+       (build_call_internal): Pass a fourth operand.
+       (maybe_push_to_seq): Likewise.
+       * match.pd (UNCOND_BINARY, COND_BINARY): New operator lists.
+       Fold VEC_COND_EXPRs of an operation and a default value into
+       an IFN_COND_* function if possible.
+       * config/aarch64/iterators.md (UNSPEC_COND_MAX, UNSPEC_COND_MIN):
+       New unspecs.
+       (SVE_COND_FP_BINARY): Include them.
+       (optab, sve_fp_op): Handle them.
+       (SVE_INT_BINARY_REV): New code iterator.
+       (SVE_COND_FP_BINARY_REV): New int iterator.
+       (commutative): New int attribute.
+       * config/aarch64/aarch64-protos.h (aarch64_sve_prepare_conditional_op):
+       Declare.
+       * config/aarch64/aarch64.c (aarch64_sve_prepare_conditional_op): New
+       function.
+       * config/aarch64/aarch64-sve.md (cond_<optab><mode>): Use it.
+       (*cond_<optab><mode>): New patterns for reversed operands.
+
 2018-05-25  Richard Biener  <rguenther@suse.de>
 
        * tree-vectorizer.h (STMT_VINFO_GROUP_*, GROUP_*): Remove.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index eec86428741ab08b71a62db63d2073c130418ed4..4ea50acaa59c0b58a213bd1f27fb78b6d8deee96 100644
@@ -513,6 +513,7 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
 void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
 bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
+void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
 #endif /* RTX_CODE */
 
 void aarch64_init_builtins (void);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 4f918492ae497ed2db1bfab69dd1224b3d1eac0c..0bb37e72bd43bbdb1507f5da69f098a2f4a12f90 100644
          UNSPEC_SEL))]
   "TARGET_SVE"
 {
-  gcc_assert (rtx_equal_p (operands[2], operands[4]));
+  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
+  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
 })
 
 ;; Predicated integer operations.
   "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Predicated integer operations with the operands reversed.
+(define_insn "*cond_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+       (unspec:SVE_I
+         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+          (SVE_INT_BINARY_REV:SVE_I
+            (match_operand:SVE_I 2 "register_operand" "w")
+            (match_operand:SVE_I 3 "register_operand" "0"))
+          (match_dup 3)]
+         UNSPEC_SEL))]
+  "TARGET_SVE"
+  "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
 ;; Set operand 0 to the last active element in operand 3, or to tied
 ;; operand 1 if no elements are active.
 (define_insn "fold_extract_last_<mode>"
          UNSPEC_SEL))]
   "TARGET_SVE"
 {
-  gcc_assert (rtx_equal_p (operands[2], operands[4]));
+  aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
 })
 
 ;; Predicated floating-point operations.
   "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Predicated floating-point operations with the operands reversed.
+(define_insn "*cond_<optab><mode>"
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+          (unspec:SVE_F
+            [(match_dup 1)
+             (match_operand:SVE_F 2 "register_operand" "w")
+             (match_operand:SVE_F 3 "register_operand" "0")]
+            SVE_COND_FP_BINARY)
+          (match_dup 3)]
+         UNSPEC_SEL))]
+  "TARGET_SVE"
+  "<sve_fp_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 04dedd2f7e0a0a324f5a2b99906c66370df9ea8d..afc91850d6f1458459f64bff0c8dfa3419d5e588 100644
@@ -16041,6 +16041,54 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
   emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
 }
 
+/* Prepare a cond_<optab><mode> operation that has the operands
+   given by OPERANDS, where:
+
+   - operand 0 is the destination
+   - operand 1 is a predicate
+   - operands 2 to NOPS - 2 are the operands to an operation that is
+     performed for active lanes
+   - operand NOPS - 1 specifies the values to use for inactive lanes.
+
+   COMMUTATIVE_P is true if operands 2 and 3 are commutative.  In that case,
+   no pattern is provided for a tie between operands 3 and NOPS - 1.  */
+
+void
+aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
+                                   bool commutative_p)
+{
+  /* We can do the operation directly if the "else" value matches one
+     of the other inputs.  */
+  for (unsigned int i = 2; i < nops - 1; ++i)
+    if (rtx_equal_p (operands[i], operands[nops - 1]))
+      {
+       if (i == 3 && commutative_p)
+         std::swap (operands[2], operands[3]);
+       return;
+      }
+
+  /* If the "else" value is different from the other operands, we have
+     the choice of doing a SEL on the output or a SEL on an input.
+     Neither choice is better in all cases, but one advantage of
+     selecting the input is that it can avoid a move when the output
+     needs to be distinct from the inputs.  E.g. if operand N maps to
+     register N, selecting the output would give:
+
+       MOVPRFX Z0.S, Z2.S
+       ADD Z0.S, P1/M, Z0.S, Z3.S
+       SEL Z0.S, P1, Z0.S, Z4.S
+
+     whereas selecting the input avoids the MOVPRFX:
+
+       SEL Z0.S, P1, Z2.S, Z4.S
+       ADD Z0.S, P1/M, Z0.S, Z3.S.  */
+  machine_mode mode = GET_MODE (operands[0]);
+  rtx temp = gen_reg_rtx (mode);
+  rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
+  emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
+  operands[2] = operands[nops - 1] = temp;
+}
+
 /* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
    true.  However due to issues with register allocation it is preferable
    to avoid tieing integer scalar and FP scalar modes.  Executing integer
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b57c7e221f8eb434c29ac091df44b190ca8f4541..4db3a4c368f35ca5ddc95ff8917a9bb82fc64b17 100644
     UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
     UNSPEC_COND_ADD    ; Used in aarch64-sve.md.
     UNSPEC_COND_SUB    ; Used in aarch64-sve.md.
+    UNSPEC_COND_MAX    ; Used in aarch64-sve.md.
+    UNSPEC_COND_MIN    ; Used in aarch64-sve.md.
     UNSPEC_COND_LT     ; Used in aarch64-sve.md.
     UNSPEC_COND_LE     ; Used in aarch64-sve.md.
     UNSPEC_COND_EQ     ; Used in aarch64-sve.md.
 (define_code_iterator SVE_INT_BINARY [plus minus smax umax smin umin
                                      and ior xor])
 
+(define_code_iterator SVE_INT_BINARY_REV [minus])
+
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
 
 (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
 
-(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB
+                                        UNSPEC_COND_MAX UNSPEC_COND_MIN])
+
+(define_int_iterator SVE_COND_FP_BINARY_REV [UNSPEC_COND_SUB])
 
 (define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
                                      UNSPEC_COND_EQ UNSPEC_COND_NE
                        (UNSPEC_IORV "ior")
                        (UNSPEC_XORV "xor")
                        (UNSPEC_COND_ADD "add")
-                       (UNSPEC_COND_SUB "sub")])
+                       (UNSPEC_COND_SUB "sub")
+                       (UNSPEC_COND_MAX "smax")
+                       (UNSPEC_COND_MIN "smin")])
 
 (define_int_attr  maxmin_uns [(UNSPEC_UMAXV "umax")
                              (UNSPEC_UMINV "umin")
                         (UNSPEC_COND_GT "gt")])
 
 (define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd")
-                           (UNSPEC_COND_SUB "fsub")])
+                           (UNSPEC_COND_SUB "fsub")
+                           (UNSPEC_COND_MAX "fmaxnm")
+                           (UNSPEC_COND_MIN "fminnm")])
+
+(define_int_attr commutative [(UNSPEC_COND_ADD "true")
+                             (UNSPEC_COND_SUB "false")
+                             (UNSPEC_COND_MIN "true")
+                             (UNSPEC_COND_MAX "true")])
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 596007d630d9ca15f10c8cc25ff11364df32f796..00e53a657c82ef65ffa5006f3e6cd01443c57caa 100644
@@ -1425,6 +1425,10 @@ have different type from the value operands.
 @item vect_double
 Target supports hardware vectors of @code{double}.
 
+@item vect_double_cond_arith
+Target supports conditional addition, subtraction, minimum and maximum
+on vectors of @code{double}, via the @code{cond_} optabs.
+
 @item vect_element_align_preferred
 The target's preferred vector alignment is the same as the element
 alignment.
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 5715dd16f550733882c2bdc203d9d07000c68a6a..d6bd90d15339de5b4a4ee1353ef04766054efd79 100644
@@ -2370,6 +2370,18 @@ get_operand_type (id_base *op, unsigned pos,
   else if (*op == COND_EXPR
           && pos == 0)
     return "boolean_type_node";
+  else if (strncmp (op->id, "CFN_COND_", 9) == 0)
+    {
+      /* IFN_COND_* operands 1 and later by default have the same type
+        as the result.  The type of operand 0 needs to be specified
+        explicitly.  */
+      if (pos > 0 && expr_type)
+       return expr_type;
+      else if (pos > 0 && in_type)
+       return in_type;
+      else
+       return NULL;
+    }
   else
     {
       /* Otherwise all types should match - choose one in order of
@@ -2429,7 +2441,8 @@ expr::gen_transform (FILE *f, int indent, const char *dest, bool gimple,
       in_type = NULL;
     }
   else if (*opr == COND_EXPR
-          || *opr == VEC_COND_EXPR)
+          || *opr == VEC_COND_EXPR
+          || strncmp (opr->id, "CFN_COND_", 9) == 0)
     {
       /* Conditions are of the same type as their first alternative.  */
       sprintf (optype, "TREE_TYPE (ops%d[1])", depth);
@@ -3737,7 +3750,7 @@ decision_tree::gen (FILE *f, bool gimple)
     }
   fprintf (stderr, "removed %u duplicate tails\n", rcnt);
 
-  for (unsigned n = 1; n <= 3; ++n)
+  for (unsigned n = 1; n <= 4; ++n)
     {
       /* First generate split-out functions.  */
       for (unsigned i = 0; i < root->kids.length (); i++)
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 4598781e61c3fb589bbdf3dfda6d2e0af0aff341..1a12bb35e01e3a4a266ed2683caa51ff4fac0a2b 100644
@@ -51,6 +51,8 @@ static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
                             code_helper, tree, tree, tree);
 static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
                             code_helper, tree, tree, tree, tree);
+static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
+                            code_helper, tree, tree, tree, tree, tree);
 
 const unsigned int gimple_match_op::MAX_NUM_OPS;
 
@@ -215,6 +217,30 @@ gimple_resimplify3 (gimple_seq *seq, gimple_match_op *res_op,
   return canonicalized;
 }
 
+/* Helper that matches and simplifies the toplevel result from
+   a gimple_simplify run (where we don't want to build
+   a stmt in case it's used in in-place folding).  Replaces
+   RES_OP with a simplified and/or canonicalized result and
+   returns whether any change was made.  */
+
+bool
+gimple_resimplify4 (gimple_seq *seq, gimple_match_op *res_op,
+                   tree (*valueize)(tree))
+{
+  /* No constant folding is defined for four-operand functions.  */
+
+  gimple_match_op res_op2 (*res_op);
+  if (gimple_simplify (&res_op2, seq, valueize,
+                      res_op->code, res_op->type,
+                      res_op->ops[0], res_op->ops[1], res_op->ops[2],
+                      res_op->ops[3]))
+    {
+      *res_op = res_op2;
+      return true;
+    }
+
+  return false;
+}
 
 /* If in GIMPLE the operation described by RES_OP should be single-rhs,
    build a GENERIC tree for that expression and update RES_OP accordingly.  */
@@ -256,7 +282,8 @@ build_call_internal (internal_fn fn, gimple_match_op *res_op)
   return gimple_build_call_internal (fn, res_op->num_ops,
                                     res_op->op_or_null (0),
                                     res_op->op_or_null (1),
-                                    res_op->op_or_null (2));
+                                    res_op->op_or_null (2),
+                                    res_op->op_or_null (3));
 }
 
 /* Push the exploded expression described by RES_OP as a statement to
@@ -343,7 +370,8 @@ maybe_push_res_to_seq (gimple_match_op *res_op, gimple_seq *seq, tree res)
          new_stmt = gimple_build_call (decl, num_ops,
                                        res_op->op_or_null (0),
                                        res_op->op_or_null (1),
-                                       res_op->op_or_null (2));
+                                       res_op->op_or_null (2),
+                                       res_op->op_or_null (3));
        }
       if (!res)
        {
@@ -654,7 +682,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
       /* ???  This way we can't simplify calls with side-effects.  */
       if (gimple_call_lhs (stmt) != NULL_TREE
          && gimple_call_num_args (stmt) >= 1
-         && gimple_call_num_args (stmt) <= 3)
+         && gimple_call_num_args (stmt) <= 4)
        {
          bool valueized = false;
          combined_fn cfn;
@@ -697,6 +725,9 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
            case 3:
              return (gimple_resimplify3 (seq, res_op, valueize)
                      || valueized);
+           case 4:
+             return (gimple_resimplify4 (seq, res_op, valueize)
+                     || valueized);
            default:
             gcc_unreachable ();
            }
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
index 9a4d3bb630fd65cd9bb5658918bc3d8be1a89802..69b53f2115703a1cf69c12156dfa03cc8e608f80 100644
@@ -49,17 +49,19 @@ struct gimple_match_op
   gimple_match_op (code_helper, tree, tree);
   gimple_match_op (code_helper, tree, tree, tree);
   gimple_match_op (code_helper, tree, tree, tree, tree);
+  gimple_match_op (code_helper, tree, tree, tree, tree, tree);
 
   void set_op (code_helper, tree, unsigned int);
   void set_op (code_helper, tree, tree);
   void set_op (code_helper, tree, tree, tree);
   void set_op (code_helper, tree, tree, tree, tree);
+  void set_op (code_helper, tree, tree, tree, tree, tree);
   void set_value (tree);
 
   tree op_or_null (unsigned int) const;
 
   /* The maximum value of NUM_OPS.  */
-  static const unsigned int MAX_NUM_OPS = 3;
+  static const unsigned int MAX_NUM_OPS = 4;
 
   /* The operation being performed.  */
   code_helper code;
@@ -113,6 +115,17 @@ gimple_match_op::gimple_match_op (code_helper code_in, tree type_in,
   ops[2] = op2;
 }
 
+inline
+gimple_match_op::gimple_match_op (code_helper code_in, tree type_in,
+                                 tree op0, tree op1, tree op2, tree op3)
+  : code (code_in), type (type_in), num_ops (4)
+{
+  ops[0] = op0;
+  ops[1] = op1;
+  ops[2] = op2;
+  ops[3] = op3;
+}
+
 /* Change the operation performed to CODE_IN, the type of the result to
    TYPE_IN, and the number of operands to NUM_OPS_IN.  The caller needs
    to set the operands itself.  */
@@ -160,6 +173,19 @@ gimple_match_op::set_op (code_helper code_in, tree type_in,
   ops[2] = op2;
 }
 
+inline void
+gimple_match_op::set_op (code_helper code_in, tree type_in,
+                        tree op0, tree op1, tree op2, tree op3)
+{
+  code = code_in;
+  type = type_in;
+  num_ops = 4;
+  ops[0] = op0;
+  ops[1] = op1;
+  ops[2] = op2;
+  ops[3] = op3;
+}
+
 /* Set the "operation" to be the single value VALUE, such as a constant
    or SSA_NAME.  */
 
@@ -196,6 +222,7 @@ bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *,
 bool gimple_resimplify1 (gimple_seq *, gimple_match_op *, tree (*)(tree));
 bool gimple_resimplify2 (gimple_seq *, gimple_match_op *, tree (*)(tree));
 bool gimple_resimplify3 (gimple_seq *, gimple_match_op *, tree (*)(tree));
+bool gimple_resimplify4 (gimple_seq *, gimple_match_op *, tree (*)(tree));
 tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
                            tree res = NULL_TREE);
 void maybe_build_generic_op (gimple_match_op *);
diff --git a/gcc/match.pd b/gcc/match.pd
index 8a71141eac910be7fbb0bfdf677c719c91ced3c7..f08571ef28c6bc95a78f906631341ac92d6b975e 100644
@@ -74,6 +74,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (FLOOR)
 DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
 DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
 DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+
+/* Binary operations and their associated IFN_COND_* function.  */
+(define_operator_list UNCOND_BINARY
+  plus minus
+  min max
+  bit_and bit_ior bit_xor)
+(define_operator_list COND_BINARY
+  IFN_COND_ADD IFN_COND_SUB
+  IFN_COND_MIN IFN_COND_MAX
+  IFN_COND_AND IFN_COND_IOR IFN_COND_XOR)
     
 /* As opposed to convert?, this still creates a single pattern, so
    it is not a suitable replacement for convert? in all cases.  */
@@ -4780,3 +4790,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (simplify
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+
+/* Simplify:
+
+     a = a1 op a2
+     r = c ? a : b;
+
+   to:
+
+     r = c ? a1 op a2 : b;
+
+   if the target can do it in one go.  This makes the operation conditional
+   on c, so could drop potentially-trapping arithmetic, but that's a valid
+   simplification if the result of the operation isn't needed.  */
+(for uncond_op (UNCOND_BINARY)
+     cond_op (COND_BINARY)
+ (simplify
+  (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
+  (with { tree op_type = TREE_TYPE (@4); }
+   (if (element_precision (type) == element_precision (op_type))
+    (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
+ (simplify
+  (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
+  (with { tree op_type = TREE_TYPE (@4); }
+   (if (element_precision (type) == element_precision (op_type))
+    (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 59b230956e8242e33663baffaac879bf06ae0947..c5b2c631b5d8c70b78bec00191412d16f7c62324 100644
@@ -1,3 +1,15 @@
+2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * lib/target-supports.exp
+       (check_effective_target_vect_double_cond_arith): New proc.
+       * gcc.dg/vect/vect-cond-arith-1.c: New test.
+       * gcc.target/aarch64/sve/vcond_8.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_8_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_9.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_9_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_12.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_12_run.c: Likewise.
+
 2018-05-25  Janus Weil  <janus@gcc.gnu.org>
 
        PR fortran/85839
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-1.c
new file mode 100644
index 0000000..9f2fccd
--- /dev/null
@@ -0,0 +1,58 @@
+/* { dg-additional-options "-fdump-tree-optimized -fno-trapping-math -ffinite-math-only" } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 3)
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+
+#define DEF(OP)                                                        \
+  void __attribute__ ((noipa))                                 \
+  f_##OP (double *restrict a, double *restrict b, double x)    \
+  {                                                            \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       double truev = OP (b[i], x);                            \
+       a[i] = b[i] < 100 ? truev : b[i];                       \
+      }                                                                \
+  }
+
+#define TEST(OP)                                       \
+  {                                                    \
+    f_##OP (a, b, 10);                                 \
+    for (int i = 0; i < N; ++i)                                \
+      {                                                        \
+       int bval = (i % 17) * 10;                       \
+       int truev = OP (bval, 10);                      \
+       if (a[i] != (bval < 100 ? truev : bval))        \
+       __builtin_abort ();                             \
+       asm volatile ("" ::: "memory");                 \
+      }                                                        \
+  }
+
+#define FOR_EACH_OP(T)                         \
+  T (add)                                      \
+  T (sub)                                      \
+  T (__builtin_fmax)                           \
+  T (__builtin_fmin)
+
+FOR_EACH_OP (DEF)
+
+int
+main (void)
+{
+  double a[N], b[N];
+  for (int i = 0; i < N; ++i)
+    {
+      b[i] = (i % 17) * 10;
+      asm volatile ("" ::: "memory");
+    }
+  FOR_EACH_OP (TEST)
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { = \.COND_ADD} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_SUB} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_MAX} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_MIN} "optimized" { target vect_double_cond_arith } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c
new file mode 100644
index 0000000..95b371a
--- /dev/null
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define N 121
+
+#define DEF_LOOP(TYPE, CMPTYPE, OP)                            \
+  void __attribute__((noipa))                                  \
+  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,        \
+                  CMPTYPE limit, TYPE src2v, TYPE elsev)       \
+  {                                                            \
+    TYPE induc = 0;                                            \
+    for (unsigned int i = 0; i < N; ++i, induc += 1)           \
+      {                                                                \
+       TYPE truev = OP (induc, src2v);                         \
+       dest[i] = cond[i] < limit ? truev : elsev;              \
+      }                                                                \
+  }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+  T (TYPE, TYPE, add) \
+  T (TYPE, TYPE, sub) \
+  T (TYPE, TYPE, max) \
+  T (TYPE, TYPE, min) \
+  T (TYPE, TYPE, and) \
+  T (TYPE, TYPE, ior) \
+  T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+  T (TYPE, CMPTYPE, add) \
+  T (TYPE, CMPTYPE, sub) \
+  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+  FOR_EACH_INT_TYPE (T, int8_t) \
+  FOR_EACH_INT_TYPE (T, int16_t) \
+  FOR_EACH_INT_TYPE (T, int32_t) \
+  FOR_EACH_INT_TYPE (T, int64_t) \
+  FOR_EACH_INT_TYPE (T, uint8_t) \
+  FOR_EACH_INT_TYPE (T, uint16_t) \
+  FOR_EACH_INT_TYPE (T, uint32_t) \
+  FOR_EACH_INT_TYPE (T, uint64_t) \
+  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+  FOR_EACH_FP_TYPE (T, float, float, f32) \
+  FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b,} 14 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h,} 18 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s,} 18 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d,} 18 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12_run.c
new file mode 100644
index 0000000..50a98c8
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include "vcond_12.c"
+
+#define TEST_LOOP(TYPE, CMPTYPE, OP)                           \
+  {                                                            \
+    TYPE dest[N];                                              \
+    CMPTYPE cond[N];                                           \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      cond[i] = i % 5;                                         \
+    TYPE src2v = 14;                                           \
+    TYPE elsev = 17;                                           \
+    f_##OP##_##TYPE (dest, cond, 3, src2v, elsev);             \
+    TYPE induc = 0;                                            \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       TYPE if_true = OP (induc, src2v);                       \
+       if (dest[i] != (i % 5 < 3 ? if_true : elsev))           \
+         __builtin_abort ();                                   \
+       induc += 1;                                             \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  FOR_EACH_LOOP (TEST_LOOP);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8.c
new file mode 100644
index 0000000..c32ab59
--- /dev/null
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define DEF_LOOP(TYPE, CMPTYPE, OP)                            \
+  void __attribute__((noipa))                                  \
+  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,        \
+                  CMPTYPE limit, TYPE *restrict src,           \
+                  TYPE val, unsigned int n)                    \
+  {                                                            \
+    for (unsigned int i = 0; i < n; ++i)                       \
+      {                                                                \
+       TYPE truev = OP (src[i], val);                          \
+       dest[i] = cond[i] < limit ? truev : src[i];             \
+      }                                                                \
+  }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+  T (TYPE, TYPE, add) \
+  T (TYPE, TYPE, sub) \
+  T (TYPE, TYPE, max) \
+  T (TYPE, TYPE, min) \
+  T (TYPE, TYPE, and) \
+  T (TYPE, TYPE, ior) \
+  T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+  T (TYPE, CMPTYPE, add) \
+  T (TYPE, CMPTYPE, sub) \
+  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+  FOR_EACH_INT_TYPE (T, int8_t) \
+  FOR_EACH_INT_TYPE (T, int16_t) \
+  FOR_EACH_INT_TYPE (T, int32_t) \
+  FOR_EACH_INT_TYPE (T, int64_t) \
+  FOR_EACH_INT_TYPE (T, uint8_t) \
+  FOR_EACH_INT_TYPE (T, uint16_t) \
+  FOR_EACH_INT_TYPE (T, uint32_t) \
+  FOR_EACH_INT_TYPE (T, uint64_t) \
+  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+  FOR_EACH_FP_TYPE (T, float, float, f32) \
+  FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_8_run.c
new file mode 100644
index 0000000..5f45e16
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include "vcond_8.c"
+
+#define N 187
+
+#define TEST_LOOP(TYPE, CMPTYPE, OP)                           \
+  {                                                            \
+    TYPE dest[N], src[N];                                      \
+    CMPTYPE cond[N];                                           \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+        src[i] = i * 3;                                                \
+       cond[i] = i % 5;                                        \
+      }                                                                \
+    f_##OP##_##TYPE (dest, cond, 3, src, 77, N);               \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+        TYPE if_false = i * 3;                                 \
+       TYPE if_true = OP (if_false, (TYPE) 77);                \
+       if (dest[i] != (i % 5 < 3 ? if_true : if_false))        \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  FOR_EACH_LOOP (TEST_LOOP);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9.c
new file mode 100644
index 0000000..618e187
--- /dev/null
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define DEF_LOOP(TYPE, CMPTYPE, OP)                            \
+  void __attribute__((noipa))                                  \
+  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,        \
+                  CMPTYPE limit, TYPE *restrict src1,          \
+                  TYPE *restrict src2, unsigned int n)         \
+  {                                                            \
+    for (unsigned int i = 0; i < n; ++i)                       \
+      {                                                                \
+       TYPE truev = OP (src1[i], src2[i]);                     \
+       dest[i] = cond[i] < limit ? truev : src2[i];            \
+      }                                                                \
+  }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+  T (TYPE, TYPE, add) \
+  T (TYPE, TYPE, sub) \
+  T (TYPE, TYPE, max) \
+  T (TYPE, TYPE, min) \
+  T (TYPE, TYPE, and) \
+  T (TYPE, TYPE, ior) \
+  T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+  T (TYPE, CMPTYPE, add) \
+  T (TYPE, CMPTYPE, sub) \
+  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+  FOR_EACH_INT_TYPE (T, int8_t) \
+  FOR_EACH_INT_TYPE (T, int16_t) \
+  FOR_EACH_INT_TYPE (T, int32_t) \
+  FOR_EACH_INT_TYPE (T, int64_t) \
+  FOR_EACH_INT_TYPE (T, uint8_t) \
+  FOR_EACH_INT_TYPE (T, uint16_t) \
+  FOR_EACH_INT_TYPE (T, uint32_t) \
+  FOR_EACH_INT_TYPE (T, uint64_t) \
+  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+  FOR_EACH_FP_TYPE (T, float, float, f32) \
+  FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_9_run.c
new file mode 100644
index 0000000..14f3242
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include "vcond_9.c"
+
+#define N 187
+
+#define TEST_LOOP(TYPE, CMPTYPE, OP)                           \
+  {                                                            \
+    TYPE dest[N], src1[N], src2[N];                            \
+    CMPTYPE cond[N];                                           \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+        src1[i] = i * 4 - i % 7;                               \
+        src2[i] = i * 3 + 1;                                   \
+       cond[i] = i % 5;                                        \
+      }                                                                \
+    f_##OP##_##TYPE (dest, cond, 3, src1, src2, N);            \
+    for (unsigned int i = 0; i < N; ++i)                       \
+      {                                                                \
+       TYPE src1v = i * 4 - i % 7;                             \
+        TYPE src2v = i * 3 + 1;                                        \
+       TYPE if_true = OP (src1v, src2v);                       \
+       if (dest[i] != (i % 5 < 3 ? if_true : src2v))           \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  FOR_EACH_LOOP (TEST_LOOP);
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 0a53d7b1aadd350e8167614e1d73b678f6ec93e8..0f8edce69bf944b043cdff562a9757afa330d413 100644
@@ -5590,6 +5590,13 @@ proc check_effective_target_vect_double { } {
     return $et_vect_double_saved($et_index)
 }
 
+# Return 1 if the target supports conditional addition, subtraction, minimum
+# and maximum on vectors of double, via the cond_ optabs.  Return 0 otherwise.
+
+proc check_effective_target_vect_double_cond_arith { } {
+    return [check_effective_target_aarch64_sve]
+}
+
 # Return 1 if the target supports hardware vectors of long long, 0 otherwise.
 #
 # This won't change for different subtargets so cache the result.