+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * doc/sourcebuild.texi (vect_scatter_store): Document.
+ * optabs.def (scatter_store_optab, mask_scatter_store_optab): New
+ optabs.
+ * doc/md.texi (scatter_store@var{m}, mask_scatter_store@var{m}):
+ Document.
+ * genopinit.c (main): Add supports_vec_scatter_store and
+ supports_vec_scatter_store_cached to target_optabs.
+ * gimple.h (gimple_expr_type): Handle IFN_SCATTER_STORE and
+ IFN_MASK_SCATTER_STORE.
+ * internal-fn.def (SCATTER_STORE, MASK_SCATTER_STORE): New internal
+ functions.
+ * internal-fn.h (internal_store_fn_p): Declare.
+ (internal_fn_stored_value_index): Likewise.
+ * internal-fn.c (scatter_store_direct): New macro.
+ (expand_scatter_store_optab_fn): New function.
+ (direct_scatter_store_optab_supported_p): New macro.
+ (internal_store_fn_p): New function.
+ (internal_gather_scatter_fn_p): Handle IFN_SCATTER_STORE and
+ IFN_MASK_SCATTER_STORE.
+ (internal_fn_mask_index): Likewise.
+ (internal_fn_stored_value_index): New function.
+ (internal_gather_scatter_fn_supported_p): Adjust operand numbers
+ for scatter stores.
+ * optabs-query.h (supports_vec_scatter_store_p): Declare.
+ * optabs-query.c (supports_vec_scatter_store_p): New function.
+ * tree-vectorizer.h (vect_get_store_rhs): Declare.
+ * tree-vect-data-refs.c (vect_analyze_data_ref_access): Return
+ true for scatter stores.
+ (vect_gather_scatter_fn_p): Handle scatter stores too.
+ (vect_check_gather_scatter): Consider using scatter stores if
+ supports_vec_scatter_store_p.
+ * tree-vect-patterns.c (vect_try_gather_scatter_pattern): Handle
+ scatter stores too.
+ * tree-vect-stmts.c (exist_non_indexing_operands_for_use_p): Use
+ internal_fn_stored_value_index.
+ (check_load_store_masking): Handle scatter stores too.
+ (vect_get_store_rhs): Make public.
+ (vectorizable_call): Use internal_store_fn_p.
+ (vectorizable_store): Handle scatter store internal functions.
+ (vect_transform_stmt): Compare GROUP_STORE_COUNT with GROUP_SIZE
+ when deciding whether the end of the group has been reached.
+ * config/aarch64/aarch64.md (UNSPEC_ST1_SCATTER): New unspec.
+ * config/aarch64/aarch64-sve.md (scatter_store<mode>): New expander.
+ (mask_scatter_store<mode>): New insns.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
+;; Unpredicated scatter store.
+(define_expand "scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_dup 5)
+ (match_operand:DI 0 "aarch64_reg_or_zero")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand")
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_SD 4 "register_operand")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ {
+ operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+ }
+)
+
+;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
+;; unsigned extension and false for signed extension.
+(define_insn "mask_scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
+ (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
+ (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ st1w\t%4.s, %5, [%1.s]
+ st1w\t%4.s, %5, [%0, %1.s, sxtw]
+ st1w\t%4.s, %5, [%0, %1.s, uxtw]
+ st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+)
+
+;; Predicated scatter stores for 64-bit elements. The value of operand 2
+;; doesn't matter in this case.
+(define_insn "mask_scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
+ (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
+ (match_operand:SVE_D 4 "register_operand" "w, w, w")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ st1d\t%4.d, %5, [%1.d]
+ st1d\t%4.d, %5, [%0, %1.d]
+ st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
+)
+
;; SVE structure moves.
(define_expand "mov<mode>"
[(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
UNSPEC_ST1_SVE
UNSPEC_LD1RQ
UNSPEC_LD1_GATHER
+ UNSPEC_ST1_SCATTER
UNSPEC_MERGE_PTRUE
UNSPEC_PTEST_PTRUE
UNSPEC_UNPACKSHI
of the result should be loaded from memory and clear if element @var{i}
of the result should be set to zero.
+@cindex @code{scatter_store@var{m}} instruction pattern
+@item @samp{scatter_store@var{m}}
+Store a vector of mode @var{m} into several distinct memory locations.
+Operand 0 is a scalar base address and operand 1 is a vector of offsets
+from that base. Operand 4 is the vector of values that should be stored,
+which has the same number of elements as the offset vector. For each element
+index @var{i}:
+
+@itemize @bullet
+@item
+extend the offset element @var{i} to address width, using zero
+extension if operand 2 is 1 and sign extension if operand 2 is zero;
+@item
+multiply the extended offset by operand 3;
+@item
+add the result to the base; and
+@item
+store element @var{i} of operand 4 to that address.
+@end itemize
+
+The value of operand 2 does not matter if the offsets are already
+address width. (A scalar C sketch of these steps is given after the
+@samp{mask_scatter_store@var{m}} entry below.)
+
+@cindex @code{mask_scatter_store@var{m}} instruction pattern
+@item @samp{mask_scatter_store@var{m}}
+Like @samp{scatter_store@var{m}}, but takes an extra mask operand as
+operand 5. Bit @var{i} of the mask is set if element @var{i}
+of operand 4 should be stored to memory.
+
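The per-element steps documented above map directly onto scalar C. The following sketch is purely illustrative and not part of the patch: the function name, the fixed element count NELTS, and the choice of 32-bit elements are assumptions made for the example. The unpredicated scatter_store form behaves as if every mask element were true.

    #include <stdbool.h>
    #include <stdint.h>

    /* Scalar model of mask_scatter_store<m> for 32-bit elements:
       operand 0 = BASE, operand 1 = OFFSETS, operand 2 = ZERO_EXTEND_P,
       operand 3 = SCALE, operand 4 = VALUES, operand 5 = MASK.
       NELTS stands in for the number of vector elements.  */
    #define NELTS 4

    void
    mask_scatter_store_model (char *base, const int32_t offsets[NELTS],
                              int zero_extend_p, int64_t scale,
                              const int32_t values[NELTS],
                              const bool mask[NELTS])
    {
      for (int i = 0; i < NELTS; ++i)
        if (mask[i])
          {
            /* Extend offset element I to address width.  */
            int64_t offset = (zero_extend_p
                              ? (int64_t) (uint32_t) offsets[i]
                              : (int64_t) offsets[i]);
            /* Multiply by the scale, add to the base, and store element I
               of the value vector to that address.  */
            *(int32_t *) (base + offset * scale) = values[i];
          }
    }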
@cindex @code{vec_set@var{m}} instruction pattern
@item @samp{vec_set@var{m}}
Set given field in the vector value. Operand 0 is the vector to modify,
@item vect_masked_store
Target supports vector masked stores.
+@item vect_scatter_store
+Target supports vector scatter stores.
+
@item vect_aligned_arrays
Target aligns arrays to vector alignment boundary.
" mode. */\n"
" bool supports_vec_gather_load;\n"
" bool supports_vec_gather_load_cached;\n"
+ " bool supports_vec_scatter_store;\n"
+ " bool supports_vec_scatter_store_cached;\n"
"};\n"
"extern void init_all_optabs (struct target_optabs *);\n"
"\n"
if (code == GIMPLE_CALL)
{
const gcall *call_stmt = as_a <const gcall *> (stmt);
- if (gimple_call_internal_p (call_stmt)
- && gimple_call_internal_fn (call_stmt) == IFN_MASK_STORE)
- return TREE_TYPE (gimple_call_arg (call_stmt, 3));
- else
- return gimple_call_return_type (call_stmt);
+ if (gimple_call_internal_p (call_stmt))
+ switch (gimple_call_internal_fn (call_stmt))
+ {
+ case IFN_MASK_STORE:
+ case IFN_SCATTER_STORE:
+ return TREE_TYPE (gimple_call_arg (call_stmt, 3));
+ case IFN_MASK_SCATTER_STORE:
+ return TREE_TYPE (gimple_call_arg (call_stmt, 4));
+ default:
+ break;
+ }
+ return gimple_call_return_type (call_stmt);
}
else if (code == GIMPLE_ASSIGN)
{
#define mask_store_direct { 3, 2, false }
#define store_lanes_direct { 0, 0, false }
#define mask_store_lanes_direct { 0, 0, false }
+#define scatter_store_direct { 3, 3, false }
#define unary_direct { 0, 0, true }
#define binary_direct { 0, 0, true }
#define cond_unary_direct { 1, 1, true }
expand_assignment (lhs, gimple_call_arg (call, 0), false);
}
+/* Expand {MASK_,}SCATTER_STORE call CALL using optab OPTAB. */
+
+static void
+expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
+{
+ internal_fn ifn = gimple_call_internal_fn (stmt);
+ int rhs_index = internal_fn_stored_value_index (ifn);
+ int mask_index = internal_fn_mask_index (ifn);
+ tree base = gimple_call_arg (stmt, 0);
+ tree offset = gimple_call_arg (stmt, 1);
+ tree scale = gimple_call_arg (stmt, 2);
+ tree rhs = gimple_call_arg (stmt, rhs_index);
+
+ rtx base_rtx = expand_normal (base);
+ rtx offset_rtx = expand_normal (offset);
+ HOST_WIDE_INT scale_int = tree_to_shwi (scale);
+ rtx rhs_rtx = expand_normal (rhs);
+
+ struct expand_operand ops[6];
+ int i = 0;
+ create_address_operand (&ops[i++], base_rtx);
+ create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
+ create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
+ create_integer_operand (&ops[i++], scale_int);
+ create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
+ if (mask_index >= 0)
+ {
+ tree mask = gimple_call_arg (stmt, mask_index);
+ rtx mask_rtx = expand_normal (mask);
+ create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
+ }
+
+ insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)));
+ expand_insn (icode, i, ops);
+}
+
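For orientation only (not part of the patch): given the argument indices read directly in expand_scatter_store_optab_fn above, together with internal_fn_stored_value_index and internal_fn_mask_index defined later in this patch, the calls it expands have the following shapes; this is a summary, not a new interface.

    /* Call shapes consumed by expand_scatter_store_optab_fn:

         SCATTER_STORE (base, offsets, scale, values)
         MASK_SCATTER_STORE (base, offsets, scale, values, mask)

       base    = gimple_call_arg (stmt, 0)
       offsets = gimple_call_arg (stmt, 1)
       scale   = gimple_call_arg (stmt, 2)
       values  = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn))
       mask    = gimple_call_arg (stmt, internal_fn_mask_index (ifn)), if any.  */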
/* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB. */
static void
#define direct_mask_store_optab_supported_p direct_optab_supported_p
#define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_scatter_store_optab_supported_p direct_optab_supported_p
#define direct_while_optab_supported_p convert_optab_supported_p
#define direct_fold_extract_optab_supported_p direct_optab_supported_p
#define direct_fold_left_optab_supported_p direct_optab_supported_p
}
}
+/* Return true if IFN is some form of store to memory. */
+
+bool
+internal_store_fn_p (internal_fn fn)
+{
+ switch (fn)
+ {
+ case IFN_MASK_STORE:
+ case IFN_STORE_LANES:
+ case IFN_MASK_STORE_LANES:
+ case IFN_SCATTER_STORE:
+ case IFN_MASK_SCATTER_STORE:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
/* Return true if IFN is some form of gather load or scatter store. */
bool
{
case IFN_GATHER_LOAD:
case IFN_MASK_GATHER_LOAD:
+ case IFN_SCATTER_STORE:
+ case IFN_MASK_SCATTER_STORE:
return true;
default:
case IFN_MASK_GATHER_LOAD:
return 3;
+ case IFN_MASK_SCATTER_STORE:
+ return 4;
+
+ default:
+ return -1;
+ }
+}
+
+/* If FN takes a value that should be stored to memory, return the index
+ of that argument, otherwise return -1. */
+
+int
+internal_fn_stored_value_index (internal_fn fn)
+{
+ switch (fn)
+ {
+ case IFN_MASK_STORE:
+ case IFN_SCATTER_STORE:
+ case IFN_MASK_SCATTER_STORE:
+ return 3;
+
default:
return -1;
}
return false;
optab optab = direct_internal_fn_optab (ifn);
insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type));
+ int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
return (icode != CODE_FOR_nothing
- && insn_operand_matches (icode, 3, GEN_INT (offset_sign == UNSIGNED))
- && insn_operand_matches (icode, 4, GEN_INT (scale)));
+ && insn_operand_matches (icode, 2 + output_ops,
+ GEN_INT (offset_sign == UNSIGNED))
+ && insn_operand_matches (icode, 3 + output_ops,
+ GEN_INT (scale)));
}
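For reference, the operand-number adjustment above follows from the optab interfaces documented in md.texi (the gather_load layout was already documented; the scatter_store layout is added by this patch). The note below is explanatory only:

    /* Operand layout behind "2 + output_ops" and "3 + output_ops":

         gather_load<m>:   0 dest, 1 base, 2 offsets, 3 extension sign, 4 scale
         scatter_store<m>: 0 base, 1 offsets, 2 extension sign, 3 scale, 4 values

       Loads have one output operand (output_ops == 1), so their
       extension-sign and scale operands sit one slot later than the
       corresponding store operands.  */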
/* Expand STMT as though it were a call to internal function FN. */
- mask_store: currently just maskstore
- store_lanes: currently just vec_store_lanes
- mask_store_lanes: currently just vec_mask_store_lanes
+ - scatter_store: used for {mask_,}scatter_store
- unary: a normal unary optab, such as vec_reverse_<mode>
- binary: a normal binary optab, such as vec_interleave_lo_<mode>
DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
mask_gather_load, gather_load)
+DEF_INTERNAL_OPTAB_FN (SCATTER_STORE, 0, scatter_store, scatter_store)
+DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
+ mask_scatter_store, scatter_store)
+
DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
extern internal_fn get_conditional_internal_fn (tree_code);
extern bool internal_load_fn_p (internal_fn);
+extern bool internal_store_fn_p (internal_fn);
extern bool internal_gather_scatter_fn_p (internal_fn);
extern int internal_fn_mask_index (internal_fn);
+extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, signop, int);
return this_fn_optabs->supports_vec_gather_load;
}
+
+/* Return true if vec_scatter_store is available for at least one vector
+ mode. */
+
+bool
+supports_vec_scatter_store_p ()
+{
+ if (this_fn_optabs->supports_vec_scatter_store_cached)
+ return this_fn_optabs->supports_vec_scatter_store;
+
+ this_fn_optabs->supports_vec_scatter_store_cached = true;
+
+ this_fn_optabs->supports_vec_scatter_store
+ = supports_at_least_one_mode_p (scatter_store_optab);
+
+ return this_fn_optabs->supports_vec_scatter_store;
+}
+
bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
bool supports_vec_gather_load_p ();
+bool supports_vec_scatter_store_p ();
/* Version of find_widening_optab_handler_and_mode that operates on
specific mode types. */
OPTAB_D (gather_load_optab, "gather_load$a")
OPTAB_D (mask_gather_load_optab, "mask_gather_load$a")
+OPTAB_D (scatter_store_optab, "scatter_store$a")
+OPTAB_D (mask_scatter_store_optab, "mask_scatter_store$a")
OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_scatter_store):
+ New proc.
+ * gcc.dg/vect/pr25413a.c: Expect both loops to be optimized on
+ targets with scatter stores.
+ * gcc.dg/vect/vect-71.c: Restrict XFAIL to targets without scatter
+ stores.
+ * gcc.target/aarch64/sve/mask_scatter_store_1.c: New test.
+ * gcc.target/aarch64/sve/mask_scatter_store_2.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_1.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_2.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_3.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_4.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_5.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_6.c: Likewise.
+ * gcc.target/aarch64/sve/scatter_store_7.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_1.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_2.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_3.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_4.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_5.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_6.c: Likewise.
+ * gcc.target/aarch64/sve/strided_store_7.c: Likewise.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_scatter_store } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_scatter_store } } } */
/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" { target { ! vector_alignment_reachable } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { ! vector_alignment_reachable } } } } */
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_scatter_store } } } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *restrict cmp1, CMP_TYPE *restrict cmp2, \
+ INDEX##BITS *restrict indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[indices[i]] = src[i] + 1; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "mask_scatter_store_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[indices[i]] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "scatter_store_1.c"
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "scatter_store_3.c"
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \
+ int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ *dest[i] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[(INDEX##BITS) (indices[i] | mask)] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "scatter_store_6.c"
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* Either extension type is OK here. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX8
+#define INDEX8 int8_t
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ INDEX##BITS stride, INDEX##BITS n) \
+ { \
+ for (INDEX##BITS i = 0; i < n; ++i) \
+ dest[i * stride] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 8) \
+ T (DATA_TYPE, 16) \
+ T (DATA_TYPE, 32) \
+ T (DATA_TYPE, 64)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX8 uint8_t
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "strided_store_1.c"
+
+/* 8 and 16 bits are signed because the multiplication promotes to int.
+ Using uxtw for all 9 would be OK. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* The 32-bit loop needs to honor the defined overflow in uint32_t,
+ so we vectorize the offset calculation. This means that the
+ 64-bit version needs two copies. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
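A standalone illustration of the promotion point made in the comments above; it is not part of the test, and the values are arbitrary:

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint8_t i = 10, stride = 200;
      /* The usual arithmetic conversions promote both uint8_t operands to
         int, so "i * stride" has type int: the index expression is a signed
         32-bit value even though its operands are unsigned, which is why the
         8-bit and 16-bit cases above are matched with sxtw offsets.  */
      puts (_Generic (i * stride, int: "int", default: "something else"));
      printf ("%d\n", i * stride);  /* 2000, computed as a signed int */
      return 0;
    }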
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ OTHER_TYPE *restrict other, \
+ OTHER_TYPE mask, \
+ int stride, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i * stride] = src[i] + (OTHER_TYPE) (other[i] | mask); \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, int16_t) \
+ T (uint32_t, int16_t) \
+ T (float, int16_t) \
+ T (int64_t, int32_t) \
+ T (uint64_t, int32_t) \
+ T (double, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, long n) \
+ { \
+ for (long i = 0; i < n; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable --save-temps" } */
+
+#include "strided_store_5.c"
+
+/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src) \
+ { \
+ for (long i = 0; i < 1000; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
return [check_effective_target_aarch64_sve]
}
+# Return 1 if the target supports vector scatter stores.
+
+proc check_effective_target_vect_scatter_store { } {
+ return [check_effective_target_aarch64_sve]
+}
+
# Return 1 if the target supports vector conditional operations, 0 otherwise.
proc check_effective_target_vect_condition { } {
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = NULL;
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ return true;
+
if (loop_vinfo)
loop = LOOP_VINFO_LOOP (loop_vinfo);
if (read_p)
ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
else
- return false;
+ ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
/* Test whether the target supports this combination. */
if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
/* True if we should aim to use internal functions rather than
built-in functions. */
bool use_ifn_p = (DR_IS_READ (dr)
- && supports_vec_gather_load_p ());
+ ? supports_vec_gather_load_p ()
+ : supports_vec_scatter_store_p ());
base = DR_REF (dr);
/* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
bool maybe_scatter
= DR_IS_WRITE (dr)
&& !TREE_THIS_VOLATILE (DR_REF (dr))
- && targetm.vectorize.builtin_scatter != NULL;
+ && (targetm.vectorize.builtin_scatter != NULL
+ || supports_vec_scatter_store_p ());
bool maybe_simd_lane_access
= is_a <loop_vec_info> (vinfo) && loop->simduid;
if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
return NULL;
- /* Reject stores for now. */
- if (!DR_IS_READ (dr))
- return NULL;
-
/* Get the boolean that controls whether the load or store happens.
This is null if the operation is unconditional. */
tree mask = vect_get_load_store_mask (stmt);
gimple_call_set_lhs (pattern_stmt, load_lhs);
}
else
- /* Not yet supported. */
- gcc_unreachable ();
+ {
+ tree rhs = vect_get_store_rhs (stmt);
+ if (mask != NULL)
+ pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
+ base, offset, scale, rhs,
+ mask);
+ else
+ pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
+ base, offset, scale, rhs);
+ }
gimple_call_set_nothrow (pattern_stmt, true);
/* Copy across relevant vectorization info and associate DR with the
if (mask_index >= 0
&& use == gimple_call_arg (stmt, mask_index))
return true;
+ int stored_value_index = internal_fn_stored_value_index (ifn);
+ if (stored_value_index >= 0
+ && use == gimple_call_arg (stmt, stored_value_index))
+ return true;
if (internal_gather_scatter_fn_p (ifn)
&& use == gimple_call_arg (stmt, 1))
return true;
- if (ifn == IFN_MASK_STORE
- && use == gimple_call_arg (stmt, 3))
- return true;
}
return false;
}
if (memory_access_type == VMAT_GATHER_SCATTER)
{
- gcc_assert (is_load);
+ internal_fn ifn = (is_load
+ ? IFN_MASK_GATHER_LOAD
+ : IFN_MASK_SCATTER_STORE);
tree offset_type = TREE_TYPE (gs_info->offset);
- if (!internal_gather_scatter_fn_supported_p (IFN_MASK_GATHER_LOAD,
- vectype,
+ if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
gs_info->memory_type,
TYPE_SIGN (offset_type),
gs_info->scale))
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because the"
" target doesn't have an appropriate masked"
- " gather load instruction.\n");
+ " gather load or scatter store instruction.\n");
LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
return;
}
/* STMT is either a masked or unconditional store. Return the value
being stored. */
-static tree
+tree
vect_get_store_rhs (gimple *stmt)
{
if (gassign *assign = dyn_cast <gassign *> (stmt))
if (gcall *call = dyn_cast <gcall *> (stmt))
{
internal_fn ifn = gimple_call_internal_fn (call);
- gcc_assert (ifn == IFN_MASK_STORE);
- return gimple_call_arg (stmt, 3);
+ int index = internal_fn_stored_value_index (ifn);
+ gcc_assert (index >= 0);
+ return gimple_call_arg (stmt, index);
}
gcc_unreachable ();
}
if (gimple_call_internal_p (stmt)
&& (internal_load_fn_p (gimple_call_internal_fn (stmt))
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
+ || internal_store_fn_p (gimple_call_internal_fn (stmt))))
/* Handled by vectorizable_load and vectorizable_store. */
return false;
else
{
gcall *call = dyn_cast <gcall *> (stmt);
- if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
+ if (!call || !gimple_call_internal_p (call))
+ return false;
+
+ internal_fn ifn = gimple_call_internal_fn (call);
+ if (!internal_store_fn_p (ifn))
return false;
if (slp_node != NULL)
return false;
}
- ref_type = TREE_TYPE (gimple_call_arg (call, 1));
- mask = gimple_call_arg (call, 2);
- if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
- return false;
+ int mask_index = internal_fn_mask_index (ifn);
+ if (mask_index >= 0)
+ {
+ mask = gimple_call_arg (call, mask_index);
+ if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
+ return false;
+ }
}
op = vect_get_store_rhs (stmt);
TYPE_MODE (mask_vectype), false))
return false;
}
- else if (memory_access_type != VMAT_LOAD_STORE_LANES)
+ else if (memory_access_type != VMAT_LOAD_STORE_LANES
+ && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
- grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
+ grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && memory_access_type != VMAT_GATHER_SCATTER);
if (grouped_store)
{
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ensure_base_align (dr);
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
{
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
return true;
}
- if (grouped_store)
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
- GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
+ gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+ GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
+ }
+ if (grouped_store)
+ {
/* FORNOW */
gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
|| memory_access_type == VMAT_CONTIGUOUS_REVERSE)
offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
- if (memory_access_type == VMAT_LOAD_STORE_LANES)
- aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+ tree bump;
+ tree vec_offset = NULL_TREE;
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ aggr_type = NULL_TREE;
+ bump = NULL_TREE;
+ }
+ else if (memory_access_type == VMAT_GATHER_SCATTER)
+ {
+ aggr_type = elem_type;
+ vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
+ &bump, &vec_offset);
+ }
else
- aggr_type = vectype;
+ {
+ if (memory_access_type == VMAT_LOAD_STORE_LANES)
+ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+ else
+ aggr_type = vectype;
+ bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
+ }
if (mask)
LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
dataref_offset = build_int_cst (ref_type, 0);
inv_p = false;
}
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ vect_get_gather_scatter_ops (loop, stmt, &gs_info,
+ &dataref_ptr, &vec_offset);
+ inv_p = false;
+ }
else
dataref_ptr
= vect_create_data_ref_ptr (first_stmt, aggr_type,
simd_lane_access_p ? loop : NULL,
offset, &dummy, gsi, &ptr_incr,
- simd_lane_access_p, &inv_p);
+ simd_lane_access_p, &inv_p,
+ NULL_TREE, bump);
gcc_assert (bb_vinfo || !inv_p);
}
else
}
if (dataref_offset)
dataref_offset
- = int_const_binop (PLUS_EXPR, dataref_offset,
- TYPE_SIZE_UNIT (aggr_type));
+ = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ gimple *def_stmt;
+ vect_def_type dt;
+ vect_is_simple_use (vec_offset, loop_vinfo, &def_stmt, &dt);
+ vec_offset = vect_get_vec_def_for_stmt_copy (dt, vec_offset);
+ }
else
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
- TYPE_SIZE_UNIT (aggr_type));
+ bump);
}
if (memory_access_type == VMAT_LOAD_STORE_LANES)
final_mask = prepare_load_store_mask (mask_vectype, final_mask,
vec_mask, gsi);
+ if (memory_access_type == VMAT_GATHER_SCATTER)
+ {
+ tree scale = size_int (gs_info.scale);
+ gcall *call;
+ if (masked_loop_p)
+ call = gimple_build_call_internal
+ (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
+ scale, vec_oprnd, final_mask);
+ else
+ call = gimple_build_call_internal
+ (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
+ scale, vec_oprnd);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ break;
+ }
+
if (i > 0)
/* Bump the vector pointer. */
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
- stmt, NULL_TREE);
+ stmt, bump);
if (slp)
vec_oprnd = vec_oprnds[i];
one are skipped, and there vec_stmt_info shouldn't be freed
meanwhile. */
*grouped_store = true;
- if (STMT_VINFO_VEC_STMT (stmt_info))
+ stmt_vec_info group_info
+ = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
+ if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
is_store = true;
- }
+ }
else
is_store = true;
break;
extern void vect_finish_stmt_generation (gimple *, gimple *,
gimple_stmt_iterator *);
extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
+extern tree vect_get_store_rhs (gimple *);
extern tree vect_get_vec_def_for_operand_1 (gimple *, enum vect_def_type);
extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL);
extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *,