Add optabs for accelerating RAW and WAR alias checks
authorRichard Sandiford <richard.sandiford@arm.com>
Mon, 18 Nov 2019 15:36:10 +0000 (15:36 +0000)
committerRichard Sandiford <rsandifo@gcc.gnu.org>
Mon, 18 Nov 2019 15:36:10 +0000 (15:36 +0000)
This patch adds optabs that check whether a read followed by a write
or a write followed by a read can be divided into interleaved byte
accesses without changing the dependencies between the bytes.
This is one of the uses of the SVE2 WHILERW and WHILEWR instructions.
(The instructions can also be used to limit the VF at runtime,
but that's future work.)

2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* doc/sourcebuild.texi (vect_check_ptrs): Document.
* optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs.
* doc/md.texi: Document them.
* internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New
internal functions.
* internal-fn.h (internal_check_ptrs_fn_supported_p): Declare.
* internal-fn.c (check_ptrs_direct): New macro.
(expand_check_ptrs_optab_fn): Likewise.
(direct_check_ptrs_optab_supported_p): Likewise.
(internal_check_ptrs_fn_supported_p): New function.
* tree-data-ref.c: Include internal-fn.h.
(create_ifn_alias_checks): New function.
(create_intersect_range_checks): Use it.
* config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator.
(optab, cmp_op): Handle it.
(raw_war, unspec): New int attributes.
* config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILEWR): New
constants.
* config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand):
New predicate.
* config/aarch64/aarch64-sve2.md (check_<raw_war>_ptrs<mode>): New
expander.
(@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest): New
pattern.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_check_ptrs):
New procedure.
* gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR to be
used, if available.
* gcc.dg/vect/vect-alias-check-15.c: Likewise.
* gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW.
* gcc.target/aarch64/sve2/whilerw_1.c: New test.
* gcc.target/aarch64/sve2/whilewr_1.c: Likewise.
* gcc.target/aarch64/sve2/whilewr_2.c: Likewise.

From-SVN: r278414

20 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-sve2.md
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/iterators.md
gcc/config/aarch64/predicates.md
gcc/doc/md.texi
gcc/doc/sourcebuild.texi
gcc/internal-fn.c
gcc/internal-fn.def
gcc/internal-fn.h
gcc/optabs.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c [new file with mode: 0644]
gcc/testsuite/lib/target-supports.exp
gcc/tree-data-ref.c

index be2fac770ee7f5e91d78c8cefdf552848af6f7d1..c57e8c4008497bd3d23dae259ab7483ddb356895 100644 (file)
@@ -1,3 +1,30 @@
+2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * doc/sourcebuild.texi (vect_check_ptrs): Document.
+       * optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs.
+       * doc/md.texi: Document them.
+       * internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New
+       internal functions.
+       * internal-fn.h (internal_check_ptrs_fn_supported_p): Declare.
+       * internal-fn.c (check_ptrs_direct): New macro.
+       (expand_check_ptrs_optab_fn): Likewise.
+       (direct_check_ptrs_optab_supported_p): Likewise.
+       (internal_check_ptrs_fn_supported_p): New function.
+       * tree-data-ref.c: Include internal-fn.h.
+       (create_ifn_alias_checks): New function.
+       (create_intersect_range_checks): Use it.
+       * config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator.
+       (optab, cmp_op): Handle it.
+       (raw_war, unspec): New int attributes.
+       * config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILEWR): New
+       constants.
+       * config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand):
+       New predicate.
+       * config/aarch64/aarch64-sve2.md (check_<raw_war>_ptrs<mode>): New
+       expander.
+       (@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest): New
+       pattern.
+
 2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
 
        * tree.c (build_vector_from_ctor): Directly return a zero vector for
index 15142d1d775038522a653817d23e354044207112..106a9a015abe7d44ffcebcc8b41f29d96f937c81 100644 (file)
   }
   [(set_attr "movprfx" "*,yes")]
 )
+
+;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
+;; possible if the accesses we're checking are exactly the same size
+;; as an SVE vector.
+(define_expand "check_<raw_war>_ptrs<mode>"
+  [(match_operand:GPI 0 "register_operand")
+   (unspec:VNx16BI
+     [(match_operand:GPI 1 "register_operand")
+      (match_operand:GPI 2 "register_operand")
+      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
+      (match_operand:GPI 4 "const_int_operand")]
+     SVE2_WHILE_PTR)]
+  "TARGET_SVE2"
+{
+  /* Use the widest predicate mode we can.  */
+  unsigned int align = INTVAL (operands[4]);
+  if (align > 8)
+    align = 8;
+  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();
+
+  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
+     the result.  */
+  emit_insn (gen_aarch64_sve2_while_ptest
+            (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
+             gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
+             CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));
+
+  /* Set operand 0 to true if the last bit of the predicate result is set,
+     i.e. if all elements are free of dependencies.  */
+  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
+  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
+  DONE;
+})
+
+;; A WHILERW or WHILEWR in which only the flags result is interesting.
+(define_insn_and_rewrite "@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest"
+  [(set (reg:CC_NZC CC_REGNUM)
+       (unspec:CC_NZC
+         [(match_operand 3)
+          (match_operand 4)
+          (const_int SVE_KNOWN_PTRUE)
+          (unspec:PRED_ALL
+            [(match_operand:GPI 1 "register_operand" "r")
+             (match_operand:GPI 2 "register_operand" "r")]
+            SVE2_WHILE_PTR)]
+         UNSPEC_PTEST))
+   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
+  "TARGET_SVE2"
+  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %x1, %x2"
+  ;; Force the compiler to drop the unused predicate operand, so that we
+  ;; don't have an unnecessary PTRUE.
+  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
+  {
+    operands[3] = CONSTM1_RTX (VNx16BImode);
+    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
+  }
+)
index f19e2272750f7d1aefbd74f2908e629675067443..87e9b9364bdcc783e444bf5ade20b473706367ee 100644 (file)
     UNSPEC_WHILE_LO
     UNSPEC_WHILE_LS
     UNSPEC_WHILE_LT
+    UNSPEC_WHILERW
+    UNSPEC_WHILEWR
     UNSPEC_LDN
     UNSPEC_STN
     UNSPEC_INSR
index bfeebe9b7724bceff5980fc9f42f895d565d67bc..83a0d156e84baf7dde8f9e46eeeca4edfa1f9037 100644 (file)
 (define_int_iterator SVE_WHILE [UNSPEC_WHILE_LE UNSPEC_WHILE_LO
                                UNSPEC_WHILE_LS UNSPEC_WHILE_LT])
 
+(define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR])
+
 (define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE
                                     UNSPEC_ASHIFTRT_WIDE
                                     UNSPEC_LSHIFTRT_WIDE])
                        (UNSPEC_FEXPA "fexpa")
                        (UNSPEC_FTSMUL "ftsmul")
                        (UNSPEC_FTSSEL "ftssel")
+                       (UNSPEC_WHILERW "vec_check_raw_alias")
+                       (UNSPEC_WHILEWR "vec_check_war_alias")
                        (UNSPEC_COND_FABS "abs")
                        (UNSPEC_COND_FADD "add")
                        (UNSPEC_COND_FCADD90 "cadd90")
                         (UNSPEC_WHILE_LE "le")
                         (UNSPEC_WHILE_LO "lo")
                         (UNSPEC_WHILE_LS "ls")
-                        (UNSPEC_WHILE_LT "lt")])
+                        (UNSPEC_WHILE_LT "lt")
+                        (UNSPEC_WHILERW "rw")
+                        (UNSPEC_WHILEWR "wr")])
 
 (define_int_attr while_optab_cmp [(UNSPEC_WHILE_LE "le")
                                  (UNSPEC_WHILE_LO "ult")
                                  (UNSPEC_WHILE_LS "ule")
                                  (UNSPEC_WHILE_LT "lt")])
 
+(define_int_attr raw_war [(UNSPEC_WHILERW "raw")
+                         (UNSPEC_WHILEWR "war")])
+
 (define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b")
                         (UNSPEC_BRKN "n")
                         (UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")])
                                (UNSPEC_REVB "16")
                                (UNSPEC_REVH "32")
                                (UNSPEC_REVW "64")])
+
+(define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW")
+                        (UNSPEC_WHILEWR "UNSPEC_WHILEWR")])
index 2c5c53c716d98036192b9d97f0a4cf99a7c885f0..232361235349c629be962885a133ebfe2dc0279e 100644 (file)
 
 (define_predicate "aarch64_sve_any_binary_operator"
   (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))
+
+(define_predicate "aarch64_bytes_per_sve_vector_operand"
+  (and (match_code "const_int,const_poly_int")
+       (match_test "known_eq (wi::to_poly_wide (op, mode),
+                             BYTES_PER_SVE_VECTOR)")))
index 87bbeb4bfc957a8a06bd67fcef74d9f754e8db3c..0ad4a00739fff54fa95e91e25329a424afcc4b0f 100644 (file)
@@ -5076,6 +5076,37 @@ for (i = 1; i < GET_MODE_NUNITS (@var{n}); i++)
   operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
 @end smallexample
 
+@cindex @code{check_raw_ptrs@var{m}} instruction pattern
+@item @samp{check_raw_ptrs@var{m}}
+Check whether, given two pointers @var{a} and @var{b} and a length @var{len},
+a write of @var{len} bytes at @var{a} followed by a read of @var{len} bytes
+at @var{b} can be split into interleaved byte accesses
+@samp{@var{a}[0], @var{b}[0], @var{a}[1], @var{b}[1], @dots{}}
+without affecting the dependencies between the bytes.  Set operand 0
+to true if the split is possible and false otherwise.
+
+Operands 1, 2 and 3 provide the values of @var{a}, @var{b} and @var{len}
+respectively.  Operand 4 is a constant integer that provides the known
+common alignment of @var{a} and @var{b}.  All inputs have mode @var{m}.
+
+This split is possible if:
+
+@smallexample
+@var{a} == @var{b} || @var{a} + @var{len} <= @var{b} || @var{b} + @var{len} <= @var{a}
+@end smallexample
+
+You should only define this pattern if the target has a way of accelerating
+the test without having to do the individual comparisons.
+
+@cindex @code{check_war_ptrs@var{m}} instruction pattern
+@item @samp{check_war_ptrs@var{m}}
+Like @samp{check_raw_ptrs@var{m}}, but with the read and write swapped round.
+The split is possible in this case if:
+
+@smallexample
+@var{b} <= @var{a} || @var{a} + @var{len} <= @var{b}
+@end smallexample
+
 @cindex @code{vec_cmp@var{m}@var{n}} instruction pattern
 @item @samp{vec_cmp@var{m}@var{n}}
 Output a vector comparison.  Operand 0 of mode @var{n} is the destination for
index f3bf66c44ee82a3f28f1ad638a8cea1b6cc19bf6..a3432bc36704358347f3748cb0f111c36e0b5f7a 100644 (file)
@@ -1487,6 +1487,10 @@ Target supports hardware vectors of @code{long}.
 @item vect_long_long
 Target supports hardware vectors of @code{long long}.
 
+@item vect_check_ptrs
+Target supports the @code{check_raw_ptrs} and @code{check_war_ptrs}
+optabs on vectors.
+
 @item vect_fully_masked
 Target supports fully-masked (also known as fully-predicated) loops,
 so that vector loops can handle partial as well as full vectors.
index 6a878bde24d65c84938ac6e6880aff4efe847784..88d52d2c25da8f37d5965c5efca9cf57ecf2f228 100644 (file)
@@ -118,6 +118,7 @@ init_internal_fns ()
 #define fold_extract_direct { 2, 2, false }
 #define fold_left_direct { 1, 1, false }
 #define mask_fold_left_direct { 1, 1, false }
+#define check_ptrs_direct { 0, 0, false }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -3006,6 +3007,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 3)
 
+#define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
+  expand_direct_optab_fn (FN, STMT, OPTAB, 4)
+
 /* RETURN_TYPE and ARGS are a return type and argument list that are
    in principle compatible with FN (which satisfies direct_internal_fn_p).
    Return the types that should be used to determine whether the
@@ -3095,6 +3099,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
 #define direct_fold_left_optab_supported_p direct_optab_supported_p
 #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
+#define direct_check_ptrs_optab_supported_p direct_optab_supported_p
 
 /* Return the optab used by internal function FN.  */
 
@@ -3572,6 +3577,24 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
          && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
 }
 
+/* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
+   for pointers of type TYPE when the accesses have LENGTH bytes and their
+   common byte alignment is ALIGN.  */
+
+bool
+internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type,
+                                   poly_uint64 length, unsigned int align)
+{
+  machine_mode mode = TYPE_MODE (type);
+  optab optab = direct_internal_fn_optab (ifn);
+  insn_code icode = direct_optab_handler (optab, mode);
+  if (icode == CODE_FOR_nothing)
+    return false;
+  rtx length_rtx = immed_wide_int_const (length, mode);
+  return (insn_operand_matches (icode, 3, length_rtx)
+         && insn_operand_matches (icode, 4, GEN_INT (align)));
+}
+
 /* Expand STMT as though it were a call to internal function FN.  */
 
 void
index a9459449fee15b17fe9c8ff1113b0305ed936bff..85f45d660b7dc496b2f4d478128ac05d0a91a25d 100644 (file)
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
    - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode>
 
    - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
+   - check_ptrs: used for check_{raw,war}_ptrs
 
    DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that
    maps to one of two optabs, depending on the signedness of an input.
@@ -136,6 +137,10 @@ DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
                       vec_mask_store_lanes, mask_store_lanes)
 
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
+DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
+                      check_raw_ptrs, check_ptrs)
+DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW,
+                      check_war_ptrs, check_ptrs)
 
 DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW,
                       vec_shl_insert, binary)
index 389241a8a0679b991a30ae34d6ca590c4fda515b..a1bc0819915f12a8d80af2e4477b6684f3df1feb 100644 (file)
@@ -221,6 +221,8 @@ extern int internal_fn_mask_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
                                                    tree, tree, int);
+extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
+                                               poly_uint64, unsigned int);
 
 extern void expand_internal_call (gcall *);
 extern void expand_internal_call (internal_fn, gcall *);
index 90e177a5cc093fda7dee0274fb27456f04ad8dbd..24d8275000e74b54fc1f9f745a0f60375ed4114b 100644 (file)
@@ -429,6 +429,9 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
 OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
 OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
 
+OPTAB_D (check_raw_ptrs_optab, "check_raw_ptrs$a")
+OPTAB_D (check_war_ptrs_optab, "check_war_ptrs$a")
+
 OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
 OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
 OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
index 3a37d94ae1473c84d94316724e0f0ddeb42eb1d1..01e8e2b8a69464ce0459d5354ec774518efe0d54 100644 (file)
@@ -1,3 +1,15 @@
+2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * lib/target-supports.exp (check_effective_target_vect_check_ptrs):
+       New procedure.
+       * gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR to be
+       used, if available.
+       * gcc.dg/vect/vect-alias-check-15.c: Likewise.
+       * gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW.
+       * gcc.target/aarch64/sve2/whilerw_1.c: New test.
+       * gcc.target/aarch64/sve2/whilewr_1.c: Likewise.
+       * gcc.target/aarch64/sve2/whilewr_2.c: Likewise.
+
 2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.target/aarch64/sve/acle/asm/ptest_pmore.c: New test.
index 1d148a04918d9a813b63ea57434bffa3670c3146..29bc571642db8858d3e4ca1027131a1a6559c4c1 100644 (file)
@@ -60,5 +60,6 @@ main (void)
 
 /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
 /* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */
-/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */
 /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
index fbe3f8431ff05e5727f7ce888d280a64670fbc4e..ad74496a6913dcf57ee4573ef1589263a32b074c 100644 (file)
@@ -57,5 +57,6 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */
 /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
index 81c252dfc23e6e9bc279dcb08ff054a3c55a38ce..8a9a6fffde1d39f138c5f54221854e73cef89079 100644 (file)
@@ -62,5 +62,6 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" { target { ! vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_RAW_PTRS test" "vect" { target vect_check_ptrs } } } */
 /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c
new file mode 100644 (file)
index 0000000..63a6d2f
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE)                                \
+  TYPE                                         \
+  test_##TYPE (TYPE *dst, TYPE *src, int n)    \
+  {                                            \
+    TYPE res = 0;                              \
+    for (int i = 0; i < n; ++i)                        \
+      {                                                \
+       dst[i] += 1;                            \
+       res += src[i];                          \
+      }                                                \
+    return res;                                        \
+  }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilerw\t} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.b, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.h, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.s, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.d, x[0-9]+, x1\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilewr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c
new file mode 100644 (file)
index 0000000..e204b37
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE)                                                \
+  void                                                         \
+  test_##TYPE (TYPE *dst, TYPE *src1, TYPE *src2, int n)       \
+  {                                                            \
+    for (int i = 0; i < n; ++i)                                        \
+      dst[i] = src1[i] + src2[i];                              \
+  }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilewr\t} 8 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilerw\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c
new file mode 100644 (file)
index 0000000..0b86991
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE)                                                \
+  void                                                         \
+  test_##TYPE (TYPE *dst1, TYPE *dst2, TYPE *dst3, int n)      \
+  {                                                            \
+    for (int i = 0; i < n; ++i)                                        \
+      {                                                                \
+        dst1[i] = 1;                                           \
+        dst2[i] = 2;                                           \
+        dst3[i] = 3;                                           \
+      }                                                                \
+   }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilewr\t} 12 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilerw\t} } } */
index 54b2fcab3898e888cc7a97530ebc4450df61bda6..08af9f85b4ea7d1ccc4fdf5ad7ce5eb6877e0fdc 100644 (file)
@@ -6459,6 +6459,13 @@ proc check_effective_target_vect_natural_alignment { } {
     return $et_vect_natural_alignment
 }
 
+# Return true if the target supports the check_raw_ptrs and check_war_ptrs
+# optabs on vectors.
+
+proc check_effective_target_vect_check_ptrs { } {
+    return [check_effective_target_aarch64_sve2]
+}
+
 # Return true if fully-masked loops are supported.
 
 proc check_effective_target_vect_fully_masked { } {
index bad80e1a23deb86e78aa12bbbb706ba022e3cbf8..117a14b29971cf376c39e619f806a3d1f663a901 100644 (file)
@@ -96,6 +96,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "tree-eh.h"
 #include "ssa.h"
+#include "internal-fn.h"
 
 static struct datadep_stats
 {
@@ -1719,6 +1720,80 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
     }
 }
 
+/* A subroutine of create_intersect_range_checks, with a subset of the
+   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
+   to optimize cases in which the references form a simple RAW, WAR or
+   WAW dependence.  Return true on success, storing the test in *COND_EXPR.  */
+
+static bool
+create_ifn_alias_checks (tree *cond_expr,
+                        const dr_with_seg_len_pair_t &alias_pair)
+{
+  const dr_with_seg_len& dr_a = alias_pair.first;
+  const dr_with_seg_len& dr_b = alias_pair.second;
+
+  /* Check for cases in which:
+
+     (a) we have a known RAW, WAR or WAW dependence
+     (b) the accesses are well-ordered in both the original and new code
+        (see the comment above the DR_ALIAS_* flags for details); and
+     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
+  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
+    return false;
+
+  /* Make sure that both DRs access the same pattern of bytes,
+     with a constant length and step.  */
+  poly_uint64 seg_len;
+  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
+      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
+      || maybe_ne (dr_a.access_size, dr_b.access_size)
+      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
+      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
+    return false;
+
+  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
+  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
+  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
+
+  /* See whether the target supports what we want to do.  WAW checks are
+     equivalent to WAR checks here.  */
+  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
+                    ? IFN_CHECK_RAW_PTRS
+                    : IFN_CHECK_WAR_PTRS);
+  unsigned int align = MIN (dr_a.align, dr_b.align);
+  poly_uint64 full_length = seg_len + bytes;
+  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+                                          full_length, align))
+    {
+      full_length = seg_len + dr_a.access_size;
+      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+                                              full_length, align))
+       return false;
+    }
+
+  /* Commit to using this form of test.  */
+  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
+  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
+
+  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
+  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
+
+  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
+                                            ifn, boolean_type_node,
+                                            4, addr_a, addr_b,
+                                            size_int (full_length),
+                                            size_int (align));
+
+  if (dump_enabled_p ())
+    {
+      if (ifn == IFN_CHECK_RAW_PTRS)
+       dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
+      else
+       dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
+    }
+  return true;
+}
+
 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
    free of aliases, using a condition based on index values instead
    of a condition based on addresses.  Return true on success,
@@ -2240,6 +2315,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr,
   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
     return;
 
+  if (create_ifn_alias_checks (cond_expr, alias_pair))
+    return;
+
   if (create_waw_or_war_checks (cond_expr, alias_pair))
     return;