From: Richard Sandiford
Date: Mon, 18 Nov 2019 15:36:10 +0000 (+0000)
Subject: Add optabs for accelerating RAW and WAR alias checks
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=58c036c8354e4d14551ceaeffaa1dda2fe445640;p=gcc.git

Add optabs for accelerating RAW and WAR alias checks

This patch adds optabs that check whether a read followed by a write or a write followed by a read can be divided into interleaved byte accesses without changing the dependencies between the bytes. This is one of the uses of the SVE2 WHILERW and WHILEWR instructions. (The instructions can also be used to limit the VF at runtime, but that's future work.)

2019-11-18 Richard Sandiford

gcc/
* doc/sourcebuild.texi (vect_check_ptrs): Document.
* optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs.
* doc/md.texi: Document them.
* internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New internal functions.
* internal-fn.h (internal_check_ptrs_fn_supported_p): Declare.
* internal-fn.c (check_ptrs_direct): New macro.
(expand_check_ptrs_optab_fn): Likewise.
(direct_check_ptrs_optab_supported_p): Likewise.
(internal_check_ptrs_fn_supported_p): New function.
* tree-data-ref.c: Include internal-fn.h.
(create_ifn_alias_checks): New function.
(create_intersect_range_checks): Use it.
* config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator.
(optab, cmp_op): Handle it.
(raw_war, unspec): New int attributes.
* config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILEWR): New constants.
* config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand): New predicate.
* config/aarch64/aarch64-sve2.md (check_<raw_war>_ptrs<mode>): New expander.
(@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest): New pattern.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_check_ptrs): New procedure.
* gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR to be used, if available.
* gcc.dg/vect/vect-alias-check-15.c: Likewise.
* gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW.
* gcc.target/aarch64/sve2/whilerw_1.c: New test.
* gcc.target/aarch64/sve2/whilewr_1.c: Likewise.
* gcc.target/aarch64/sve2/whilewr_2.c: Likewise.

From-SVN: r278414
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index be2fac770ee..c57e8c40084 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2019-11-18 Richard Sandiford
+
+ * doc/sourcebuild.texi (vect_check_ptrs): Document.
+ * optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs.
+ * doc/md.texi: Document them.
+ * internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New
+ internal functions.
+ * internal-fn.h (internal_check_ptrs_fn_supported_p): Declare.
+ * internal-fn.c (check_ptrs_direct): New macro.
+ (expand_check_ptrs_optab_fn): Likewise.
+ (direct_check_ptrs_optab_supported_p): Likewise.
+ (internal_check_ptrs_fn_supported_p): New function.
+ * tree-data-ref.c: Include internal-fn.h.
+ (create_ifn_alias_checks): New function.
+ (create_intersect_range_checks): Use it.
+ * config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator.
+ (optab, cmp_op): Handle it.
+ (raw_war, unspec): New int attributes.
+ * config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILEWR): New
+ constants.
+ * config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand):
+ New predicate.
+ * config/aarch64/aarch64-sve2.md (check_<raw_war>_ptrs<mode>): New
+ expander.
+ (@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest): New
+ pattern.
+
 2019-11-18 Richard Sandiford
 * tree.c (build_vector_from_ctor): Directly return a zero vector for
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 15142d1d775..106a9a015ab 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -331,3 +331,61 @@
 }
 [(set_attr "movprfx" "*,yes")]
)
+
+;; Use WHILERW and WHILEWR to accelerate alias checks. This is only
+;; possible if the accesses we're checking are exactly the same size
+;; as an SVE vector.
+(define_expand "check_<raw_war>_ptrs<mode>"
+ [(match_operand:GPI 0 "register_operand")
+ (unspec:VNx16BI
+ [(match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
+ (match_operand:GPI 4 "const_int_operand")]
+ SVE2_WHILE_PTR)]
+ "TARGET_SVE2"
+{
+ /* Use the widest predicate mode we can. */
+ unsigned int align = INTVAL (operands[4]);
+ if (align > 8)
+ align = 8;
+ machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();
+
+ /* Emit a WHILERW or WHILEWR, setting the condition codes based on
+ the result. */
+ emit_insn (gen_aarch64_sve2_while_ptest
+ (<unspec>, <MODE>mode, pred_mode,
+ gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
+ CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));
+
+ /* Set operand 0 to true if the last bit of the predicate result is set,
+ i.e. if all elements are free of dependencies. */
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
+ emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
+ DONE;
+})
+
+;; A WHILERW or WHILEWR in which only the flags result is interesting.
+(define_insn_and_rewrite "@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand 3)
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (unspec:PRED_ALL
+ [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "register_operand" "r")]
+ SVE2_WHILE_PTR)]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
+ "TARGET_SVE2"
+ "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %x1, %x2"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
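 ;; (Operands 3 and 4 describe the governing predicate of the PTEST -- the
 ;; same value in VNx16BI form and in the element-width predicate mode.
 ;; Because the SVE_KNOWN_PTRUE flag marks that predicate as known to be
 ;; all-true, the rewrite below can replace both operands with constant
 ;; all-true predicates, after which the PTRUE that originally supplied
 ;; them has no remaining uses.)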
+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" + { + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (mode); + } +) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f19e2272750..87e9b9364bd 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -245,6 +245,8 @@ UNSPEC_WHILE_LO UNSPEC_WHILE_LS UNSPEC_WHILE_LT + UNSPEC_WHILERW + UNSPEC_WHILEWR UNSPEC_LDN UNSPEC_STN UNSPEC_INSR diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index bfeebe9b772..83a0d156e84 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2077,6 +2077,8 @@ (define_int_iterator SVE_WHILE [UNSPEC_WHILE_LE UNSPEC_WHILE_LO UNSPEC_WHILE_LS UNSPEC_WHILE_LT]) +(define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR]) + (define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE UNSPEC_ASHIFTRT_WIDE UNSPEC_LSHIFTRT_WIDE]) @@ -2157,6 +2159,8 @@ (UNSPEC_FEXPA "fexpa") (UNSPEC_FTSMUL "ftsmul") (UNSPEC_FTSSEL "ftssel") + (UNSPEC_WHILERW "vec_check_raw_alias") + (UNSPEC_WHILEWR "vec_check_war_alias") (UNSPEC_COND_FABS "abs") (UNSPEC_COND_FADD "add") (UNSPEC_COND_FCADD90 "cadd90") @@ -2480,13 +2484,18 @@ (UNSPEC_WHILE_LE "le") (UNSPEC_WHILE_LO "lo") (UNSPEC_WHILE_LS "ls") - (UNSPEC_WHILE_LT "lt")]) + (UNSPEC_WHILE_LT "lt") + (UNSPEC_WHILERW "rw") + (UNSPEC_WHILEWR "wr")]) (define_int_attr while_optab_cmp [(UNSPEC_WHILE_LE "le") (UNSPEC_WHILE_LO "ult") (UNSPEC_WHILE_LS "ule") (UNSPEC_WHILE_LT "lt")]) +(define_int_attr raw_war [(UNSPEC_WHILERW "raw") + (UNSPEC_WHILEWR "war")]) + (define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b") (UNSPEC_BRKN "n") (UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")]) @@ -2630,3 +2639,6 @@ (UNSPEC_REVB "16") (UNSPEC_REVH "32") (UNSPEC_REVW "64")]) + +(define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW") + (UNSPEC_WHILEWR "UNSPEC_WHILEWR")]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 2c5c53c716d..23236123534 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -869,3 +869,8 @@ (define_predicate "aarch64_sve_any_binary_operator" (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor")) + +(define_predicate "aarch64_bytes_per_sve_vector_operand" + (and (match_code "const_int,const_poly_int") + (match_test "known_eq (wi::to_poly_wide (op, mode), + BYTES_PER_SVE_VECTOR)"))) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 87bbeb4bfc9..0ad4a00739f 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5076,6 +5076,37 @@ for (i = 1; i < GET_MODE_NUNITS (@var{n}); i++) operand0[i] = operand0[i - 1] && (operand1 + i < operand2); @end smallexample +@cindex @code{check_raw_ptrs@var{m}} instruction pattern +@item @samp{check_raw_ptrs@var{m}} +Check whether, given two pointers @var{a} and @var{b} and a length @var{len}, +a write of @var{len} bytes at @var{a} followed by a read of @var{len} bytes +at @var{b} can be split into interleaved byte accesses +@samp{@var{a}[0], @var{b}[0], @var{a}[1], @var{b}[1], @dots{}} +without affecting the dependencies between the bytes. Set operand 0 +to true if the split is possible and false otherwise. + +Operands 1, 2 and 3 provide the values of @var{a}, @var{b} and @var{len} +respectively. Operand 4 is a constant integer that provides the known +common alignment of @var{a} and @var{b}. All inputs have mode @var{m}. 
+ +This split is possible if: + +@smallexample +@var{a} == @var{b} || @var{a} + @var{len} <= @var{b} || @var{b} + @var{len} <= @var{a} +@end smallexample + +You should only define this pattern if the target has a way of accelerating +the test without having to do the individual comparisons. + +@cindex @code{check_war_ptrs@var{m}} instruction pattern +@item @samp{check_war_ptrs@var{m}} +Like @samp{check_raw_ptrs@var{m}}, but with the read and write swapped round. +The split is possible in this case if: + +@smallexample +@var{b} <= @var{a} || @var{a} + @var{len} <= @var{b} +@end smallexample + @cindex @code{vec_cmp@var{m}@var{n}} instruction pattern @item @samp{vec_cmp@var{m}@var{n}} Output a vector comparison. Operand 0 of mode @var{n} is the destination for diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index f3bf66c44ee..a3432bc3670 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1487,6 +1487,10 @@ Target supports hardware vectors of @code{long}. @item vect_long_long Target supports hardware vectors of @code{long long}. +@item vect_check_ptrs +Target supports the @code{check_raw_ptrs} and @code{check_war_ptrs} +optabs on vectors. + @item vect_fully_masked Target supports fully-masked (also known as fully-predicated) loops, so that vector loops can handle partial as well as full vectors. diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 6a878bde24d..88d52d2c25d 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -118,6 +118,7 @@ init_internal_fns () #define fold_extract_direct { 2, 2, false } #define fold_left_direct { 1, 1, false } #define mask_fold_left_direct { 1, 1, false } +#define check_ptrs_direct { 0, 0, false } const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, @@ -3006,6 +3007,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 3) +#define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ + expand_direct_optab_fn (FN, STMT, OPTAB, 4) + /* RETURN_TYPE and ARGS are a return type and argument list that are in principle compatible with FN (which satisfies direct_internal_fn_p). Return the types that should be used to determine whether the @@ -3095,6 +3099,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_fold_extract_optab_supported_p direct_optab_supported_p #define direct_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p +#define direct_check_ptrs_optab_supported_p direct_optab_supported_p /* Return the optab used by internal function FN. */ @@ -3572,6 +3577,24 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type, && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale))); } +/* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN + for pointers of type TYPE when the accesses have LENGTH bytes and their + common byte alignment is ALIGN. 
*/ + +bool +internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type, + poly_uint64 length, unsigned int align) +{ + machine_mode mode = TYPE_MODE (type); + optab optab = direct_internal_fn_optab (ifn); + insn_code icode = direct_optab_handler (optab, mode); + if (icode == CODE_FOR_nothing) + return false; + rtx length_rtx = immed_wide_int_const (length, mode); + return (insn_operand_matches (icode, 3, length_rtx) + && insn_operand_matches (icode, 4, GEN_INT (align))); +} + /* Expand STMT as though it were a call to internal function FN. */ void diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a9459449fee..85f45d660b7 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see - cond_ternary: a conditional ternary optab, such as cond_fma_rev - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode + - check_ptrs: used for check_{raw,war}_ptrs DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that maps to one of two optabs, depending on the signedness of an input. @@ -136,6 +137,10 @@ DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0, vec_mask_store_lanes, mask_store_lanes) DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) +DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW, + check_raw_ptrs, check_ptrs) +DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW, + check_war_ptrs, check_ptrs) DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, vec_shl_insert, binary) diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 389241a8a06..a1bc0819915 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -221,6 +221,8 @@ extern int internal_fn_mask_index (internal_fn); extern int internal_fn_stored_value_index (internal_fn); extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, tree, tree, int); +extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree, + poly_uint64, unsigned int); extern void expand_internal_call (gcall *); extern void expand_internal_call (internal_fn, gcall *); diff --git a/gcc/optabs.def b/gcc/optabs.def index 90e177a5cc0..24d8275000e 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -429,6 +429,9 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a") OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a") OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a") +OPTAB_D (check_raw_ptrs_optab, "check_raw_ptrs$a") +OPTAB_D (check_war_ptrs_optab, "check_war_ptrs$a") + OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE) OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES) OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3a37d94ae14..01e8e2b8a69 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2019-11-18 Richard Sandiford + + * lib/target-supports.exp (check_effective_target_vect_check_ptrs): + New procedure. + * gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR to be + used, if available. + * gcc.dg/vect/vect-alias-check-15.c: Likewise. + * gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW. + * gcc.target/aarch64/sve2/whilerw_1.c: New test. + * gcc.target/aarch64/sve2/whilewr_1.c: Likewise. + * gcc.target/aarch64/sve2/whilewr_2.c: Likewise. + 2019-11-18 Richard Sandiford * gcc.target/aarch64/sve/acle/asm/ptest_pmore.c: New test. 
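For reference, the conditions that the two new internal functions encode can be written as ordinary scalar code. The sketch below follows the check_raw_ptrs / check_war_ptrs documentation added to md.texi above; the helper names are illustrative only, and the alignment operand is omitted because it only tells the target how the check may be implemented, not what the result means.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of check_war_ptrs<m>: a read of LEN bytes at A followed by a
   write of LEN bytes at B can be split into interleaved byte accesses
   iff the write starts at or below the read, or the two regions do not
   overlap at all.  */
static bool
check_war_ptrs_sketch (const void *a, const void *b, size_t len)
{
  uintptr_t ra = (uintptr_t) a, wb = (uintptr_t) b;
  return wb <= ra || ra + len <= wb;
}

/* Sketch of check_raw_ptrs<m>: a write of LEN bytes at A followed by a
   read of LEN bytes at B can be split into interleaved byte accesses
   iff the two accesses are identical or do not overlap at all.  */
static bool
check_raw_ptrs_sketch (const void *a, const void *b, size_t len)
{
  uintptr_t wa = (uintptr_t) a, rb = (uintptr_t) b;
  return wa == rb || wa + len <= rb || rb + len <= wa;
}

These are the conditions that the aarch64-sve2.md expander above implements with a single WHILERW or WHILEWR followed by a CSET on the resulting flags.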
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c index 1d148a04918..29bc571642d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c @@ -60,5 +60,6 @@ main (void) /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */ /* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */ -/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */ +/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */ +/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */ /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c index fbe3f8431ff..ad74496a691 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c @@ -57,5 +57,6 @@ main (void) } /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */ -/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */ +/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */ +/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */ /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c index 81c252dfc23..8a9a6fffde1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c @@ -62,5 +62,6 @@ main (void) } /* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */ -/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */ +/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" { target { ! 
vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_RAW_PTRS test" "vect" { target vect_check_ptrs } } } */
 /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c
new file mode 100644
index 00000000000..63a6d2f4d62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE) \
+ TYPE \
+ test_##TYPE (TYPE *dst, TYPE *src, int n) \
+ { \
+ TYPE res = 0; \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst[i] += 1; \
+ res += src[i]; \
+ } \
+ return res; \
+ }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilerw\t} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.b, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.h, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.s, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.d, x[0-9]+, x1\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilewr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c
new file mode 100644
index 00000000000..e204b37c614
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE) \
+ void \
+ test_##TYPE (TYPE *dst, TYPE *src1, TYPE *src2, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dst[i] = src1[i] + src2[i]; \
+ }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilewr\t} 8 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilerw\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c
new file mode 100644
index 00000000000..0b86991333e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE) \
+ void \
+ test_##TYPE (TYPE *dst1, TYPE *dst2, TYPE *dst3, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst1[i] = 1; \
+ dst2[i] = 2; \
+ dst3[i] = 3; \
+ } \
+ }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilewr\t} 12 } } */
+/* { dg-final { scan-assembler-times
{\twhilewr\tp[0-9]+\.b, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilerw\t} } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 54b2fcab389..08af9f85b4e 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6459,6 +6459,13 @@ proc check_effective_target_vect_natural_alignment { } {
 return $et_vect_natural_alignment
}
+# Return true if the target supports the check_raw_ptrs and check_war_ptrs
+# optabs on vectors.
+
+proc check_effective_target_vect_check_ptrs { } {
+ return [check_effective_target_aarch64_sve2]
+}
+
 # Return true if fully-masked loops are supported.
 proc check_effective_target_vect_fully_masked { } {
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index bad80e1a23d..117a14b2997 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -96,6 +96,7 @@ along with GCC; see the file COPYING3. If not see
 #include "builtins.h"
 #include "tree-eh.h"
 #include "ssa.h"
+#include "internal-fn.h"
 static struct datadep_stats
 {
@@ -1719,6 +1720,80 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
 }
}
+/* A subroutine of create_intersect_range_checks, with a subset of the
+ same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
+ to optimize cases in which the references form a simple RAW, WAR or
+ WAW dependence. */
+
+static bool
+create_ifn_alias_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
+
+ /* Check for cases in which:
+
+ (a) we have a known RAW, WAR or WAW dependence
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
+
+ /* Make sure that both DRs access the same pattern of bytes,
+ with a constant length and step. */
+ poly_uint64 seg_len;
+ if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
+ || !poly_int_tree_p (dr_a.seg_len, &seg_len)
+ || maybe_ne (dr_a.access_size, dr_b.access_size)
+ || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
+ || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
+ return false;
+
+ unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
+ tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
+ tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
+
+ /* See whether the target supports what we want to do. WAW checks are
+ equivalent to WAR checks here.
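 (In both cases the later access is a write, and interleaving the bytes is
 safe under exactly the condition that the WAR optab tests: the later access
 starts at an address no greater than the earlier one, or the two regions do
 not overlap.)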
*/ + internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW + ? IFN_CHECK_RAW_PTRS + : IFN_CHECK_WAR_PTRS); + unsigned int align = MIN (dr_a.align, dr_b.align); + poly_uint64 full_length = seg_len + bytes; + if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), + full_length, align)) + { + full_length = seg_len + dr_a.access_size; + if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), + full_length, align)) + return false; + } + + /* Commit to using this form of test. */ + addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr)); + addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); + + addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr)); + addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); + + *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION, + ifn, boolean_type_node, + 4, addr_a, addr_b, + size_int (full_length), + size_int (align)); + + if (dump_enabled_p ()) + { + if (ifn == IFN_CHECK_RAW_PTRS) + dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n"); + else + dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n"); + } + return true; +} + /* Try to generate a runtime condition that is true if ALIAS_PAIR is free of aliases, using a condition based on index values instead of a condition based on addresses. Return true on success, @@ -2240,6 +2315,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr, if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) return; + if (create_ifn_alias_checks (cond_expr, alias_pair)) + return; + if (create_waw_or_war_checks (cond_expr, alias_pair)) return;
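To make the effect of create_ifn_alias_checks concrete: full_length is meant to be the total number of bytes that each reference touches (the segment length plus one final step, or failing that plus the access size). For the contiguous accesses in the tests above this equals the vector length in bytes, which is exactly the value that aarch64_bytes_per_sve_vector_operand accepts. The condition built here is then used to version the loop. The following hand-written sketch shows the shape of the result for a loop of the form dst[i] = src[i] + 1, with the internal function call modelled by its documented condition; VLEN and the function name are illustrative, not part of the patch.

#include <stddef.h>
#include <stdint.h>

#define VLEN 16   /* stands in for the SVE vector length in bytes */

void
add_one (int8_t *dst, const int8_t *src, int n)
{
  uintptr_t d = (uintptr_t) dst, s = (uintptr_t) src;
  /* Models .CHECK_WAR_PTRS (src, dst, VLEN, 1).  */
  if (d <= s || s + VLEN <= d)
    {
      /* "Vector" version: the check guarantees that handling VLEN bytes
         per iteration gives the same result as the scalar loop, even if
         dst and src overlap.  */
      for (int i = 0; i < n; ++i)
        dst[i] = src[i] + 1;
    }
  else
    {
      /* Scalar fallback, kept exactly as the user wrote it.  */
      for (int i = 0; i < n; ++i)
        dst[i] = src[i] + 1;
    }
}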