From: Michael Matz
Date: Thu, 21 May 2015 14:36:04 +0000 (+0000)
Subject: tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p to strided_p.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f2e2a985429ab708a31abf0bd3bc3df9dbbc3033;p=gcc.git

tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p to strided_p.

	* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
	to strided_p.
	(STMT_VINFO_STRIDE_LOAD_P): Rename to ...
	(STMT_VINFO_STRIDED_P): ... this.
	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
	(vect_verify_datarefs_alignment): Likewise.
	(vect_enhance_data_refs_alignment): Likewise.
	(vect_analyze_data_ref_access): Likewise.
	(vect_analyze_data_refs): Accept strided stores.
	* tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
	(vect_model_load_cost): Adjust for macro rename.
	(vectorizable_mask_load_store): Likewise.
	(vectorizable_load): Likewise.
	(vectorizable_store): Open code strided stores.

testsuite/
	* gcc.dg/vect/vect-strided-store.c: New test.
	* gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
	* gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.

From-SVN: r223486
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 46784af7235..2474aa10ab0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2015-05-08  Michael Matz
+
+	* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
+	to strided_p.
+	(STMT_VINFO_STRIDE_LOAD_P): Rename to ...
+	(STMT_VINFO_STRIDED_P): ... this.
+	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
+	(vect_verify_datarefs_alignment): Likewise.
+	(vect_enhance_data_refs_alignment): Likewise.
+	(vect_analyze_data_ref_access): Likewise.
+	(vect_analyze_data_refs): Accept strided stores.
+	* tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
+	(vect_model_load_cost): Adjust for macro rename.
+	(vectorizable_mask_load_store): Likewise.
+	(vectorizable_load): Likewise.
+	(vectorizable_store): Open code strided stores.
+
 2015-05-21  Kyrylo Tkachov
 
 	* doc/sourcebuild.texi (7.2.3.9 Other hardware attributes):
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a5b78aa738e..46a6bb7ad67 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2015-05-08  Michael Matz
+
+	* gcc.dg/vect/vect-strided-store.c: New test.
+	* gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
+	* gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.
+
 2015-05-21  Kyrylo Tkachov
 
 	* lib/target-supports.exp (check_effective_target_sqrt_insn): New check.
@@ -51,7 +57,7 @@
 2015-05-20  Alex Velenko
 
-	* gcc.target/arm/thumb1-far-jump-2.c (r4): Added int in definition.
+	* gcc.target/arm/thumb1-far-jump-2.c (r4): Added int in definition.
 
 2015-05-20  David Malcolm
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
new file mode 100644
index 00000000000..5014bc7cfcd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
@@ -0,0 +1,36 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+void __attribute__((noinline))
+sumit (float * __restrict dest,
+       float * __restrict src, float * __restrict src2,
+       int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    dest[i*stride] = src[i] + src2[i];
+}
+
+int main()
+{
+  int i, stride;
+  float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  float dest[64];
+  check_vect ();
+  for (stride = 0; stride < 8; stride++)
+    {
+      sumit (dest, src, src, stride, 8);
+      if (!stride && dest[0] != 16)
+	abort();
+      else if (stride)
+	for (i = 0; i < 8; i++)
+	  if (2*src[i] != dest[i*stride])
+	    abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
index b17ac9c3277..d5f5d40765f 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
@@ -14,5 +14,5 @@ subroutine to_product_of(self,a,b,a1,a2)
     end do
 end subroutine
 
-! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } }
+! { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } }
 ! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
index 1d13cea80e0..625be8390dc 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
@@ -29,5 +29,5 @@
       return
       end function trs2a2
 
-! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
 ! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 7e938996866..3b8405f597c 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -663,9 +663,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   /* Initialize misalignment to unknown.  */
   SET_DR_MISALIGNMENT (dr, -1);
 
-  /* Strided loads perform only component accesses, misalignment information
+  /* Strided accesses perform only component accesses, misalignment information
      is irrelevant for them.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return true;
 
@@ -942,9 +942,9 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
	  || !STMT_VINFO_VECTORIZABLE (stmt_info))
	continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;
 
@@ -1410,9 +1410,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
      if (integer_zerop (DR_STEP (dr)))
	continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;
 
@@ -1703,9 +1703,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
	  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
	continue;
 
-      /* Strided loads perform only component accesses, alignment is
+      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;
 
@@ -1824,7 +1824,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
	      && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
	    continue;
 
-	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+	  if (STMT_VINFO_STRIDED_P (stmt_info))
	    {
	      /* Strided loads perform only component accesses, alignment is
		 irrelevant for them.  */
@@ -2346,7 +2346,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
 
  /* Assume this is a DR handled by non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
-    return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+    return (STMT_VINFO_STRIDED_P (stmt_info)
	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		|| vect_analyze_group_access (dr)));
 
@@ -3758,8 +3758,7 @@ again:
      else if (loop_vinfo
	       && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
	{
-	  if (nested_in_vect_loop_p (loop, stmt)
-	      || !DR_IS_READ (dr))
+	  if (nested_in_vect_loop_p (loop, stmt))
	    {
	      if (dump_enabled_p ())
		{
@@ -3771,7 +3770,7 @@ again:
		}
	      return false;
	    }
-	  STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}
    }
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f82decb798e..37a706fa849 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1014,7 +1014,19 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
    }
 
  /* Costs of the stores.  */
-  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    {
+      /* N scalar stores plus extracting the elements.  */
+      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      inside_cost += record_stmt_cost (body_cost_vec,
+				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+				       scalar_store, stmt_info, 0, vect_body);
+      inside_cost += record_stmt_cost (body_cost_vec,
+				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+				       vec_to_scalar, stmt_info, 0, vect_body);
+    }
+  else
+    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
 
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
@@ -1113,7 +1125,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
-      && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
@@ -1128,7 +1140,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
    }
 
  /* The loads themselves.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
*/ @@ -1143,7 +1155,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, || group_size > 1 || slp_node), &inside_cost, &prologue_cost, prologue_cost_vec, body_cost_vec, true); - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDED_P (stmt_info)) inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct, stmt_info, 0, vect_body); @@ -1823,7 +1835,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi, if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) return false; - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDED_P (stmt_info)) return false; if (STMT_VINFO_GATHER_P (stmt_info)) @@ -5016,7 +5028,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, tree dataref_ptr = NULL_TREE; tree dataref_offset = NULL_TREE; gimple ptr_incr = NULL; - int nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; int j; gimple next_stmt, first_stmt = NULL; @@ -5103,38 +5115,40 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - negative = - tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) - ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), - size_zero_node) < 0; - if (negative && ncopies > 1) + if (!STMT_VINFO_STRIDED_P (stmt_info)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "multiple types with negative step.\n"); - return false; - } - - if (negative) - { - gcc_assert (!grouped_store); - alignment_support_scheme = vect_supportable_dr_alignment (dr, false); - if (alignment_support_scheme != dr_aligned - && alignment_support_scheme != dr_unaligned_supported) + negative = + tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) + ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), + size_zero_node) < 0; + if (negative && ncopies > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "negative step but alignment required.\n"); + "multiple types with negative step.\n"); return false; } - if (dt != vect_constant_def - && dt != vect_external_def - && !perm_mask_for_reverse (vectype)) + if (negative) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "negative step and reversing not supported.\n"); - return false; + gcc_assert (!grouped_store); + alignment_support_scheme = vect_supportable_dr_alignment (dr, false); + if (alignment_support_scheme != dr_aligned + && alignment_support_scheme != dr_unaligned_supported) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "negative step but alignment required.\n"); + return false; + } + if (dt != vect_constant_def + && dt != vect_external_def + && !perm_mask_for_reverse (vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "negative step and reversing not supported.\n"); + return false; + } } } @@ -5233,6 +5247,113 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, dump_printf_loc (MSG_NOTE, vect_location, "transform store. 
		     "transform store. ncopies = %d\n", ncopies);
 
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    {
+      gimple_stmt_iterator incr_gsi;
+      bool insert_after;
+      gimple incr;
+      tree offvar;
+      tree ivstep;
+      tree running_off;
+      gimple_seq stmts = NULL;
+      tree stride_base, stride_step, alias_off;
+      tree vec_oprnd;
+
+      gcc_assert (!nested_in_vect_loop_p (loop, stmt));
+
+      stride_base
+	= fold_build_pointer_plus
+	    (unshare_expr (DR_BASE_ADDRESS (dr)),
+	     size_binop (PLUS_EXPR,
+			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
+			 convert_to_ptrofftype (DR_INIT(dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+
+      /* For a store with loop-invariant (but other than power-of-2)
+	 stride (i.e. not a grouped access) like so:
+
+	   for (i = 0; i < n; i += stride)
+	     array[i] = ...;
+
+	 we generate a new induction variable and new stores from
+	 the components of the (vectorized) rhs:
+
+	   for (j = 0; ; j += VF*stride)
+	     vectemp = ...;
+	     tmp1 = vectemp[0];
+	     array[j] = tmp1;
+	     tmp2 = vectemp[1];
+	     array[j + stride] = tmp2;
+	     ...
+	 */
+
+      ivstep = stride_step;
+      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+			    build_int_cst (TREE_TYPE (ivstep),
+					   ncopies * nunits));
+
+      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+
+      create_iv (stride_base, ivstep, NULL,
+		 loop, &incr_gsi, insert_after,
+		 &offvar, NULL);
+      incr = gsi_stmt (incr_gsi);
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+
+      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+      if (stmts)
+	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+      prev_stmt_info = NULL;
+      running_off = offvar;
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+      for (j = 0; j < ncopies; j++)
+	{
+	  /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+	     and first_stmt == stmt.  */
+	  if (j == 0)
+	    vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
+	  else
+	    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+
+	  for (i = 0; i < nunits; i++)
+	    {
+	      tree newref, newoff;
+	      gimple incr, assign;
+	      tree size = TYPE_SIZE (elem_type);
+	      /* Extract the i'th component.  */
+	      tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+				      size);
+	      tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
+				       size, pos);
+
+	      elem = force_gimple_operand_gsi (gsi, elem, true,
+					       NULL_TREE, true,
+					       GSI_SAME_STMT);
+
+	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
+			       running_off, alias_off);
+
+	      /* And store it to *running_off.  */
+	      assign = gimple_build_assign (newref, elem);
+	      vect_finish_stmt_generation (stmt, assign, gsi);
+
+	      newoff = copy_ssa_name (running_off, NULL);
+	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+					  running_off, stride_step);
+	      vect_finish_stmt_generation (stmt, incr, gsi);
+
+	      running_off = newoff;
+	      if (j == 0 && i == 0)
+		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+	      else
+		STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+	      prev_stmt_info = vinfo_for_stmt (assign);
+	    }
+	}
+      return true;
+    }
+
  dr_chain.create (group_size);
  oprnds.create (group_size);
 
@@ -5796,7 +5917,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
 
      if (!slp && !PURE_SLP_STMT (stmt_info)
-	  && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+	  && !STMT_VINFO_STRIDED_P (stmt_info))
	{
	  if (vect_load_lanes_supported (vectype, group_size))
	    load_lanes_p = true;
@@ -5851,7 +5972,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
	  return false;
	}
    }
-  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      if ((grouped_load
	   && (slp || PURE_SLP_STMT (stmt_info)))
@@ -6099,7 +6220,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
	}
      return true;
    }
-  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5a4fdbb63ac..17e590e67de 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -646,7 +646,9 @@ typedef struct _stmt_vec_info {
 
  /* For loads only, true if this is a gather load.  */
  bool gather_p;
-  bool stride_load_p;
+
+  /* True if this is an access with loop-invariant stride.  */
+  bool strided_p;
 
  /* For both loads and stores.  */
  bool simd_lane_access_p;
@@ -664,7 +666,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_VECTORIZABLE(S)       (S)->vectorizable
 #define STMT_VINFO_DATA_REF(S)           (S)->data_ref_info
 #define STMT_VINFO_GATHER_P(S)           (S)->gather_p
-#define STMT_VINFO_STRIDE_LOAD_P(S)      (S)->stride_load_p
+#define STMT_VINFO_STRIDED_P(S)          (S)->strided_p
 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
 
 #define STMT_VINFO_DR_BASE_ADDRESS(S)    (S)->dr_base_address
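
For readers who want to see what the new strided-store path in vectorizable_store amounts to, here is a small illustrative C sketch (not taken from the patch): it mirrors the pseudocode in the comment above, assumes a vectorization factor of 4 purely for illustration, and uses a plain array named vectemp to stand in for the vector right-hand side.

/* Scalar loop with a loop-invariant (runtime) stride, the shape the
   patch teaches the vectorizer to handle as a strided store.  */
void
scalar_form (float *array, const float *src, int stride, int n)
{
  for (int i = 0; i < n; i++)
    array[i * stride] = src[i] * 2.0f;
}

/* Hand-written equivalent of the open-coded expansion: the right-hand
   side is computed per block of VF elements, then each component is
   stored through a pointer that advances by the stride.  */
void
strided_store_form (float *array, const float *src, int stride, int n)
{
  int i = 0;
  for (; i + 4 <= n; i += 4)
    {
      float vectemp[4];                 /* stands in for the vector rhs */
      for (int k = 0; k < 4; k++)
	vectemp[k] = src[i + k] * 2.0f; /* the vectorized computation */
      float *p = array + (long) i * stride;
      for (int k = 0; k < 4; k++)       /* one scalar store per component */
	{
	  *p = vectemp[k];
	  p += stride;
	}
    }
  for (; i < n; i++)                    /* scalar epilogue */
    array[i * stride] = src[i] * 2.0f;
}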