From: Richard Sandiford
Date: Wed, 6 Jul 2016 08:16:53 +0000 (+0000)
Subject: [7/7] Add negative and zero strides to vect_memory_access_type
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=62da9e14ce5b670bf40a91f1dc9473579a2d5549;p=gcc.git

[7/7] Add negative and zero strides to vect_memory_access_type

This patch uses the vect_memory_access_type from patch 6 to represent
the effect of a negative contiguous stride or a zero stride.  The
latter is valid only for loads.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.

gcc/
	* tree-vectorizer.h (vect_memory_access_type): Add VMAT_INVARIANT,
	VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE.
	* tree-vect-stmts.c (compare_step_with_zero): New function.
	(perm_mask_for_reverse): Move further up file.
	(get_group_load_store_type): Stick to VMAT_ELEMENTWISE if the
	step is negative.
	(get_negative_load_store_type): New function.
	(get_load_store_type): Call it.  Add an ncopies argument.
	(vectorizable_mask_load_store): Update call accordingly and
	remove tests for negative steps.
	(vectorizable_store, vectorizable_load): Likewise.  Handle new
	memory_access_types.

From-SVN: r238039
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2e309344c06..c853885a2b3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2016-07-06  Richard Sandiford
+
+	* tree-vectorizer.h (vect_memory_access_type): Add
+	VMAT_INVARIANT, VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE.
+	* tree-vect-stmts.c (compare_step_with_zero): New function.
+	(perm_mask_for_reverse): Move further up file.
+	(get_group_load_store_type): Stick to VMAT_ELEMENTWISE if the
+	step is negative.
+	(get_negative_load_store_type): New function.
+	(get_load_store_type): Call it.  Add an ncopies argument.
+	(vectorizable_mask_load_store): Update call accordingly and
+	remove tests for negative steps.
+	(vectorizable_store, vectorizable_load): Likewise.  Handle new
+	memory_access_types.
+
 2016-07-06  Richard Sandiford
 
 	* tree-vectorizer.h (vect_memory_access_type): New enum.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 16bec2b4d35..ffa5e9881d6 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1672,6 +1672,42 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
 
 static tree permute_vec_elements (tree, tree, tree, gimple *,
 				  gimple_stmt_iterator *);
+
+/* STMT is a non-strided load or store, meaning that it accesses
+   elements with a known constant step.  Return -1 if that step
+   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
+
+static int
+compare_step_with_zero (gimple *stmt)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  tree step;
+  if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
+    step = STMT_VINFO_DR_STEP (stmt_info);
+  else
+    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
+  return tree_int_cst_compare (step, size_zero_node);
+}
+
+/* If the target supports a permute mask that reverses the elements in
+   a vector of type VECTYPE, return that mask, otherwise return null.  */
+
+static tree
+perm_mask_for_reverse (tree vectype)
+{
+  int i, nunits;
+  unsigned char *sel;
+
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  sel = XALLOCAVEC (unsigned char, nunits);
+
+  for (i = 0; i < nunits; ++i)
+    sel[i] = nunits - 1 - i;
+
+  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+    return NULL_TREE;
+  return vect_gen_perm_mask_checked (vectype, sel);
+}
 
 /* A subroutine of get_load_store_type, with a subset of the same
    arguments.  Handle the case where STMT is part of a grouped load
@@ -1755,7 +1791,8 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
 	 would access excess elements in the last iteration.  */
       bool would_overrun_p = (gap != 0);
       if (!STMT_VINFO_STRIDED_P (stmt_info)
-	  && (can_overrun_p || !would_overrun_p))
+	  && (can_overrun_p || !would_overrun_p)
+	  && compare_step_with_zero (stmt) > 0)
 	{
 	  /* First try using LOAD/STORE_LANES.  */
 	  if (vls_type == VLS_LOAD
@@ -1814,17 +1851,69 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
   return true;
 }
 
+/* A subroutine of get_load_store_type, with a subset of the same
+   arguments.  Handle the case where STMT is a load or store that
+   accesses consecutive elements with a negative step.  */
+
+static vect_memory_access_type
+get_negative_load_store_type (gimple *stmt, tree vectype,
+			      vec_load_store_type vls_type,
+			      unsigned int ncopies)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  dr_alignment_support alignment_support_scheme;
+
+  if (ncopies > 1)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "multiple types with negative step.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+  if (alignment_support_scheme != dr_aligned
+      && alignment_support_scheme != dr_unaligned_supported)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step but alignment required.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  if (vls_type == VLS_STORE_INVARIANT)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "negative step with invariant source;"
+			 " no permute needed.\n");
+      return VMAT_CONTIGUOUS_DOWN;
+    }
+
+  if (!perm_mask_for_reverse (vectype))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step and reversing not supported.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  return VMAT_CONTIGUOUS_REVERSE;
+}
+
 /* Analyze load or store statement STMT of type VLS_TYPE.  Return true
    if there is a memory access type that the vectorized form can use,
    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
    or scatters, fill in GS_INFO accordingly.
 
    SLP says whether we're performing SLP rather than loop vectorization.
-   VECTYPE is the vector type that the vectorized statements will use.  */
+   VECTYPE is the vector type that the vectorized statements will use.
+   NCOPIES is the number of vector statements that will be needed.  */
 
 static bool
 get_load_store_type (gimple *stmt, tree vectype, bool slp,
-		     vec_load_store_type vls_type,
+		     vec_load_store_type vls_type, unsigned int ncopies,
 		     vect_memory_access_type *memory_access_type,
 		     gather_scatter_info *gs_info)
 {
@@ -1860,7 +1949,19 @@ get_load_store_type (gimple *stmt, tree vectype, bool slp,
 	  *memory_access_type = VMAT_ELEMENTWISE;
 	}
       else
-	*memory_access_type = VMAT_CONTIGUOUS;
+	{
+	  int cmp = compare_step_with_zero (stmt);
+	  if (cmp < 0)
+	    *memory_access_type = get_negative_load_store_type
+	      (stmt, vectype, vls_type, ncopies);
+	  else if (cmp == 0)
+	    {
+	      gcc_assert (vls_type == VLS_LOAD);
+	      *memory_access_type = VMAT_INVARIANT;
+	    }
+	  else
+	    *memory_access_type = VMAT_CONTIGUOUS;
+	}
 
   /* FIXME: At the moment the cost model seems to underestimate the
      cost of using elementwise accesses.  This check preserves the
@@ -1971,7 +2072,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
     vls_type = VLS_LOAD;
 
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt, vectype, false, vls_type,
+  if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
 			    &memory_access_type, &gs_info))
     return false;
 
@@ -1996,10 +2097,6 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
 			   vls_type == VLS_LOAD ? "load" : "store");
 	  return false;
 	}
-      else if (tree_int_cst_compare (nested_in_vect_loop
-				     ? STMT_VINFO_DR_STEP (stmt_info)
-				     : DR_STEP (dr), size_zero_node) <= 0)
-	return false;
       else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
 	       || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
 					      TYPE_MODE (mask_vectype),
@@ -5340,27 +5437,6 @@ ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
 }
 
 
-/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
-   reversal of the vector elements.  If that is impossible to do,
-   returns NULL.  */
-
-static tree
-perm_mask_for_reverse (tree vectype)
-{
-  int i, nunits;
-  unsigned char *sel;
-
-  nunits = TYPE_VECTOR_SUBPARTS (vectype);
-  sel = XALLOCAVEC (unsigned char, nunits);
-
-  for (i = 0; i < nunits; ++i)
-    sel[i] = nunits - 1 - i;
-
-  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
-    return NULL_TREE;
-  return vect_gen_perm_mask_checked (vectype, sel);
-}
-
 /* Function vectorizable_store.
 
    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
@@ -5400,7 +5476,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   vec<tree> oprnds = vNULL;
   vec<tree> result_chain = vNULL;
   bool inv_p;
-  bool negative = false;
   tree offset = NULL_TREE;
   vec<tree> vec_oprnds = vNULL;
   bool slp = (slp_node != NULL);
@@ -5504,44 +5579,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  if (!STMT_VINFO_STRIDED_P (stmt_info))
-    {
-      negative =
-	tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
-			      ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
-			      size_zero_node) < 0;
-      if (negative && ncopies > 1)
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "multiple types with negative step.\n");
-	  return false;
-	}
-      if (negative)
-	{
-	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
-	  if (alignment_support_scheme != dr_aligned
-	      && alignment_support_scheme != dr_unaligned_supported)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "negative step but alignment required.\n");
-	      return false;
-	    }
-	  if (dt != vect_constant_def
-	      && dt != vect_external_def
-	      && !perm_mask_for_reverse (vectype))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "negative step and reversing not supported.\n");
-	      return false;
-	    }
-	}
-    }
-
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt, vectype, slp, vls_type,
+  if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
 			    &memory_access_type, &gs_info))
     return false;
 
@@ -5947,7 +5986,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	      || alignment_support_scheme == dr_aligned
 	      || alignment_support_scheme == dr_unaligned_supported);
 
-  if (negative)
+  if (memory_access_type == VMAT_CONTIGUOUS_DOWN
+      || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
   if (memory_access_type == VMAT_LOAD_STORE_LANES)
@@ -6169,9 +6209,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
 					misalign);
 
-	      if (negative
-		  && dt != vect_constant_def
-		  && dt != vect_external_def)
+	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
 		{
 		  tree perm_mask = perm_mask_for_reverse (vectype);
 		  tree perm_dest
@@ -6375,7 +6413,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   gimple *first_stmt;
   gimple *first_stmt_for_drptr = NULL;
   bool inv_p;
-  bool negative = false;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -6531,55 +6568,10 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     }
 
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD,
+  if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
 			    &memory_access_type, &gs_info))
     return false;
 
-  if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)
-      && !STMT_VINFO_STRIDED_P (stmt_info))
-    {
-      negative = tree_int_cst_compare (nested_in_vect_loop
-				       ? STMT_VINFO_DR_STEP (stmt_info)
-				       : DR_STEP (dr),
-				       size_zero_node) < 0;
-      if (negative && ncopies > 1)
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "multiple types with negative step.\n");
-	  return false;
-	}
-
-      if (negative)
-	{
-	  if (grouped_load)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "negative step for group load not supported"
-				 "\n");
-	      return false;
-	    }
-	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
-	  if (alignment_support_scheme != dr_aligned
-	      && alignment_support_scheme != dr_unaligned_supported)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "negative step but alignment required.\n");
-	      return false;
-	    }
-	  if (!perm_mask_for_reverse (vectype))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "negative step and reversing not supported."
-				 "\n");
-	      return false;
-	    }
-	}
-    }
-
   if (!vec_stmt) /* transformation not required.  */
     {
       if (!slp)
@@ -7120,7 +7112,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   else
     at_loop = loop;
 
-  if (negative)
+  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
   if (memory_access_type == VMAT_LOAD_STORE_LANES)
@@ -7409,7 +7401,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 		    }
 		}
 
-	      if (negative)
+	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
 		{
 		  tree perm_mask = perm_mask_for_reverse (vectype);
 		  new_temp = permute_vec_elements (new_temp, new_temp,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index ef69b7e8a1f..2cfb72a6fa3 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -484,14 +484,26 @@ enum slp_vect_type {
 /* Describes how we're going to vectorize an individual load or store,
    or a group of loads or stores.  */
 enum vect_memory_access_type {
+  /* An access to an invariant address.  This is used only for loads.  */
+  VMAT_INVARIANT,
+
   /* A simple contiguous access.  */
   VMAT_CONTIGUOUS,
 
+  /* A contiguous access that goes down in memory rather than up,
+     with no additional permutation.  This is used only for stores
+     of invariants.  */
+  VMAT_CONTIGUOUS_DOWN,
+
   /* A simple contiguous access in which the elements need to be permuted
      after loading or before storing.  Only used for loop vectorization;
     SLP uses separate permutes.  */
   VMAT_CONTIGUOUS_PERMUTE,
 
+  /* A simple contiguous access in which the elements need to be reversed
+     after loading or before storing.  */
+  VMAT_CONTIGUOUS_REVERSE,
+
   /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
   VMAT_LOAD_STORE_LANES,
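
For illustration only (this sketch is not part of the patch), the hypothetical
C loops below show the access shapes that the new memory_access_types
describe; all function and variable names here are invented:

/* Illustrative sketch only; names (reverse_copy, splat_backwards,
   read_invariant, a, b, x, p, n) are hypothetical.  */

void
reverse_copy (int *a, int *b, int n)
{
  /* Both accesses step backwards through memory, so with this patch they
     can be classified as VMAT_CONTIGUOUS_REVERSE: contiguous accesses whose
     elements are reversed by a permute, provided perm_mask_for_reverse
     finds a suitable mask and the alignment is supported.  */
  for (int i = n - 1; i >= 0; --i)
    a[i] = b[i] + 1;
}

void
splat_backwards (int *a, int x, int n)
{
  /* The stored value is loop-invariant, so the store can use
     VMAT_CONTIGUOUS_DOWN: it walks down through memory but needs
     no permute.  */
  for (int i = n - 1; i >= 0; --i)
    a[i] = x;
}

void
read_invariant (int *a, const int *p, int n)
{
  /* *p has a zero step: every iteration loads from the same address,
     which get_load_store_type can now record as VMAT_INVARIANT.  */
  for (int i = 0; i < n; ++i)
    a[i] = *p;
}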