From 81ce375d1fdd99f9d93b00f4895eab74c3d8b54a Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 2 Apr 2020 08:48:03 -0500 Subject: [PATCH] Fix PR94401 by considering reverse overrun Commit r10-7415 introduced scalar type consideration to eliminate epilogue peeling for gaps, but it exposed a problem: the current handling does not consider the memory access type VMAT_CONTIGUOUS_REVERSE, for which the overrun happens on the low-address side. This patch makes the code handle that case by updating the offset and the constructor element order accordingly. Bootstrapped/regtested on powerpc64le-linux-gnu P8 and aarch64-linux-gnu. 2020-04-02 Kewen Lin gcc/ChangeLog PR tree-optimization/94401 * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE access type when loading halves of vector to avoid peeling for gaps. --- gcc/ChangeLog | 6 ++++++ gcc/tree-vect-stmts.c | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2cec36754d8..f0a9509d970 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2020-04-02 Kewen Lin + + PR tree-optimization/94401 + * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE + access type when loading halves of vector to avoid peeling for gaps. + 2020-04-02 Jakub Jelinek * config/mips/mti-linux.h (SYSROOT_SUFFIX_SPEC): Add a space in diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 46bc2bd067d..7f3a9fb5fb3 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9602,11 +9602,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (new_vtype != NULL_TREE) ltype = half_vtype; } + tree offset + = (dataref_offset ? 
dataref_offset + : build_int_cst (ref_type, 0)); + if (ltype != vectype + && memory_access_type == VMAT_CONTIGUOUS_REVERSE) + { + unsigned HOST_WIDE_INT gap + = DR_GROUP_GAP (first_stmt_info); + gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type)); + tree gapcst = build_int_cst (ref_type, gap); + offset = size_binop (PLUS_EXPR, offset, gapcst); + } data_ref - = fold_build2 (MEM_REF, ltype, dataref_ptr, - dataref_offset - ? dataref_offset - : build_int_cst (ref_type, 0)); + = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); if (alignment_support_scheme == dr_aligned) ; else if (DR_MISALIGNMENT (first_dr_info) == -1) @@ -9619,16 +9628,27 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, TYPE_ALIGN (elem_type)); if (ltype != vectype) { - vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); + vect_copy_ref_info (data_ref, + DR_REF (first_dr_info->dr)); tree tem = make_ssa_name (ltype); new_stmt = gimple_build_assign (tem, data_ref); - vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + vect_finish_stmt_generation (stmt_info, new_stmt, + gsi); data_ref = NULL; vec *v; vec_alloc (v, 2); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, - build_zero_cst (ltype)); + if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) + { + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (ltype)); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); + } + else + { + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (ltype)); + } gcc_assert (new_vtype != NULL_TREE); if (new_vtype == vectype) new_stmt = gimple_build_assign ( -- 2.30.2