Fix PR94401 by considering reverse overrun
author     Kewen Lin <linkw@linux.ibm.com>
           Thu, 2 Apr 2020 13:48:03 +0000 (08:48 -0500)
committer  Kewen Lin <linkw@linux.ibm.com>
           Thu, 2 Apr 2020 13:54:11 +0000 (08:54 -0500)
Commit r10-7415 brought scalar type consideration into eliminating
epilogue peeling for gaps, but it exposed a problem: the existing
handling doesn't consider the memory access type
VMAT_CONTIGUOUS_REVERSE, for which the overrun happens on the
low-address side.  This patch makes the code handle that case by
adjusting the load offset and the constructor element order
accordingly.

Bootstrapped/regtested on powerpc64le-linux-gnu P8
and aarch64-linux-gnu.
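
For illustration only, a minimal standalone C sketch of the idea (not
GCC internals; VF, load_half and the buffers are made up): when the
access is reversed, the gap sits on the low-address side, so the
half-width load is taken gap elements above the data pointer and the
zero half fills the low lanes of the constructed vector.

#include <stdio.h>
#include <string.h>

#define VF 4                 /* modelled vector length in elements */
#define HALF (VF / 2)        /* number of elements actually loaded */

/* Build a VF-element "vector" OUT from a half-width load, padding the
   unread half with zeros.  In the reverse case the overrun would be
   below PTR, so the load offset is bumped by GAP elements and the zero
   half fills the low lanes, mirroring the {0, tem} constructor order
   used by the patch.  */
static void
load_half (const int *ptr, unsigned gap, int reverse, int out[VF])
{
  memset (out, 0, VF * sizeof (int));
  if (reverse)
    memcpy (out + HALF, ptr + gap, HALF * sizeof (int));  /* {0, tem} */
  else
    memcpy (out, ptr, HALF * sizeof (int));               /* {tem, 0} */
}

int
main (void)
{
  /* -1 marks the slots a full-width load would overrun.  */
  int lo_valid[VF] = { 11, 22, -1, -1 };  /* forward: gap on high side */
  int hi_valid[VF] = { -1, -1, 33, 44 };  /* reverse: gap on low side  */
  int fwd[VF], rev[VF];

  load_half (lo_valid, HALF, 0, fwd);
  load_half (hi_valid, HALF, 1, rev);
  printf ("forward: %d %d %d %d\n", fwd[0], fwd[1], fwd[2], fwd[3]);
  printf ("reverse: %d %d %d %d\n", rev[0], rev[1], rev[2], rev[3]);
  return 0;
}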

2020-04-02  Kewen Lin  <linkw@gcc.gnu.org>

gcc/ChangeLog

    PR tree-optimization/94401
    * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE
    access type when loading halves of vector to avoid peeling for gaps.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2cec36754d8133f6c2087668ab99e037cb59bce5..f0a9509d9707418be19834234e5ff3158faa01ab 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-02  Kewen Lin  <linkw@gcc.gnu.org>
+
+       PR tree-optimization/94401
+       * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE
+       access type when loading halves of vector to avoid peeling for gaps.
+
 2020-04-02  Jakub Jelinek  <jakub@redhat.com>
 
        * config/mips/mti-linux.h (SYSROOT_SUFFIX_SPEC): Add a space in
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 46bc2bd067d33997d7ab55430af57ad5d913b683..7f3a9fb5fb34ac7e914d1900b53820fa5d032385 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9602,11 +9602,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                            if (new_vtype != NULL_TREE)
                              ltype = half_vtype;
                          }
+                       tree offset
+                         = (dataref_offset ? dataref_offset
+                                           : build_int_cst (ref_type, 0));
+                       if (ltype != vectype
+                           && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                         {
+                           unsigned HOST_WIDE_INT gap
+                             = DR_GROUP_GAP (first_stmt_info);
+                           gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
+                           tree gapcst = build_int_cst (ref_type, gap);
+                           offset = size_binop (PLUS_EXPR, offset, gapcst);
+                         }
                        data_ref
-                         = fold_build2 (MEM_REF, ltype, dataref_ptr,
-                                        dataref_offset
-                                        ? dataref_offset
-                                        : build_int_cst (ref_type, 0));
+                         = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
                        if (alignment_support_scheme == dr_aligned)
                          ;
                        else if (DR_MISALIGNMENT (first_dr_info) == -1)
@@ -9619,16 +9628,27 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                  TYPE_ALIGN (elem_type));
                        if (ltype != vectype)
                          {
-                           vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+                           vect_copy_ref_info (data_ref,
+                                               DR_REF (first_dr_info->dr));
                            tree tem = make_ssa_name (ltype);
                            new_stmt = gimple_build_assign (tem, data_ref);
-                           vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                           vect_finish_stmt_generation (stmt_info, new_stmt,
+                                                        gsi);
                            data_ref = NULL;
                            vec<constructor_elt, va_gc> *v;
                            vec_alloc (v, 2);
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-                                                   build_zero_cst (ltype));
+                           if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                             {
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                       build_zero_cst (ltype));
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                             }
+                           else
+                             {
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                       build_zero_cst (ltype));
+                             }
                            gcc_assert (new_vtype != NULL_TREE);
                            if (new_vtype == vectype)
                              new_stmt = gimple_build_assign (