vect: Fix an ICE in exact_div [PR95961]
author Fei Yang <felix.yang@huawei.com>
Thu, 2 Jul 2020 09:14:33 +0000 (10:14 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Thu, 2 Jul 2020 09:14:33 +0000 (10:14 +0100)
In the test case for PR95961, the vectorization factor computed
by vect_determine_vectorization_factor is [8,8].  But this is
updated to [1,1] later by vect_update_vf_for_slp.  When we call
vect_get_num_vectors in vect_enhance_data_refs_alignment, the number
of scalars which is based on the vectorization factor is not a multiple
of the number of elements in the vector type.  This leads to
the ICE.  This isn't a simple stream of contiguous vector accesses.
It's hard to predict from the available information how many vector
accesses we'll actually need per iteration.  As discussed, here we
should use the number of scalars instead of the number of vectors as
an upper bound for the loop saving info about DR in the hash table.

2020-07-02  Felix Yang  <felix.yang@huawei.com>

gcc/
PR tree-optimization/95961
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Use the
number of scalars instead of the number of vectors as an upper bound
for the loop saving info about DR in the hash table.  Remove unused
local variables.

gcc/testsuite/
PR tree-optimization/95961
* gcc.target/aarch64/sve/pr95961.c: New test.

gcc/testsuite/gcc.target/aarch64/sve/pr95961.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c b/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c
new file mode 100644 (file)
index 0000000..b9802c8
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+sve -fno-vect-cost-model" } */
+
+typedef struct {
+    unsigned short mprr_2[5][16][16];
+} ImageParameters;
+int s[16][2];
+void intrapred_luma_16x16(ImageParameters *img, int s0)
+{
+  for (int j=0; j < 16; j++)
+    for (int i=0; i < 16; i++)
+      {
+       img->mprr_2[1 ][j][i]=s[j][1];
+       img->mprr_2[2 ][j][i]=s0;
+      }
+}
index eb8288e7a851b6388172725e7aaf08b3e0b41a5c..2462276e7c2d323224193b043e15a85825e9f233 100644 (file)
@@ -1722,7 +1722,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   dr_vec_info *first_store = NULL;
   dr_vec_info *dr0_info = NULL;
   struct data_reference *dr;
-  unsigned int i, j;
+  unsigned int i;
   bool do_peeling = false;
   bool do_versioning = false;
   unsigned int npeel = 0;
@@ -1730,9 +1730,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   bool one_misalignment_unknown = false;
   bool one_dr_unsupportable = false;
   dr_vec_info *unsupportable_dr_info = NULL;
-  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  unsigned possible_npeel_number = 1;
-  tree vectype;
   unsigned int mis, same_align_drs_max = 0;
   hash_table<peel_info_hasher> peeling_htab (1);
 
@@ -1792,7 +1789,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;
 
-             vectype = STMT_VINFO_VECTYPE (stmt_info);
              /* If known_alignment_for_access_p then we have set
                 DR_MISALIGNMENT which is only done if we know it at compiler
                 time, so it is safe to assume target alignment is constant.
@@ -1819,22 +1815,17 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  vectorization factor.
                  We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
+             poly_uint64 nscalars = npeel_tmp;
               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
                {
-                 poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
-                                         ? vf * DR_GROUP_SIZE (stmt_info) : vf);
-                 possible_npeel_number
-                   = vect_get_num_vectors (nscalars, vectype);
-
-                 /* NPEEL_TMP is 0 when there is no misalignment, but also
-                    allow peeling NELEMENTS.  */
-                 if (DR_MISALIGNMENT (dr_info) == 0)
-                   possible_npeel_number++;
+                 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+                 nscalars = (STMT_SLP_TYPE (stmt_info)
+                             ? vf * DR_GROUP_SIZE (stmt_info) : vf);
                }
 
              /* Save info about DR in the hash table.  Also include peeling
                 amounts according to the explanation above.  */
-              for (j = 0; j < possible_npeel_number; j++)
+             while (known_le (npeel_tmp, nscalars))
                 {
                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
                                            dr_info, npeel_tmp);
@@ -2059,8 +2050,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   if (do_peeling)
     {
       stmt_vec_info stmt_info = dr0_info->stmt;
-      vectype = STMT_VINFO_VECTYPE (stmt_info);
-
       if (known_alignment_for_access_p (dr0_info))
         {
          bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),