tree-optimization/95539 - fix SLP_TREE_REPRESENTATIVE vs. dr_info
author: Richard Biener <rguenther@suse.de>
Fri, 5 Jun 2020 08:13:27 +0000 (10:13 +0200)
committer: Richard Biener <rguenther@suse.de>
Fri, 5 Jun 2020 09:45:43 +0000 (11:45 +0200)
This fixes a disconnect between the stmt_info used for dr_info
analysis and the one in SLP_TREE_REPRESENTATIVE with a temporary
workaround.

2020-06-05  Richard Biener  <rguenther@suse.de>

PR tree-optimization/95539
* tree-vect-data-refs.c
(vect_slp_analyze_and_verify_instance_alignment): Use
SLP_TREE_REPRESENTATIVE for the data-ref check.
* tree-vect-stmts.c (vectorizable_load): Reset stmt_info
back to the first scalar stmt rather than the
SLP_TREE_REPRESENTATIVE to match previous behavior.

* gcc.dg/vect/pr95539.c: New testcase.

gcc/testsuite/gcc.dg/vect/pr95539.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-stmts.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr95539.c b/gcc/testsuite/gcc.dg/vect/pr95539.c
new file mode 100644 (file)
index 0000000..de3b393
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+
+typedef unsigned short uint16_t;
+typedef short __v8hi __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__))
+_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
+              short __q3, short __q2, short __q1, short __q0)
+{
+  return __extension__ (__m128i)(__v8hi){
+      __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
+}
+void gcm_HashMult_hw(__m128i *x, const unsigned char *buf, unsigned int count)
+{
+  unsigned i;
+  __m128i bin __attribute__((aligned(16)));
+  for (i = 0; i < count; i++, buf += 16)
+    {
+      bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
+                         ((uint16_t)buf[2] << 8) | buf[3],
+                         ((uint16_t)buf[4] << 8) | buf[5],
+                         ((uint16_t)buf[6] << 8) | buf[7],
+                         ((uint16_t)buf[8] << 8) | buf[9],
+                         ((uint16_t)buf[10] << 8) | buf[11],
+                         ((uint16_t)buf[12] << 8) | buf[13],
+                         ((uint16_t)buf[14] << 8) | buf[15]);
+      *(x++) = bin;
+    }
+}
index b950aa9e50d1b5be71d48480b17f85522736051f..fe543606a52608828058d198c018e5489ce23798 100644 (file)
@@ -2471,7 +2471,7 @@ vect_slp_analyze_and_verify_instance_alignment (vec_info *vinfo,
       return false;
 
   node = SLP_INSTANCE_TREE (instance);
-  if (STMT_VINFO_DATA_REF (SLP_TREE_SCALAR_STMTS (node)[0])
+  if (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))
       && ! vect_slp_analyze_and_verify_node_alignment
             (vinfo, SLP_INSTANCE_TREE (instance)))
     return false;
index c0be6ef502c5f2c7220a648f2409eb8b16f7eddf..b24b0fe43040c690ecced3fdbb0ed0b678ff0915 100644 (file)
@@ -8661,6 +8661,20 @@ vectorizable_load (vec_info *vinfo,
       && ! vec_stmt)
     return false;
 
+  if (!STMT_VINFO_DATA_REF (stmt_info))
+    return false;
+
+  /* ???  Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
+     for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
+     which can be different when reduction chains were re-ordered.
+     Now that we figured we're a dataref reset stmt_info back to
+     SLP_TREE_SCALAR_STMTS[0].  When we're SLP only things should be
+     refactored in a way to maintain the dr_vec_info pointer for the
+     relevant access explicitly.  */
+  stmt_vec_info orig_stmt_info = stmt_info;
+  if (slp_node)
+    stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+
   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
     {
@@ -8703,9 +8717,6 @@ vectorizable_load (vec_info *vinfo,
        }
     }
 
-  if (!STMT_VINFO_DATA_REF (stmt_info))
-    return false;
-
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
@@ -8876,7 +8887,7 @@ vectorizable_load (vec_info *vinfo,
        check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
                                  memory_access_type, &gs_info, mask);
 
-      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+      STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
                            slp_node, cost_vec);
       return true;