+2015-03-23 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/65518
+ * tree-vect-stmts.c (vectorizable_load): Reject single-element
+ interleaving cases we generate abysmal code for.
+
2015-03-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/65494
+2015-03-23 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/65518
+ * gcc.dg/vect/pr65518.c: New testcase.
+
2015-03-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/65494
--- /dev/null
+/* { dg-do run } */\r
+\r
+extern void abort (void);\r
+\r
+typedef struct giga /* Array element type used by the test: 16 unsigned ints per element. */\r
+{\r
+ unsigned int g[16]; /* addfst reads only g[0] of each element; main fills all 16. */\r
+} giga;\r
+\r
+unsigned long __attribute__((noinline,noclone)) /* NOTE(review): presumably kept out of line so the loop is analyzed inside addfst itself - confirm. */\r
+addfst(giga const *gptr, int num) /* Returns the sum of gptr[i].g[0] for i in [0, num). */\r
+{\r
+ unsigned int retval = 0; /* Accumulates as unsigned int; converted to unsigned long only at the return. */\r
+ int i;\r
+ for (i = 0; i < num; i++)\r
+ retval += gptr[i].g[0]; /* Reads a single unsigned int out of every giga element. */\r
+ return retval;\r
+}\r
+\r
+int main () /* Fills 8 giga elements with the values 1..128 and checks addfst's sum of the g[0] fields. */\r
+{\r
+ struct giga g[8];\r
+ unsigned int n = 1; /* Next value to store; incremented after every store. */\r
+ int i, j;\r
+ for (i = 0; i < 8; ++i)\r
+ for (j = 0; j < 16; ++j)\r
+ {\r
+ g[i].g[j] = n++;\r
+ __asm__ volatile (""); /* Empty volatile asm; NOTE(review): presumably keeps this init loop from being vectorized - confirm. */\r
+ }\r
+ if (addfst (g, 8) != 456) /* g[i].g[0] == 1 + 16*i, so the expected sum is 8 + 16*28 = 456. */\r
+ abort ();\r
+ return 0;\r
+}\r
+\r
+/* We don't want to vectorize the single-element interleaving the way\r
+ we currently do (without skipping the unneeded vectors in the\r
+ gap between gptr[0].g[0] and gptr[1].g[0]), because that is very\r
+ sub-optimal and causes memory explosion (even though the cost model\r
+ should reject that in the end). */\r
+\r
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops in function" 2 "vect" } } */\r
+/* { dg-final { cleanup-tree-dump "vect" } } */\r
gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+
+ /* If this is single-element interleaving with an element distance
+ that leaves unused vector loads around, punt - we at least create
+ very sub-optimal code in that case (and blow up memory,
+ see PR65518). */
+ if (first_stmt == stmt
+ && !GROUP_NEXT_ELEMENT (stmt_info)
+ && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "single-element interleaving not supported "
+ "for not adjacent vector loads\n");
+ return false;
+ }
+
if (!slp && !PURE_SLP_STMT (stmt_info))
{
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));