+2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
+ (rs6000_init_cost): Initialize rs6000_vect_nonmem.
+ (rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
+ (rs6000_finish_cost): Avoid vectorizing simple copy loops with
+ VF=2 that require versioning.
+
2017-05-05 David Malcolm <dmalcolm@redhat.com>
* diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const
/* Implement targetm.vectorize.init_cost. */
+/* For each vectorized loop, this var holds TRUE iff a non-memory vector
+ instruction is needed by the vectorization. It is cleared in
+ rs6000_init_cost, set in rs6000_add_stmt_cost, and consulted in
+ rs6000_finish_cost. */
+static bool rs6000_vect_nonmem;
+
static void *
rs6000_init_cost (struct loop *loop_info)
{
data->cost[vect_prologue] = 0;
data->cost[vect_body] = 0;
data->cost[vect_epilogue] = 0;
+ rs6000_vect_nonmem = false; /* No non-memory vector stmt costed yet. */
return data;
}
retval = (unsigned) (count * stmt_cost);
cost_data->cost[where] += retval;
+
+ /* Check whether we're doing something other than just a copy loop.
+ Not all simple copy loops may be profitably vectorized; see
+ rs6000_finish_cost. */
+ if ((kind == vec_to_scalar || kind == vec_perm
+ || kind == vec_promote_demote || kind == vec_construct
+ || kind == scalar_to_vec)
+ || (where == vect_body && kind == vector_stmt))
+ rs6000_vect_nonmem = true;
}
return retval;
if (cost_data->loop_info)
rs6000_density_test (cost_data);
+ /* Don't vectorize minimum-vectorization-factor, simple copy loops
+ that require versioning for any reason. The vectorization is at
+ best a wash inside the loop, and the versioning checks make
+ profitability highly unlikely and potentially quite harmful. */
+ if (cost_data->loop_info)
+ {
+ loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
+ if (!rs6000_vect_nonmem
+ && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
+ && LOOP_REQUIRES_VERSIONING (vec_info))
+ cost_data->cost[vect_body] += 10000;
+ }
+
*prologue_cost = cost_data->cost[vect_prologue];
*body_cost = cost_data->cost[vect_body];
*epilogue_cost = cost_data->cost[vect_epilogue];
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+
+/* Verify that a pure copy loop with a vectorization factor of two
+ that requires versioning (e.g. for alignment) will not be vectorized.
+ See the cost model hooks in rs6000.c. */
+
+typedef long unsigned int size_t;
+typedef unsigned char uint8_t;
+
+extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
+ size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
+
+void foo (void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+ uint8_t *d = (uint8_t*)dstPtr;
+ const uint8_t *s = (const uint8_t*)srcPtr;
+ uint8_t* const e = (uint8_t*)dstEnd;
+
+ do
+ {
+ memcpy (d, s, 8); /* Copy one fixed 8-byte chunk per iteration. */
+ d += 8;
+ s += 8;
+ }
+ while (d < e); /* Tested after the body, so at least 8 bytes are copied. */
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */