&& (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
return false;
- if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
+ /* Expand memcpy for constant length operands without a loop if it
+    is shorter that way.
+
+    With a constant length argument, a memcpy loop (without pfd) is
+    36 bytes, i.e. as large as 6 MVCs.  */
+ if (GET_CODE (len) == CONST_INT
+ && INTVAL (len) >= 0
+ && INTVAL (len) <= 256 * 6
+ && (!TARGET_MVCLE || INTVAL (len) <= 256))
{
- if (INTVAL (len) > 0)
- emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
+ HOST_WIDE_INT o, l;
+
+ for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
+ {
+ rtx newdst = adjust_address (dst, BLKmode, o);
+ rtx newsrc = adjust_address (src, BLKmode, o);
+ emit_insn (gen_movmem_short (newdst, newsrc,
+ GEN_INT (l > 256 ? 255 : l - 1)));
+ }
}
else if (TARGET_MVCLE)
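
For reference, the chunking performed by the new loop can be checked with a minimal standalone sketch (plain C, not part of the patch and independent of GCC internals): every 256-byte chunk becomes one MVC whose length operand is the chunk size minus one, so a 700 byte copy, for example, expands to three MVCs covering 256, 256 and 188 bytes.

/* Standalone sketch: split a constant memcpy length into MVC-sized
   chunks, mirroring the loop added above.  Each MVC moves at most
   256 bytes and encodes length - 1.  */

#include <stdio.h>

static void
show_chunks (long len)
{
  long o, l;

  printf ("len %ld:", len);
  for (l = len, o = 0; l > 0; l -= 256, o += 256)
    /* Chunk offset and the length - 1 value encoded in the MVC.  */
    printf (" mvc@%ld(%ld)", o, l > 256 ? 255 : l - 1);
  printf ("\n");
}

int
main (void)
{
  /* The lengths used by the testcase below: 700 -> 3 MVCs, 0 -> none,
     256 -> 1, 512 -> 2, 768 -> 3.  1537 exceeds 256 * 6 and keeps
     the loop.  */
  show_chunks (700);
  show_chunks (0);
  show_chunks (256);
  show_chunks (512);
  show_chunks (768);
  return 0;
}
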
--- /dev/null
+/* Make sure that short memcpys with a constant length are expanded
+   without a loop.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch" } */
+
+/* 3 MVCs */
+void
+*memcpy1(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 700);
+}
+
+/* NOP */
+void
+*memcpy2(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 0);
+}
+
+/* 1 MVC */
+void
+*memcpy3(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 256);
+}
+
+/* 2 MVCs */
+void
+*memcpy4(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 512);
+}
+
+/* 3 MVCs */
+void
+*memcpy5(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 768);
+}
+
+/* Loop with 2 MVCs */
+void
+*memcpy6(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 1537);
+}
+
+/* memcpy6 uses a loop: check for the two load address (la) instructions
+   used to increment src and dest.  */
+/* { dg-final { scan-assembler-times "la" 2 } } */
+
+/* { dg-final { scan-assembler-times "mvc" 11 } } */
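
Taken together, the counts noted above are 3 + 0 + 1 + 2 + 3 = 9 MVCs for the constant-length cases plus 2 for the looped memcpy6, which gives the 11 occurrences expected by the final scan-assembler-times check.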