void
s390_expand_setmem (rtx dst, rtx len, rtx val)
{
+ const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+
if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
return;
{
rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
rtx_code_label *loop_start_label = gen_label_rtx ();
- rtx_code_label *loop_end_label = gen_label_rtx ();
- rtx_code_label *end_label = gen_label_rtx ();
+ rtx_code_label *onebyte_end_label = gen_label_rtx ();
+ rtx_code_label *zerobyte_end_label = gen_label_rtx ();
+ rtx_code_label *restbyte_end_label = gen_label_rtx ();
machine_mode mode;
mode = GET_MODE (len);
if (mode == VOIDmode)
- mode = Pmode;
+ mode = Pmode;
dst_addr = gen_reg_rtx (Pmode);
count = gen_reg_rtx (mode);
convert_move (count, len, 1);
emit_cmp_and_jump_insns (count, const0_rtx,
- EQ, NULL_RTX, mode, 1, end_label);
+ EQ, NULL_RTX, mode, 1, zerobyte_end_label,
+ very_unlikely);
+ /* We need to make a copy of the target address since memset is
+ supposed to return it unmodified. We have to make it here
+ already since the new reg is used at onebyte_end_label. */
emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
dst = change_address (dst, VOIDmode, dst_addr);
- if (val == const0_rtx)
- temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
- OPTAB_DIRECT);
- else
+ if (val != const0_rtx)
{
- dstp1 = adjust_address (dst, VOIDmode, 1);
+ /* When using the overlapping mvc the original target
+ address is only accessed as single byte entity (even by
+ the mvc reading this value). */
set_mem_size (dst, 1);
-
- /* Initialize memory by storing the first byte. */
- emit_move_insn (adjust_address (dst, QImode, 0), val);
-
- /* If count is 1 we are done. */
- emit_cmp_and_jump_insns (count, const1_rtx,
- EQ, NULL_RTX, mode, 1, end_label);
-
- temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
- OPTAB_DIRECT);
- }
+ dstp1 = adjust_address (dst, VOIDmode, 1);
+ emit_cmp_and_jump_insns (count,
+ const1_rtx, EQ, NULL_RTX, mode, 1,
+ onebyte_end_label, very_unlikely);
+ }
+
+ /* There is one unconditional (mvi+mvc)/xc after the loop
+ dealing with the rest of the bytes, subtracting two (mvi+mvc)
+ or one (xc) here leaves this number of bytes to be handled by
+ it. */
+ temp = expand_binop (mode, add_optab, count,
+ val == const0_rtx ? constm1_rtx : GEN_INT (-2),
+ count, 1, OPTAB_DIRECT);
if (temp != count)
- emit_move_insn (count, temp);
+ emit_move_insn (count, temp);
temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
OPTAB_DIRECT);
if (temp != blocks)
- emit_move_insn (blocks, temp);
+ emit_move_insn (blocks, temp);
emit_cmp_and_jump_insns (blocks, const0_rtx,
- EQ, NULL_RTX, mode, 1, loop_end_label);
+ EQ, NULL_RTX, mode, 1, restbyte_end_label);
+
+ emit_jump (loop_start_label);
+
+ if (val != const0_rtx)
+ {
+ /* The 1 byte != 0 special case. Not handled efficiently
+ since we require two jumps for that. However, this
+ should be very rare. */
+ emit_label (onebyte_end_label);
+ emit_move_insn (adjust_address (dst, QImode, 0), val);
+ emit_jump (zerobyte_end_label);
+ }
emit_label (loop_start_label);
if (val == const0_rtx)
emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
else
- emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
+ {
+ /* Set the first byte in the block to the value and use an
+ overlapping mvc for the block. */
+ emit_move_insn (adjust_address (dst, QImode, 0), val);
+ emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
+ }
s390_load_address (dst_addr,
gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
OPTAB_DIRECT);
if (temp != blocks)
- emit_move_insn (blocks, temp);
+ emit_move_insn (blocks, temp);
emit_cmp_and_jump_insns (blocks, const0_rtx,
- EQ, NULL_RTX, mode, 1, loop_end_label);
+ NE, NULL_RTX, mode, 1, loop_start_label);
- emit_jump (loop_start_label);
- emit_label (loop_end_label);
+ emit_label (restbyte_end_label);
if (val == const0_rtx)
- emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
+ emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
else
- emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
- emit_label (end_label);
+ {
+ /* Set the first byte in the block to the value and use an
+ overlapping mvc for the block. */
+ emit_move_insn (adjust_address (dst, QImode, 0), val);
+ /* execute only uses the lowest 8 bits of count that's
+ exactly what we need here. */
+ emit_insn (gen_movmem_short (dstp1, dst,
+ convert_to_mode (Pmode, count, 1)));
+ }
+
+ emit_label (zerobyte_end_label);
}
}