static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
+static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
+static rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
+static rtx ix86_expand_aligntest PARAMS ((rtx, int));
+static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
struct ix86_address
{
}
}
+/* Helper function for the string operations below.  Emits code that tests
+ whether the VALUE bit of VARIABLE is set and, if it is clear, jumps to
+ the returned label; the caller is responsible for emitting the label.  */
+static rtx
+ix86_expand_aligntest (variable, value)
+ rtx variable;
+ int value;
+{
+ rtx label = gen_label_rtx ();
+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
+ if (GET_MODE (variable) == DImode)
+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
+ else
+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
+ 1, 0, label);
+ return label;
+}
+
+/* Decrease COUNTREG by VALUE.  */
+static void
+ix86_adjust_counter (countreg, value)
+ rtx countreg;
+ HOST_WIDE_INT value;
+{
+ if (GET_MODE (countreg) == DImode)
+ emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
+ else
+ emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
+}
+
+/* Zero-extend EXP into a Pmode register.  EXP may be a constant
+ (VOIDmode), already Pmode, or an SImode value that needs extending.  */
+static rtx
+ix86_zero_extend_to_Pmode (exp)
+ rtx exp;
+{
+ rtx r;
+ if (GET_MODE (exp) == VOIDmode)
+ return force_reg (Pmode, exp);
+ if (GET_MODE (exp) == Pmode)
+ return copy_to_mode_reg (Pmode, exp);
+ r = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendsidi2 (r, exp));
+ return r;
+}
+
+/* Expand string move (memcpy) operation.  Use i386 string operations when
+ profitable.  ix86_expand_clrstr contains similar code.  */
+int
+ix86_expand_movstr (dst, src, count_exp, align_exp)
+ rtx dst, src, count_exp, align_exp;
+{
+ rtx srcreg, destreg, countreg;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+ rtx insns;
+
+ start_sequence ();
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+ /* This simple hack avoids all inlining code and simplifies the code below.  */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 64;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ count = INTVAL (count_exp);
+
+ /* Figure out the proper mode for the counter.  For 32-bit targets it is
+ always SImode; for 64-bit targets use SImode when possible, otherwise
+ DImode.  COUNT was set above to the number of bytes to copy when it is
+ known at compile time.  */
+ if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
+ || x86_64_zero_extended_value (count_exp))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ if (counter_mode != SImode && counter_mode != DImode)
+ abort ();
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+
+ emit_insn (gen_cld ());
+
+ /* When optimizing for size, emit a simple rep movsb instruction for
+ counts not divisible by 4.  */
+
+ if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
+ {
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ else
+ emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ }
+
+ /* For constant aligned (or small unaligned) copies use rep movsl
+ followed by code copying the rest. For PentiumPro ensure 8 byte
+ alignment to allow rep movsl acceleration. */
+
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int)64))
+ {
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ if (count & ~(size - 1))
+ {
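+ /* Number of SIZE-byte blocks to move; the mask keeps the shifted count
+ a valid SImode immediate on 32-bit targets, where HOST_WIDE_INT may
+ be wider.  */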
+ countreg = copy_to_mode_reg (counter_mode,
+ GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff)));
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+ if (size == 4)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ else
+ emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ }
+ else
+ emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ }
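+ /* Copy the remaining (at most SIZE - 1) bytes piecewise.  */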
+ if (size == 8 && (count & 0x04))
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ if (count & 0x02)
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ if (count & 0x01)
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ }
+ /* The generic code based on the glibc implementation:
+ - align destination to 4 bytes (8 byte alignment is used for PentiumPro
+ allowing accelerated copying there)
+ - copy the data using rep movsl
+ - copy the rest. */
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+
+ /* In case we don't know anything about the alignment, default to the
+ library version, since it is usually equally fast and results in
+ shorter code.  */
+ if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
+ {
+ end_sequence ();
+ return 0;
+ }
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+
+ /* We don't use loops to align the destination or to copy parts smaller
+ than 4 bytes, because gcc is able to optimize such code better (in
+ the case the destination or the count really is aligned, gcc is often
+ able to predict the branches) and it is also friendlier to hardware
+ branch prediction.
+
+ Using loops is beneficial for the generic case, because we can
+ handle small counts using the loops.  Many CPUs (such as Athlon)
+ have large REP prefix setup costs.
+
+ This is quite costly.  Maybe we can revisit this decision later or
+ add some customizability to this code.  */
+
+ if (count == 0
+ && align < (TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260)
+ ? 8 : UNITS_PER_WORD))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
+ LEU, 0, counter_mode, 1, 0, label);
+ }
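+ /* For short variable counts the branch above skips both the alignment
+ prologue and the rep move; the tail code below handles everything.  */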
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4
+ && ((TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260))
+ || TARGET_64BIT))
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
+ destreg, srcreg, countreg2));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
+ emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
+ destreg, srcreg, countreg2));
+ }
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
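+ /* Copy the tail.  For constant counts the relevant bits of COUNT are
+ tested directly; for variable counts the low bits of the count
+ register are tested at run time.  */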
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ if ((align <= 4 || count == 0) && TARGET_64BIT)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+
+ insns = get_insns ();
+ end_sequence ();
+
+ ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
+ emit_insns (insns);
+ return 1;
+}
+
+/* Expand string clear operation (bzero).  Use i386 string operations when
+ profitable.  ix86_expand_movstr contains similar code.  */
+int
+ix86_expand_clrstr (src, count_exp, align_exp)
+ rtx src, count_exp, align_exp;
+{
+ rtx destreg, zeroreg, countreg;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+ /* This simple hack avoids all inlining code and simplifies the code below.  */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 32;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ count = INTVAL (count_exp);
+ /* Figure out the proper mode for the counter.  For 32-bit targets it is
+ always SImode; for 64-bit targets use SImode when possible, otherwise
+ DImode.  COUNT was set above to the number of bytes to clear when it is
+ known at compile time.  */
+ if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
+ || x86_64_zero_extended_value (count_exp))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+
+ emit_insn (gen_cld ());
+
+ /* When optimizing for size, emit a simple rep stosb instruction for
+ counts not divisible by 4.  */
+
+ if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
+ {
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ zeroreg = copy_to_mode_reg (QImode, const0_rtx);
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
+ destreg, countreg));
+ else
+ emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
+ destreg, countreg));
+ }
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int)64))
+ {
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
+ if (count & ~(size - 1))
+ {
+ countreg = copy_to_mode_reg (counter_mode,
+ GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff)));
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+ if (size == 4)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
+ destreg, countreg));
+ else
+ emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
+ destreg, countreg));
+ }
+ else
+ emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
+ destreg, countreg));
+ }
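+ /* Store the remaining (at most SIZE - 1) bytes piecewise.  */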
+ if (size == 8 && (count & 0x04))
+ emit_insn (gen_strsetsi (destreg,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ if (count & 0x02)
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ if (count & 0x01)
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ }
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+
+ /* In case we don't know anything about the alignment, default to the
+ library version, since it is usually equally fast and results in
+ shorter code.  */
+ if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
+ return 0;
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+ zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
+
+ if (count == 0
+ && align < (TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260)
+ ? 8 : UNITS_PER_WORD))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
+ LEU, 0, counter_mode, 1, 0, label);
+ }
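+ /* As in ix86_expand_movstr, short variable counts branch past both the
+ alignment prologue and the rep store, straight to the tail code.  */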
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260))
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
+ ? gen_rtx_SUBREG (SImode, zeroreg, 0)
+ : zeroreg)));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
+ destreg, countreg2));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
+ emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
+ destreg, countreg2));
+ }
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ emit_insn (gen_strsetsi (destreg,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ if (TARGET_64BIT && (align <= 4 || count == 0))
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ emit_insn (gen_strsetsi (destreg,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+ return 1;
+}
+/* Expand strlen. */
+int
+ix86_expand_strlen (out, src, eoschar, align)
+ rtx out, src, eoschar, align;
+{
+ rtx addr, scratch1, scratch2, scratch3, scratch4;
+
+ /* The generic case of the strlen expander is long.  Avoid expanding it
+ unless TARGET_INLINE_ALL_STRINGOPS.  */
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !TARGET_INLINE_ALL_STRINGOPS
+ && !optimize_size
+ && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
+ return 0;
+
+ addr = force_reg (Pmode, XEXP (src, 0));
+ scratch1 = gen_reg_rtx (Pmode);
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !optimize_size)
+ {
+ /* Well it seems that some optimizer does not combine a call like
+ foo(strlen(bar), strlen(bar));
+ when the move and the subtraction are done here.  It does calculate
+ the length just once when these instructions are done inside of
+ output_strlen_unroll().  But I think since &bar[strlen(bar)] is
+ often used and I use one fewer register for the lifetime of
+ output_strlen_unroll() this is better.  */
+
+ emit_move_insn (out, addr);
+
+ ix86_expand_strlensi_unroll_1 (out, align);
+
+ /* strlensi_unroll_1 returns the address of the zero at the end of
+ the string, like memchr(), so compute the length by subtracting
+ the start address. */
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3 (out, out, addr));
+ else
+ emit_insn (gen_subsi3 (out, out, addr));
+ }
+ else
+ {
+ scratch2 = gen_reg_rtx (Pmode);
+ scratch3 = gen_reg_rtx (Pmode);
+ scratch4 = force_reg (Pmode, constm1_rtx);
+
+ emit_move_insn (scratch3, addr);
+ eoschar = force_reg (QImode, eoschar);
+
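+ /* The count register starts at -1; repnz scasb decrements it once per
+ byte scanned, including the terminator, leaving -(length + 2).  The
+ length is therefore recovered below as ~count - 1.  */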
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
+ align, scratch4, scratch3));
+ emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
+ emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
+ }
+ else
+ {
+ emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
+ align, scratch4, scratch3));
+ emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
+ emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
+ }
+ }
+ return 1;
+}
+
/* Expand the appropriate insns for doing strlen if not just doing
repnz; scasb
This is just the body. It needs the initialisations mentioned above and
some address computing at the end. These things are done in i386.md. */
-void
-ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
- rtx out, align_rtx, scratch;
+static void
+ix86_expand_strlensi_unroll_1 (out, align_rtx)
+ rtx out, align_rtx;
{
int align;
rtx tmp;
rtx end_0_label = gen_label_rtx ();
rtx mem;
rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
align = 0;
if (GET_CODE (align_rtx) == CONST_INT)
/* Is there a known alignment and is it less than 4? */
if (align < 4)
{
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
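+ /* The alignment tests below operate on this Pmode copy of OUT.  */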
/* Is there a known alignment and is it not 2? */
if (align != 2)
{
align_2_label = gen_label_rtx (); /* Label when aligned to 2 bytes */
/* Leave just the 3 lower bits. */
- align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- SImode, 1, 0, align_4_label);
+ Pmode, 1, 0, align_4_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
- SImode, 1, 0, align_2_label);
+ Pmode, 1, 0, align_2_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
- SImode, 1, 0, align_3_label);
+ Pmode, 1, 0, align_3_label);
}
else
{
/* Since the alignment is 2, we have to check 2 or 0 bytes;
check whether it is aligned to 4 bytes.  */
- align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- SImode, 1, 0, align_4_label);
+ Pmode, 1, 0, align_4_label);
}
mem = gen_rtx_MEM (QImode, out);
QImode, 1, 0, end_0_label);
/* Increment the address. */
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
emit_label (align_3_label);
}
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
mem = gen_rtx_MEM (SImode, out);
emit_move_insn (scratch, mem);
- emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
+ else
+ emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside the loop and many cycles.  */
if (TARGET_CMOVE)
{
rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
emit_move_insn (reg, tmpreg);
emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
reg,
tmpreg)));
/* Emit lea manually to avoid clobbering of flags. */
- emit_insn (gen_rtx_SET (SImode, reg,
- gen_rtx_PLUS (SImode, out, GEN_INT (2))));
+ emit_insn (gen_rtx_SET (SImode, reg2,
+ gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, out,
- gen_rtx_IF_THEN_ELSE (SImode, tmp,
- reg,
- out)));
+ gen_rtx_IF_THEN_ELSE (Pmode, tmp,
+ reg2,
+ out)));
}
else
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
+ else
+ emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
emit_label (end_2_label);
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
- emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
+ else
+ emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
emit_label (end_0_label);
}
""
"
{
- rtx srcreg, destreg, countreg;
- int align = 0;
- int count = -1;
- rtx insns;
-
- start_sequence ();
-
- if (GET_CODE (operands[3]) == CONST_INT)
- align = INTVAL (operands[3]);
-
- /* This simple hack avoids all inlining code and simplifies code bellow. */
- if (!TARGET_ALIGN_STRINGOPS)
- align = 32;
-
- if (GET_CODE (operands[2]) == CONST_INT)
- count = INTVAL (operands[2]);
-
- destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
- srcreg = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
-
- emit_insn (gen_cld ());
-
- /* When optimizing for size emit simple rep ; movsb instruction for
- counts not divisible by 4. */
+ if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+}")
- if ((!optimize || optimize_size)
- && (count < 0 || (count & 0x03)))
- {
- countreg = copy_to_mode_reg (SImode, operands[2]);
- emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
- destreg, srcreg, countreg));
- }
+(define_expand "movstrdi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:DI 2 "nonmemory_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))]
+ "TARGET_64BIT"
+ "
+{
+ if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+}")
- /* For constant aligned (or small unaligned) copies use rep movsl
- followed by code copying the rest. For PentiumPro ensure 8 byte
- alignment to allow rep movsl acceleration. */
+;; Most CPUs don't like single string operations
+;; Handle this case here to simplify previous expander.
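+;; Each expander emits the one-insn string operation when
+;; TARGET_SINGLE_STRINGOP or optimize_size is set, and otherwise falls
+;; through to the discrete load/store/pointer-update RTL in its pattern.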
- else if (count >= 0
- && (align >= 8
- || (!TARGET_PENTIUMPRO && align >= 4)
- || optimize_size || count < 64))
- {
- if (count & ~0x03)
- {
- countreg = copy_to_mode_reg (SImode,
- GEN_INT ((count >> 2)
- & 0x3fffffff));
- emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
- destreg, srcreg, countreg));
- }
- if (count & 0x02)
- emit_insn (gen_strmovhi (destreg, srcreg));
- if (count & 0x01)
- emit_insn (gen_strmovqi (destreg, srcreg));
- }
- /* The generic code based on the glibc implementation:
- - align destination to 4 bytes (8 byte alignment is used for PentiumPro
- allowing accelerated copying there)
- - copy the data using rep movsl
- - copy the rest. */
- else
+(define_expand "strmovdi_rex64"
+ [(set (match_dup 2)
+ (mem:DI (match_operand:DI 1 "register_operand" "")))
+ (set (mem:DI (match_operand:DI 0 "register_operand" ""))
+ (match_dup 2))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8)))
+ (clobber (reg:CC 17))])
+ (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 8)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
{
- rtx countreg2;
- rtx label = NULL;
-
- /* In case we don't know anything about the alignment, default to
- library version, since it is usually equally fast and result in
- shorter code. */
- if (!TARGET_INLINE_ALL_STRINGOPS && align < 4)
- {
- end_sequence ();
- FAIL;
- }
-
- if (TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld ());
-
- countreg2 = gen_reg_rtx (SImode);
- countreg = copy_to_mode_reg (SImode, operands[2]);
-
- /* We don't use loops to align destination and to copy parts smaller
- than 4 bytes, because gcc is able to optimize such code better (in
- the case the destination or the count really is aligned, gcc is often
- able to predict the branches) and also it is friendlier to the
- hardware branch prediction.
-
- Using loops is benefical for generic case, because we can
- handle small counts using the loops. Many CPUs (such as Athlon)
- have large REP prefix setup costs.
-
- This is quite costy. Maybe we can revisit this decision later or
- add some customizability to this code. */
-
- if (count < 0
- && align < (TARGET_PENTIUMPRO && (count < 0 || count >= 260) ? 8 : 4))
- {
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (countreg, GEN_INT (3),
- LEU, 0, SImode, 1, 0, label);
- }
- if (align <= 1)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (1)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strmovqi (destreg, srcreg));
- emit_insn (gen_addsi3 (countreg, countreg, constm1_rtx));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 2)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (2)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strmovhi (destreg, srcreg));
- emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-2)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 4 && TARGET_PENTIUMPRO && (count < 1 || count >= 260))
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (4)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strmovsi (destreg, srcreg));
- emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-4)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-
- if (!TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld());
- emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
- emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
- destreg, srcreg, countreg2));
-
- if (label)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 2 && count > 0 && (count & 2))
- emit_insn (gen_strmovhi (destreg, srcreg));
- if (align <= 2 || count < 0)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (2)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strmovhi (destreg, srcreg));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 1 && count > 0 && (count & 1))
- emit_insn (gen_strmovsi (destreg, srcreg));
- if (align <= 1 || count < 0)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (1)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strmovqi (destreg, srcreg));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
+ emit_insn (gen_strmovdi_rex_1 (operands[0], operands[1], operands[0],
+ operands[1]));
+ DONE;
}
-
- insns = get_insns ();
- end_sequence ();
-
- ix86_set_move_mem_attrs (insns, operands[0], operands[1], destreg, srcreg);
- emit_insns (insns);
- DONE;
+ else
+ operands[2] = gen_reg_rtx (DImode);
}")
-;; Most CPUs don't like single string operations
-;; Handle this case here to simplify previous expander.
(define_expand "strmovsi"
[(set (match_dup 2)
""
"
{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strmovsi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0],
operands[2] = gen_reg_rtx (SImode);
}")
+(define_expand "strmovsi_rex64"
+ [(set (match_dup 2)
+ (mem:SI (match_operand:DI 1 "register_operand" "")))
+ (set (mem:SI (match_operand:DI 0 "register_operand" ""))
+ (match_dup 2))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4)))
+ (clobber (reg:CC 17))])
+ (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 4)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strmovsi_rex_1 (operands[0], operands[1], operands[0],
+ operands[1]));
+ DONE;
+ }
+ else
+ operands[2] = gen_reg_rtx (SImode);
+}")
+
(define_expand "strmovhi"
[(set (match_dup 2)
(mem:HI (match_operand:SI 1 "register_operand" "")))
""
"
{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strmovhi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0],
operands[2] = gen_reg_rtx (HImode);
}")
+(define_expand "strmovhi_rex64"
+ [(set (match_dup 2)
+ (mem:HI (match_operand:DI 1 "register_operand" "")))
+ (set (mem:HI (match_operand:DI 0 "register_operand" ""))
+ (match_dup 2))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2)))
+ (clobber (reg:CC 17))])
+ (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 2)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strmovhi_rex_1 (operands[0], operands[1], operands[0],
+ operands[1]));
+ DONE;
+ }
+ else
+ operands[2] = gen_reg_rtx (HImode);
+}")
+
(define_expand "strmovqi"
[(set (match_dup 2)
(mem:QI (match_operand:SI 1 "register_operand" "")))
""
"
{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strmovqi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0],
operands[2] = gen_reg_rtx (QImode);
}")
+(define_expand "strmovqi_rex64"
+ [(set (match_dup 2)
+ (mem:QI (match_operand:DI 1 "register_operand" "")))
+ (set (mem:QI (match_operand:DI 0 "register_operand" ""))
+ (match_dup 2))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC 17))])
+ (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 1)))
+ (clobber (reg:CC 17))])]
+ "!TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strmovqi_rex_1 (operands[0], operands[1], operands[0],
+ operands[1]));
+ DONE;
+ }
+ else
+ operands[2] = gen_reg_rtx (QImode);
+}")
+
+(define_insn "strmovdi_rex_1"
+ [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+ (mem:DI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 8)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 8)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsq"
+ [(set_attr "type" "str")
+ (set_attr "mode" "DI")
+ (set_attr "memory" "both")])
+
(define_insn "strmovsi_1"
[(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
(mem:SI (match_operand:SI 3 "register_operand" "1")))
(plus:SI (match_dup 3)
(const_int 4)))
(use (reg:SI 19))]
- "TARGET_SINGLE_STRINGOP || optimize_size"
- "movsl"
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsl|movsd"
+ [(set_attr "type" "str")
+ (set_attr "mode" "SI")
+ (set_attr "memory" "both")])
+
+(define_insn "strmovsi_rex_1"
+ [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
+ (mem:SI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 4)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 4)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsl|movsd"
[(set_attr "type" "str")
(set_attr "mode" "SI")
(set_attr "memory" "both")])
(plus:SI (match_dup 3)
(const_int 2)))
(use (reg:SI 19))]
- "TARGET_SINGLE_STRINGOP || optimize_size"
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "HI")])
+
+(define_insn "strmovhi_rex_1"
+ [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
+ (mem:HI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 2)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 2)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"movsw"
[(set_attr "type" "str")
(set_attr "memory" "both")
(plus:SI (match_dup 3)
(const_int 1)))
(use (reg:SI 19))]
- "TARGET_SINGLE_STRINGOP || optimize_size"
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"movsb"
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "QI")])
+(define_insn "strmovqi_rex_1"
+ [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
+ (mem:QI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 1)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "QI")])
+
+(define_insn "rep_movdi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 3))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
+ "rep\;movsq|rep movsq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "DI")])
+
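+;; The rep_mov* patterns describe the full effect of rep movs: the count
+;; register ends at zero and both pointers advance by the number of bytes
+;; moved, as each parallel spells out.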
(define_insn "rep_movsi"
[(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
(set (match_operand:SI 0 "register_operand" "=D")
(mem:BLK (match_dup 4)))
(use (match_dup 5))
(use (reg:SI 19))]
- ""
+ "!TARGET_64BIT"
+ "rep\;movsl|rep movsd"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "rep_movsi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 2))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 2))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
"rep\;movsl|rep movsd"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(mem:BLK (match_dup 4)))
(use (match_dup 5))
(use (reg:SI 19))]
- ""
+ "!TARGET_64BIT"
+ "rep\;movsb|rep movsb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "rep_movqi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_operand:DI 3 "register_operand" "0")
+ (match_operand:DI 5 "register_operand" "2")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
"rep\;movsb|rep movsb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(define_expand "clrstrsi"
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:SI 1 "nonmemory_operand" ""))
- (use (match_operand:SI 2 "const_int_operand" ""))]
+ (use (match_operand 2 "const_int_operand" ""))]
""
"
{
- /* See comments in movstr expanders. The code is mostly identical. */
-
- rtx destreg, zeroreg, countreg;
- int align = 0;
- int count = -1;
- rtx insns;
-
- start_sequence ();
-
- if (GET_CODE (operands[2]) == CONST_INT)
- align = INTVAL (operands[2]);
-
- /* This simple hack avoids all inlining code and simplifies code bellow. */
- if (!TARGET_ALIGN_STRINGOPS)
- align = 32;
-
- if (GET_CODE (operands[1]) == CONST_INT)
- count = INTVAL (operands[1]);
-
- destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ if (ix86_expand_clrstr (operands[0], operands[1], operands[2]))
+ DONE;
+ else
+ FAIL;
+}")
- emit_insn (gen_cld ());
+(define_expand "clrstrdi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:DI 1 "nonmemory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))]
+ "TARGET_64BIT"
+ "
+{
+ if (ix86_expand_clrstr (operands[0], operands[1], operands[2]))
+ DONE;
+ else
+ FAIL;
+}")
- /* When optimizing for size emit simple rep ; movsb instruction for
- counts not divisible by 4. */
+;; Most CPUs don't like single string operations
+;; Handle this case here to simplify previous expander.
- if ((!optimize || optimize_size)
- && (count < 0 || (count & 0x03)))
- {
- countreg = copy_to_mode_reg (SImode, operands[1]);
- zeroreg = copy_to_mode_reg (QImode, const0_rtx);
- emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
- destreg, countreg));
- }
- else if (count >= 0
- && (align >= 8
- || (!TARGET_PENTIUMPRO && align >= 4)
- || optimize_size || count < 64))
- {
- zeroreg = copy_to_mode_reg (SImode, const0_rtx);
- if (INTVAL (operands[1]) & ~0x03)
- {
- countreg = copy_to_mode_reg (SImode,
- GEN_INT ((INTVAL (operands[1]) >> 2)
- & 0x3fffffff));
- emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
- destreg, countreg));
- }
- if (INTVAL (operands[1]) & 0x02)
- emit_insn (gen_strsethi (destreg,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- if (INTVAL (operands[1]) & 0x01)
- emit_insn (gen_strsetqi (destreg,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- }
- else
+(define_expand "strsetdi_rex64"
+ [(set (mem:DI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:DI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
{
- rtx countreg2;
- rtx label = NULL;
-
- /* In case we don't know anything about the alignment, default to
- library version, since it is usually equally fast and result in
- shorter code. */
- if (!TARGET_INLINE_ALL_STRINGOPS && align < 4)
- {
- end_sequence ();
- FAIL;
- }
-
- if (TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld ());
-
- countreg2 = gen_reg_rtx (SImode);
- countreg = copy_to_mode_reg (SImode, operands[1]);
- zeroreg = copy_to_mode_reg (SImode, const0_rtx);
-
- if (count < 0
- && align < (TARGET_PENTIUMPRO && (count < 0 || count >= 260) ? 8 : 4))
- {
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (countreg, GEN_INT (3),
- LEU, 0, SImode, 1, 0, label);
- }
- if (align <= 1)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (1)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strsetqi (destreg,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- emit_insn (gen_addsi3 (countreg, countreg, constm1_rtx));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 2)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (2)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strsethi (destreg,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-2)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 4 && TARGET_PENTIUMPRO && (count < 1 || count >= 260))
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (4)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strsetsi (destreg, zeroreg));
- emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-4)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-
- if (!TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld());
- emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
- emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
- destreg, countreg2));
-
- if (label)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 2 && count > 0 && (count & 2))
- emit_insn (gen_strsethi (destreg,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- if (align <= 2 || count < 0)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (2)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strsethi (destreg,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 1 && count > 0 && (count & 1))
- emit_insn (gen_strsetqi (destreg,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- if (align <= 1 || count < 0)
- {
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (SImode);
- emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (1)));
- emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
- SImode, 1, 0, label);
- emit_insn (gen_strsetqi (destreg,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
+ emit_insn (gen_strsetdi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
}
-
- insns = get_insns ();
- end_sequence ();
-
- ix86_set_move_mem_attrs (insns, operands[0], operands[0], destreg, destreg);
- emit_insns (insns);
-
- DONE;
}")
-;; Most CPUs don't like single string operations
-;; Handle this case here to simplify previous expander.
-
(define_expand "strsetsi"
[(set (mem:SI (match_operand:SI 0 "register_operand" ""))
(match_operand:SI 1 "register_operand" ""))
""
"
{
- if (TARGET_SINGLE_STRINGOP || optimize_size)
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strsetsi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
+(define_expand "strsetsi_rex64"
+ [(set (mem:SI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strsetsi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+}")
+
(define_expand "strsethi"
[(set (mem:HI (match_operand:SI 0 "register_operand" ""))
(match_operand:HI 1 "register_operand" ""))
""
"
{
- if (TARGET_SINGLE_STRINGOP || optimize_size)
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strsethi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
+(define_expand "strsethi_rex64"
+ [(set (mem:HI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strsethi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+}")
+
(define_expand "strsetqi"
[(set (mem:QI (match_operand:SI 0 "register_operand" ""))
(match_operand:QI 1 "register_operand" ""))
""
"
{
- if (TARGET_SINGLE_STRINGOP || optimize_size)
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strsetqi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
+(define_expand "strsetqi_rex64"
+ [(set (mem:QI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:QI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+{
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strsetqi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_insn "strsetdi_rex_1"
+ [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:DI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 8)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosq"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
(define_insn "strsetsi_1"
[(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
(match_operand:SI 2 "register_operand" "a"))
(plus:SI (match_dup 1)
(const_int 4)))
(use (reg:SI 19))]
- "TARGET_SINGLE_STRINGOP || optimize_size"
- "stosl"
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosl|stosd"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "strsetsi_rex_1"
+ [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 4)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosl|stosd"
[(set_attr "type" "str")
(set_attr "memory" "store")
(set_attr "mode" "SI")])
(plus:SI (match_dup 1)
(const_int 2)))
(use (reg:SI 19))]
- "TARGET_SINGLE_STRINGOP || optimize_size"
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "strsethi_rex_1"
+ [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 2)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
(plus:SI (match_dup 1)
(const_int 1)))
(use (reg:SI 19))]
- "TARGET_SINGLE_STRINGOP || optimize_size"
+ "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
(set_attr "mode" "QI")])
+(define_insn "strsetqi_rex_1"
+ [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 1)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
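+;; Likewise, the rep_stos* patterns expose the count-register clearing and
+;; the destination-pointer update performed by rep stos.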
+(define_insn "rep_stosdi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:DI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
+ "rep\;stosq|rep stosq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
(define_insn "rep_stossi"
[(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
(set (match_operand:SI 0 "register_operand" "=D")
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))
(use (reg:SI 19))]
- ""
+ "!TARGET_64BIT"
+ "rep\;stosl|rep stosd"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "rep_stossi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 2))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:SI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
"rep\;stosl|rep stosd"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))
(use (reg:SI 19))]
- ""
+ "!TARGET_64BIT"
+ "rep\;stosb|rep stosb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "rep_stosqi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_operand:DI 3 "register_operand" "0")
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:QI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI 19))
+ "TARGET_64BIT"
"rep\;stosb|rep stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
[(set (match_operand:SI 0 "register_operand" "")
(compare:SI (match_operand:BLK 1 "general_operand" "")
(match_operand:BLK 2 "general_operand" "")))
- (use (match_operand:SI 3 "general_operand" ""))
- (use (match_operand:SI 4 "immediate_operand" ""))]
+ (use (match_operand 3 "general_operand" ""))
+ (use (match_operand 4 "immediate_operand" ""))]
""
"
{
addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
count = operands[3];
- countreg = copy_to_mode_reg (SImode, count);
+ countreg = copy_to_mode_reg (Pmode, count);
/* %%% Iff we are testing strict equality, we can use known alignment
to good advantage. This may be possible with combine, particularly
emit_move_insn (operands[0], const0_rtx);
DONE;
}
- emit_insn (gen_cmpstrsi_nz_1 (addr1, addr2, countreg, align,
- addr1, addr2, countreg));
+ if (TARGET_64BIT)
+ emit_insn (gen_cmpstrqi_nz_rex_1 (addr1, addr2, countreg, align,
+ addr1, addr2, countreg));
+ else
+ emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align,
+ addr1, addr2, countreg));
}
else
{
- emit_insn (gen_cmpsi_1 (countreg, countreg));
- emit_insn (gen_cmpstrsi_1 (addr1, addr2, countreg, align,
- addr1, addr2, countreg));
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
+ emit_insn (gen_cmpstrqi_rex_1 (addr1, addr2, countreg, align,
+ addr1, addr2, countreg));
+ }
+ else
+ {
+ emit_insn (gen_cmpsi_1 (countreg, countreg));
+ emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align,
+ addr1, addr2, countreg));
+ }
}
outlow = gen_lowpart (QImode, out);
;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
;; zero. Emit extra code to make sure that a zero-length compare is EQ.
-(define_insn "cmpstrsi_nz_1"
+(define_insn "cmpstrqi_nz_1"
[(set (reg:CC 17)
(compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
(mem:BLK (match_operand:SI 5 "register_operand" "1"))))
(clobber (match_operand:SI 0 "register_operand" "=S"))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (match_operand:SI 2 "register_operand" "=c"))]
- ""
+ "!TARGET_64BIT"
+ "repz{\;| }cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "cmpstrqi_nz_rex_1"
+ [(set (reg:CC 17)
+ (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
+ (use (match_operand:DI 6 "register_operand" "2"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
+ (clobber (match_operand:DI 0 "register_operand" "=S"))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (match_operand:DI 2 "register_operand" "=c"))]
+ "TARGET_64BIT"
"repz{\;| }cmpsb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
;; The same, but the count is not known to not be zero.
-(define_insn "cmpstrsi_1"
+(define_insn "cmpstrqi_1"
[(set (reg:CC 17)
(if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
(const_int 0))
(clobber (match_operand:SI 0 "register_operand" "=S"))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (match_operand:SI 2 "register_operand" "=c"))]
- ""
+ "!TARGET_64BIT"
+ "repz{\;| }cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "cmpstrqi_rex_1"
+ [(set (reg:CC 17)
+ (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:DI 5 "register_operand" "1")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:CC 17))
+ (use (reg:SI 19))
+ (clobber (match_operand:DI 0 "register_operand" "=S"))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (match_operand:DI 2 "register_operand" "=c"))]
+ "TARGET_64BIT"
"repz{\;| }cmpsb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
[(set (match_operand:SI 0 "register_operand" "")
(unspec:SI [(match_operand:BLK 1 "general_operand" "")
(match_operand:QI 2 "immediate_operand" "")
- (match_operand:SI 3 "immediate_operand" "")] 0))]
+ (match_operand 3 "immediate_operand" "")] 0))]
""
"
{
- rtx out, addr, scratch1, scratch2, scratch3;
- rtx eoschar = operands[2];
- rtx align = operands[3];
-
- /* The generic case of strlen expander is long. Avoid it's
- expanding unless TARGET_INLINE_ALL_STRINGOPS. */
-
- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
- && !TARGET_INLINE_ALL_STRINGOPS
- && !optimize_size
- && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
- FAIL;
-
- out = operands[0];
- addr = force_reg (Pmode, XEXP (operands[1], 0));
- scratch1 = gen_reg_rtx (SImode);
-
- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
- && !optimize_size)
- {
- /* Well it seems that some optimizer does not combine a call like
- foo(strlen(bar), strlen(bar));
- when the move and the subtraction is done here. It does calculate
- the length just once when these instructions are done inside of
- output_strlen_unroll(). But I think since &bar[strlen(bar)] is
- often used and I use one fewer register for the lifetime of
- output_strlen_unroll() this is better. */
-
- if (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
- emit_move_insn (scratch1, addr);
-
- emit_move_insn (out, addr);
-
- ix86_expand_strlensi_unroll_1 (out, align, scratch1);
-
- /* strlensi_unroll_1 returns the address of the zero at the end of
- the string, like memchr(), so compute the length by subtracting
- the start address. */
- emit_insn (gen_subsi3 (out, out, addr));
- }
- else
- {
- scratch2 = gen_reg_rtx (SImode);
- scratch3 = gen_reg_rtx (SImode);
-
- emit_move_insn (scratch3, addr);
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+}")
- emit_insn (gen_cld ());
- emit_insn (gen_strlensi_1 (scratch1, scratch3, eoschar,
- align, constm1_rtx, scratch3));
- emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
- emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
- }
- DONE;
+(define_expand "strlendi"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
+ (match_operand 3 "immediate_operand" "")] 0))]
+ ""
+ "
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
}")
-(define_insn "strlensi_1"
+(define_insn "strlenqi_1"
[(set (match_operand:SI 0 "register_operand" "=&c")
(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
- (match_operand:QI 2 "general_operand" "a")
+ (match_operand:QI 2 "register_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")
- (match_operand:SI 4 "immediate_operand" "0")] 0))
+ (match_operand:SI 4 "register_operand" "0")] 0))
(use (reg:SI 19))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (reg:CC 17))]
- ""
+ "!TARGET_64BIT"
+ "repnz{\;| }scasb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "strlenqi_rex_1"
+ [(set (match_operand:DI 0 "register_operand" "=&c")
+ (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
+ (match_operand:QI 2 "register_operand" "a")
+ (match_operand:DI 3 "immediate_operand" "i")
+ (match_operand:DI 4 "register_operand" "0")] 0))
+ (use (reg:SI 19))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (reg:CC 17))]
+ "TARGET_64BIT"
"repnz{\;| }scasb"
[(set_attr "type" "str")
(set_attr "mode" "QI")