We used to expand memcmp to "repz cmpsb" via cmpstrnsi. It was changed
by
commit
9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
Author: Nick Clifton <nickc@redhat.com>
Date: Fri Aug 12 16:26:11 2011 +0000
builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
* builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
pattern.
* doc/md.texi (cmpstrn): Note that the comparison stops if both
fetched bytes are zero.
(cmpstr): Likewise.
(cmpmem): Note that the comparison does not stop if both of the
fetched bytes are zero.
Duplicate the cmpstrn pattern for cmpmem. The only difference is that
the length argument of cmpmem is guaranteed to be less than or equal to
the lengths of the two memory areas. Since "repz cmpsb" can be much
slower than a memcmp function implemented with vector instructions, see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
expand cmpmem to "repz cmpsb" only for -minline-all-stringops.
gcc/
PR target/95151
* config/i386/i386-expand.c (ix86_expand_cmpstrn_or_cmpmem): New
function.
* config/i386/i386-protos.h (ix86_expand_cmpstrn_or_cmpmem): New
prototype.
* config/i386/i386.md (cmpmemsi): New pattern.
gcc/testsuite/
PR target/95151
* gcc.target/i386/pr95151-1.c: New test.
* gcc.target/i386/pr95151-2.c: Likewise.
* gcc.target/i386/pr95151-3.c: Likewise.
* gcc.target/i386/pr95151-4.c: Likewise.
return true;
}
+/* Expand cmpstrn (IS_CMPSTRN true) or cmpmem (IS_CMPSTRN false) to
+   "repz cmpsb".  RESULT receives the sign-extended SImode result of
+   the comparison, SRC1 and SRC2 are the two memory operands, LENGTH
+   is the number of bytes to compare and ALIGN is handed through
+   unchanged to the cmpstrnqi patterns.  Return true if the insns
+   were emitted, false if the caller must FAIL the expansion.  */
+
+bool
+ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx src2,
+                               rtx length, rtx align, bool is_cmpstrn)
+{
+  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+    return false;
+
+  /* Can't use this if the user has appropriated ecx, esi or edi.  */
+  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
+    return false;
+
+  if (is_cmpstrn)
+    {
+      /* For strncmp, length is the maximum length, which can be larger
+         than actual string lengths.  We can expand the cmpstrn pattern
+         to "repz cmpsb" only if one of the strings is a constant so
+         that expand_builtin_strncmp() can rewrite the length argument
+         to be the minimum of the const string length and the actual
+         length argument.  Otherwise, "repz cmpsb" may pass the 0 byte.  */
+      tree t1 = MEM_EXPR (src1);
+      tree t2 = MEM_EXPR (src2);
+      if (!((t1 && TREE_CODE (t1) == MEM_REF
+             && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
+             && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0))
+                 == STRING_CST))
+            || (t2 && TREE_CODE (t2) == MEM_REF
+                && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
+                && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0))
+                    == STRING_CST))))
+        return false;
+    }
+  else
+    {
+      /* Expand memcmp to "repz cmpsb" only for -minline-all-stringops
+         since "repz cmpsb" can be much slower than memcmp function
+         implemented with vector instructions, see
+
+         https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
+       */
+      if (!TARGET_INLINE_ALL_STRINGOPS)
+        return false;
+    }
+
+  /* Force both addresses into fresh registers and make the MEMs refer
+     to those registers.  */
+  rtx addr1 = copy_addr_to_reg (XEXP (src1, 0));
+  rtx addr2 = copy_addr_to_reg (XEXP (src2, 0));
+  if (addr1 != XEXP (src1, 0))
+    src1 = replace_equiv_address_nv (src1, addr1);
+  if (addr2 != XEXP (src2, 0))
+    src2 = replace_equiv_address_nv (src2, addr2);
+
+  /* NB: Make a copy of the data length to avoid changing the original
+     data length by cmpstrnqi patterns.  LENGTH itself is deliberately
+     left untouched so that the CONST_INT_P test below still sees the
+     caller's operand; the zero-extended copy is presumably always a
+     register (TODO confirm against ix86_zero_extend_to_Pmode) and
+     would never be recognized as a constant.  */
+  rtx lengthreg = gen_reg_rtx (Pmode);
+  emit_move_insn (lengthreg, ix86_zero_extend_to_Pmode (length));
+
+  /* If we are testing strict equality, we can use known alignment to
+     good advantage.  This may be possible with combine, particularly
+     once cc0 is dead.  */
+  if (CONST_INT_P (length))
+    {
+      /* Comparing zero bytes always yields equality.  */
+      if (length == const0_rtx)
+        {
+          emit_move_insn (result, const0_rtx);
+          return true;
+        }
+      /* Known non-zero count: no preliminary flag setup is needed.  */
+      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, lengthreg, align,
+                                     src1, src2));
+    }
+  else
+    {
+      /* The count may be zero at run time; compare it with itself
+         first so the flags are in the "equal" state if no byte is
+         compared.  */
+      emit_insn (gen_cmp_1 (Pmode, lengthreg, lengthreg));
+      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, lengthreg, align,
+                                  src1, src2));
+    }
+
+  /* Turn the condition codes into a QImode value and sign-extend it
+     into RESULT.  */
+  rtx out = gen_lowpart (QImode, result);
+  emit_insn (gen_cmpintqi (out));
+  emit_move_insn (result, gen_rtx_SIGN_EXTEND (SImode, out));
+
+  return true;
+}
/* Expand the appropriate insns for doing strlen if not just doing
repnz; scasb
extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
+extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool);
extern bool constant_address_p (rtx);
extern bool legitimate_pic_operand_p (rtx);
(const_string "*")))
(set_attr "mode" "QI")])
+;; Expand memcmp inline.  All of the work, including the decision
+;; whether to expand at all, is done by ix86_expand_cmpstrn_or_cmpmem;
+;; the final argument FALSE selects the cmpmem flavour.
+(define_expand "cmpmemsi"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (compare:SI (match_operand:BLK 1 "memory_operand" "")
+                    (match_operand:BLK 2 "memory_operand" "")))
+   (use (match_operand 3 "general_operand"))
+   (use (match_operand 4 "immediate_operand"))]
+  ""
+{
+  if (!ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
+                                      operands[2], operands[3],
+                                      operands[4], false))
+    FAIL;
+
+  DONE;
+})
+
(define_expand "cmpstrnsi"
[(set (match_operand:SI 0 "register_operand")
(compare:SI (match_operand:BLK 1 "general_operand")
(use (match_operand 4 "immediate_operand"))]
""
{
- rtx addr1, addr2, countreg, align, out;
-
- if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
- FAIL;
-
- /* Can't use this if the user has appropriated ecx, esi or edi. */
- if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- FAIL;
-
- /* One of the strings must be a constant. If so, expand_builtin_strncmp()
- will have rewritten the length arg to be the minimum of the const string
- length and the actual length arg. If both strings are the same and
- shorter than the length arg, repz cmpsb will not stop at the 0 byte and
- will incorrectly base the results on chars past the 0 byte. */
- tree t1 = MEM_EXPR (operands[1]);
- tree t2 = MEM_EXPR (operands[2]);
- if (!((t1 && TREE_CODE (t1) == MEM_REF
- && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
- && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) == STRING_CST)
- || (t2 && TREE_CODE (t2) == MEM_REF
- && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
- && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) == STRING_CST)))
- FAIL;
-
- addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
- addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
- if (addr1 != XEXP (operands[1], 0))
- operands[1] = replace_equiv_address_nv (operands[1], addr1);
- if (addr2 != XEXP (operands[2], 0))
- operands[2] = replace_equiv_address_nv (operands[2], addr2);
-
- /* NB: Make a copy of the data length to avoid changing the original
- data length by cmpstrnqi patterns. */
- rtx count = ix86_zero_extend_to_Pmode (operands[3]);
- countreg = gen_reg_rtx (Pmode);
- emit_move_insn (countreg, count);
-
- /* %%% Iff we are testing strict equality, we can use known alignment
- to good advantage. This may be possible with combine, particularly
- once cc0 is dead. */
- align = operands[4];
-
- if (CONST_INT_P (operands[3]))
- {
- if (operands[3] == const0_rtx)
- {
- emit_move_insn (operands[0], const0_rtx);
- DONE;
- }
- emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
- operands[1], operands[2]));
- }
+ if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4], true))
+ DONE;
else
- {
- emit_insn (gen_cmp_1 (Pmode, countreg, countreg));
- emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
- operands[1], operands[2]));
- }
-
- out = gen_lowpart (QImode, operands[0]);
- emit_insn (gen_cmpintqi (out));
- emit_move_insn (operands[0], gen_rtx_SIGN_EXTEND (SImode, out));
-
- DONE;
+ FAIL;
})
;; Produce a tri-state integer (-1, 0, 1) from condition codes.
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+struct foo
+{
+ char array[257];
+};
+
+extern struct foo x;
+
+int
+func (struct foo i)
+{ /* sizeof (x) is a compile-time constant; the dg-final scan forbids a memcmp call.  */
+ return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+int
+func (void *d, void *s, unsigned int l)
+{ /* L is not a compile-time constant; the dg-final scan still forbids a memcmp call.  */
+ return __builtin_memcmp (d, s, l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-inline-all-stringops" } */
+
+struct foo
+{
+ char array[257];
+};
+
+extern struct foo x;
+
+int
+func (struct foo i)
+{ /* Built with -mno-inline-all-stringops: the dg-final scans expect a memcmp call and no cmpsb.  */
+ return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler-not "cmpsb" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-inline-all-stringops" } */
+
+int
+func (void *d, void *s, unsigned int l)
+{ /* Run-time length with -mno-inline-all-stringops: the dg-final scans expect a memcmp call and no cmpsb.  */
+ return __builtin_memcmp (d, s, l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler-not "cmpsb" } } */