WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
the largest allowable mode. */
static machine_mode
-select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
- HOST_WIDE_INT align, bool word_mode_ok)
+select_block_compare_mode (unsigned HOST_WIDE_INT offset,
+ unsigned HOST_WIDE_INT bytes,
+ unsigned HOST_WIDE_INT align, bool word_mode_ok)
{
/* First see if we can do a whole load unit
as that will be more efficient than a larger load + shift. */
/* Do largest chunk possible without violating alignment rules. */
/* The most we can read without potential page crossing. */
- HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
+ unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
if (word_mode_ok && bytes >= UNITS_PER_WORD)
return word_mode;
/* Compute the alignment of pointer+OFFSET where the original alignment
of pointer was BASE_ALIGN. */
-static HOST_WIDE_INT
-compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
+static unsigned HOST_WIDE_INT
+compute_current_alignment (unsigned HOST_WIDE_INT base_align,
+ unsigned HOST_WIDE_INT offset)
{
if (offset == 0)
return base_align;
if (!CONST_INT_P (align_rtx))
return false;
- int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
+ unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
/* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff */
if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
gcc_assert (GET_MODE (target) == SImode);
/* Anything to move? */
- HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
- if (bytes <= 0)
+ unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
+ if (bytes == 0)
return true;
/* The code generated for p7 and older is not faster than glibc
/* Strategy phase. How many ops will this take and should we expand it? */
- int offset = 0;
+ unsigned HOST_WIDE_INT offset = 0;
machine_mode load_mode =
select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
- int load_mode_size = GET_MODE_SIZE (load_mode);
+ unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
/* We don't want to generate too much code. */
if (ROUND_UP (bytes, load_mode_size) / load_mode_size
- > rs6000_block_compare_inline_limit)
+ > (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit)
return false;
bool generate_6432_conversion = false;
while (bytes > 0)
{
- int align = compute_current_alignment (base_align, offset);
+ unsigned int align = compute_current_alignment (base_align, offset);
if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
load_mode = select_block_compare_mode (offset, bytes, align,
word_mode_ok);
{
/* Move this load back so it doesn't go past the end.
P8/P9 can do this efficiently. */
- int extra_bytes = load_mode_size - bytes;
+ unsigned int extra_bytes = load_mode_size - bytes;
cmp_bytes = bytes;
if (extra_bytes < offset)
{
so this forces a non-overlapping load and a shift to get
rid of the extra bytes. */
cmp_bytes = bytes;
-
+
src1 = adjust_address (orig_src1, load_mode, offset);
src2 = adjust_address (orig_src2, load_mode, offset);
OPERANDS[0] is the target (result).
OPERANDS[1] is the first source.
OPERANDS[2] is the second source.
+ If NO_LENGTH is zero, then:
OPERANDS[3] is the length.
- OPERANDS[4] is the alignment in bytes. */
+ OPERANDS[4] is the alignment in bytes.
+ If NO_LENGTH is nonzero, then:
+ OPERANDS[3] is the alignment in bytes. */
bool
-expand_strn_compare (rtx operands[])
+expand_strn_compare (rtx operands[], int no_length)
{
rtx target = operands[0];
rtx orig_src1 = operands[1];
rtx orig_src2 = operands[2];
- rtx bytes_rtx = operands[3];
- rtx align_rtx = operands[4];
- HOST_WIDE_INT cmp_bytes = 0;
+ rtx bytes_rtx, align_rtx;
+ if (no_length)
+ {
+ bytes_rtx = NULL;
+ align_rtx = operands[3];
+ }
+ else
+ {
+ bytes_rtx = operands[3];
+ align_rtx = operands[4];
+ }
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
rtx src1 = orig_src1;
rtx src2 = orig_src2;
- /* If this is not a fixed size compare, just call strncmp. */
- if (!CONST_INT_P (bytes_rtx))
+ /* If we have a length, it must be constant. This simplifies things
+ a bit as we don't have to generate code to check if we've exceeded
+ the length. Later this could be expanded to handle this case. */
+ if (!no_length && !CONST_INT_P (bytes_rtx))
return false;
/* This must be a fixed size alignment. */
if (!CONST_INT_P (align_rtx))
return false;
- int base_align = INTVAL (align_rtx);
+ unsigned int base_align = UINTVAL (align_rtx);
int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
gcc_assert (GET_MODE (target) == SImode);
- HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
-
/* If we have an LE target without ldbrx and word_mode is DImode,
then we must avoid using word_mode. */
int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
&& word_mode == DImode);
- int word_mode_size = GET_MODE_SIZE (word_mode);
+ unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
+
+ unsigned HOST_WIDE_INT offset = 0;
+ unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
+ unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
+ if (no_length)
+ /* Use this as a stand-in to determine the mode to use. */
+ bytes = rs6000_string_compare_inline_limit * word_mode_size;
+ else
+ bytes = UINTVAL (bytes_rtx);
- int offset = 0;
machine_mode load_mode =
select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
- int load_mode_size = GET_MODE_SIZE (load_mode);
+ unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
+ compare_length = rs6000_string_compare_inline_limit * load_mode_size;
- /* We don't want to generate too much code. Also if bytes is
- 4096 or larger we always want the library strncmp anyway. */
- int groups = ROUND_UP (bytes, load_mode_size) / load_mode_size;
- if (bytes >= 4096 || groups > rs6000_string_compare_inline_limit)
- return false;
+ /* If we have equality at the end of the last compare and we have not
+ found the end of the string, we need to call strcmp/strncmp to
+ compare the remainder. */
+ bool equality_compare_rest = false;
+
+ if (no_length)
+ {
+ bytes = compare_length;
+ equality_compare_rest = true;
+ }
+ else
+ {
+ if (bytes <= compare_length)
+ compare_length = bytes;
+ else
+ equality_compare_rest = true;
+ }
rtx result_reg = gen_reg_rtx (word_mode);
rtx final_move_label = gen_label_rtx ();
bgt cr7,L(pagecross) */
if (align1 < 8)
- expand_strncmp_align_check (strncmp_label, src1, bytes);
+ expand_strncmp_align_check (strncmp_label, src1, compare_length);
if (align2 < 8)
- expand_strncmp_align_check (strncmp_label, src2, bytes);
+ expand_strncmp_align_check (strncmp_label, src2, compare_length);
+
+ /* After the runtime alignment checks, we can use any alignment we
+ like as we know there is no 4k boundary crossing. */
+ base_align = 8;
/* Now generate the following sequence:
- branch to begin_compare
rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
- JUMP_LABEL(jmp) = begin_compare_label;
+ JUMP_LABEL (jmp) = begin_compare_label;
LABEL_NUSES (begin_compare_label) += 1;
emit_barrier ();
src2 = replace_equiv_address (src2, src2_reg);
}
- /* -m32 -mpowerpc64 results in word_mode being DImode even
- though otherwise it is 32-bit. The length arg to strncmp
- is a size_t which will be the same size as pointers. */
- rtx len_rtx;
- if (TARGET_64BIT)
- len_rtx = gen_reg_rtx(DImode);
+ if (no_length)
+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strcmp"),
+ target, LCT_NORMAL, GET_MODE (target), 2,
+ force_reg (Pmode, XEXP (src1, 0)), Pmode,
+ force_reg (Pmode, XEXP (src2, 0)), Pmode);
else
- len_rtx = gen_reg_rtx(SImode);
+ {
+ /* -m32 -mpowerpc64 results in word_mode being DImode even
+ though otherwise it is 32-bit. The length arg to strncmp
+ is a size_t which will be the same size as pointers. */
+ rtx len_rtx;
+ if (TARGET_64BIT)
+ len_rtx = gen_reg_rtx (DImode);
+ else
+ len_rtx = gen_reg_rtx (SImode);
- emit_move_insn (len_rtx, bytes_rtx);
+ emit_move_insn (len_rtx, bytes_rtx);
- emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strncmp"),
- target, LCT_NORMAL, GET_MODE (target), 3,
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
- len_rtx, GET_MODE (len_rtx));
+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strncmp"),
+ target, LCT_NORMAL, GET_MODE (target), 3,
+ force_reg (Pmode, XEXP (src1, 0)), Pmode,
+ force_reg (Pmode, XEXP (src2, 0)), Pmode,
+ len_rtx, GET_MODE (len_rtx));
+ }
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
/* Generate sequence of ld/ldbrx, cmpb to compare out
to the length specified. */
- while (bytes > 0)
+ unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
+ while (bytes_to_compare > 0)
{
/* Compare sequence:
check each 8B with: ld/ld cmpd bne
+ If equal, use rldicr/cmpb to check for zero byte.
cleanup code at end:
cmpb get byte that differs
cmpb look for zero byte
The last compare can branch around the cleanup code if the
result is zero because the strings are exactly equal. */
- int align = compute_current_alignment (base_align, offset);
+ unsigned int align = compute_current_alignment (base_align, offset);
if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- load_mode = select_block_compare_mode (offset, bytes, align,
+ load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
word_mode_ok);
else
- load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
+ load_mode = select_block_compare_mode (0, bytes_to_compare, align,
+ word_mode_ok);
load_mode_size = GET_MODE_SIZE (load_mode);
- if (bytes >= load_mode_size)
+ if (bytes_to_compare >= load_mode_size)
cmp_bytes = load_mode_size;
else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
{
/* Move this load back so it doesn't go past the end.
P8/P9 can do this efficiently. */
- int extra_bytes = load_mode_size - bytes;
- cmp_bytes = bytes;
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
+ cmp_bytes = bytes_to_compare;
if (extra_bytes < offset)
{
offset -= extra_bytes;
cmp_bytes = load_mode_size;
- bytes = cmp_bytes;
+ bytes_to_compare = cmp_bytes;
}
}
else
/* P7 and earlier can't do the overlapping load trick fast,
so this forces a non-overlapping load and a shift to get
rid of the extra bytes. */
- cmp_bytes = bytes;
+ cmp_bytes = bytes_to_compare;
src1 = adjust_address (orig_src1, load_mode, offset);
src2 = adjust_address (orig_src2, load_mode, offset);
}
}
- int remain = bytes - cmp_bytes;
+ /* Cases to handle. A and B are chunks of the two strings.
+ 1: Not end of comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: check for 0 byte, next block if not found.
+ 2: End of the inline comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: check for 0 byte, call strcmp/strncmp
+ 3: compared requested N bytes:
+ A == B: branch to result 0.
+ A != B: cleanup code to compute result. */
+
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
rtx dst_label;
- if (remain > 0)
+ if (remain > 0 || equality_compare_rest)
{
+ /* Branch to cleanup code, otherwise fall through to do
+ more compares. */
if (!cleanup_label)
cleanup_label = gen_label_rtx ();
dst_label = cleanup_label;
}
else
+ /* Branch to end and produce result of 0. */
dst_label = final_move_label;
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
rtx cond = gen_reg_rtx (CCmode);
- if (remain == 0)
- {
- /* For the last chunk, subf. also
- generates the zero result we need. */
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
- }
- else
- emit_move_insn (cond, gen_rtx_COMPARE (CCmode,
- tmp_reg_src1, tmp_reg_src2));
+ /* Always produce the 0 result, it is needed if
+ cmpb finds a 0 byte in this chunk. */
+ rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
+ rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
rtx cmp_rtx;
- if (remain > 0)
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
- else
+ if (remain == 0 && !equality_compare_rest)
cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
lab_ref, pc_rtx);
JUMP_LABEL (j) = dst_label;
LABEL_NUSES (dst_label) += 1;
+ if (remain > 0 || equality_compare_rest)
+ {
+ /* Generate a cmpb to test for a 0 byte and branch
+ to final result if found. */
+ rtx cmpb_zero = gen_reg_rtx (word_mode);
+ rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx condz = gen_reg_rtx (CCmode);
+ rtx zero_reg = gen_reg_rtx (word_mode);
+ if (word_mode == SImode)
+ {
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
+ emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Don't want to look at zero bytes past end. */
+ HOST_WIDE_INT mb =
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
+ }
+ }
+ else
+ {
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
+ emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Don't want to look at zero bytes past end. */
+ HOST_WIDE_INT mb =
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
+ }
+ }
+
+ emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
+ rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
+ lab_ref_fin, pc_rtx);
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j2) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+
+ }
+
offset += cmp_bytes;
- bytes -= cmp_bytes;
+ bytes_to_compare -= cmp_bytes;
+ }
+
+ if (equality_compare_rest)
+ {
+ /* Update pointers past what has been compared already. */
+ src1 = adjust_address (orig_src1, load_mode, offset);
+ src2 = adjust_address (orig_src2, load_mode, offset);
+
+ if (!REG_P (XEXP (src1, 0)))
+ {
+ rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
+ src1 = replace_equiv_address (src1, src1_reg);
+ }
+ set_mem_size (src1, cmp_bytes);
+
+ if (!REG_P (XEXP (src2, 0)))
+ {
+ rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
+ src2 = replace_equiv_address (src2, src2_reg);
+ }
+ set_mem_size (src2, cmp_bytes);
+
+ /* Construct call to strcmp/strncmp to compare the rest of the string. */
+ if (no_length)
+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strcmp"),
+ target, LCT_NORMAL, GET_MODE (target), 2,
+ force_reg (Pmode, XEXP (src1, 0)), Pmode,
+ force_reg (Pmode, XEXP (src2, 0)), Pmode);
+ else
+ {
+ rtx len_rtx;
+ if (TARGET_64BIT)
+ len_rtx = gen_reg_rtx (DImode);
+ else
+ len_rtx = gen_reg_rtx (SImode);
+
+ emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
+ emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strncmp"),
+ target, LCT_NORMAL, GET_MODE (target), 3,
+ force_reg (Pmode, XEXP (src1, 0)), Pmode,
+ force_reg (Pmode, XEXP (src2, 0)), Pmode,
+ len_rtx, GET_MODE (len_rtx));
+ }
+
+ rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
+ rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
+ JUMP_LABEL (jmp) = final_label;
+ LABEL_NUSES (final_label) += 1;
+ emit_barrier ();
}
if (cleanup_label)
rtx rot_amt = gen_reg_rtx (word_mode);
rtx zero_reg = gen_reg_rtx (word_mode);
- rtx rot1_1 = gen_reg_rtx(word_mode);
- rtx rot1_2 = gen_reg_rtx(word_mode);
- rtx rot2_1 = gen_reg_rtx(word_mode);
- rtx rot2_2 = gen_reg_rtx(word_mode);
+ rtx rot1_1 = gen_reg_rtx (word_mode);
+ rtx rot1_2 = gen_reg_rtx (word_mode);
+ rtx rot2_1 = gen_reg_rtx (word_mode);
+ rtx rot2_2 = gen_reg_rtx (word_mode);
if (word_mode == SImode)
{
emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
- emit_insn (gen_movsi (zero_reg, GEN_INT(0)));
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT(0xff)));
+ emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT(0xff)));
+ emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
}
else
{
emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
- emit_insn (gen_movdi (zero_reg, GEN_INT(0)));
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT(0xff)));
+ emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT(0xff)));
+ emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
}