From b2272b138c1e7f6a1cb867d614951d516f88a9f1 Mon Sep 17 00:00:00 2001 From: Qing Zhao Date: Fri, 13 Jul 2018 14:10:45 +0000 Subject: [PATCH] 3nd Patch for PR78009 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78809 Inline strcmp with small constant strings The design doc for PR78809 is at: https://www.mail-archive.com/gcc@gcc.gnu.org/msg83822.html this patch is for the third part of change of PR78809. C. for strcmp (s1, s2), strncmp (s1, s2, n), and memcmp (s1, s2, n) if the result is NOT used to do simple equality test against zero, one of "s1" or "s2" is a small constant string, n is a constant, and the Min value of the length of the constant string and "n" is smaller than a predefined threshold T, inline the call by a byte-to-byte comparision sequence to avoid calling overhead. adding test case strcmpopt_5.c into gcc.dg for part C of PR78809. adding test case strcmpopt_6.c into gcc.dg to test the following case: When the specified length exceeds one of the arguments of the call to memcmp, the call to memcmp should NOT be inlined. From-SVN: r262636 --- gcc/ChangeLog | 14 +++ gcc/builtins.c | 193 ++++++++++++++++++++++++++--- gcc/doc/invoke.texi | 5 + gcc/params.def | 4 + gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.dg/strcmpopt_5.c | 80 ++++++++++++ gcc/testsuite/gcc.dg/strcmpopt_6.c | 36 ++++++ 7 files changed, 321 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/strcmpopt_5.c create mode 100644 gcc/testsuite/gcc.dg/strcmpopt_6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 590efb981a3..6b776e1c7c5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2018-07-13 Qing Zhao + + PR middle-end/78809 + * builtins.c (expand_builtin_memcmp): Inline the calls first + when result_eq is false. + (expand_builtin_strcmp): Inline the calls first. + (expand_builtin_strncmp): Likewise. + (inline_string_cmp): New routine. Expand a string compare + call by using a sequence of char comparison. + (inline_expand_builtin_string_cmp): New routine. Inline expansion + a call to str(n)cmp/memcmp. + * doc/invoke.texi (--param builtin-string-cmp-inline-length): New option. + * params.def (BUILTIN_STRING_CMP_INLINE_LENGTH): New. + 2018-07-13 Richard Earnshaw * config/arm/driver-arm.c: Include arm-native.h. diff --git a/gcc/builtins.c b/gcc/builtins.c index 820d6c262b0..839a8180e48 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "memmodel.h" #include "gimple.h" #include "predict.h" +#include "params.h" #include "tm_p.h" #include "stringpool.h" #include "tree-vrp.h" @@ -118,6 +119,7 @@ static rtx expand_builtin_next_arg (void); static rtx expand_builtin_va_start (tree); static rtx expand_builtin_va_end (tree); static rtx expand_builtin_va_copy (tree); +static rtx inline_expand_builtin_string_cmp (tree, rtx, bool); static rtx expand_builtin_strcmp (tree, rtx); static rtx expand_builtin_strncmp (tree, rtx, machine_mode); static rtx builtin_memcpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode); @@ -4443,19 +4445,32 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq) tree arg1 = CALL_EXPR_ARG (exp, 0); tree arg2 = CALL_EXPR_ARG (exp, 1); tree len = CALL_EXPR_ARG (exp, 2); + enum built_in_function fcode = DECL_FUNCTION_CODE (get_callee_fndecl (exp)); + bool no_overflow = true; /* Diagnose calls where the specified length exceeds the size of either object. */ - if (warn_stringop_overflow) + tree size = compute_objsize (arg1, 0); + no_overflow = check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, + len, /*maxread=*/NULL_TREE, size, + /*objsize=*/NULL_TREE); + if (no_overflow) + { + size = compute_objsize (arg2, 0); + no_overflow = check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, + len, /*maxread=*/NULL_TREE, size, + /*objsize=*/NULL_TREE); + } + + /* Due to the performance benefit, always inline the calls first + when result_eq is false. */ + rtx result = NULL_RTX; + + if (!result_eq && fcode != BUILT_IN_BCMP && no_overflow) { - tree size = compute_objsize (arg1, 0); - if (check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len, - /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE)) - { - size = compute_objsize (arg2, 0); - check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len, - /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE); - } + result = inline_expand_builtin_string_cmp (exp, target, true); + if (result) + return result; } machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); @@ -4497,10 +4512,10 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq) && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1) constfn = builtin_memcpy_read_str; - rtx result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx, - TREE_TYPE (len), target, - result_eq, constfn, - CONST_CAST (char *, src_str)); + result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx, + TREE_TYPE (len), target, + result_eq, constfn, + CONST_CAST (char *, src_str)); if (result) { @@ -4530,6 +4545,12 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target) if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE)) return NULL_RTX; + /* Due to the performance benefit, always inline the calls first. */ + rtx result = NULL_RTX; + result = inline_expand_builtin_string_cmp (exp, target, false); + if (result) + return result; + insn_code cmpstr_icode = direct_optab_handler (cmpstr_optab, SImode); insn_code cmpstrn_icode = direct_optab_handler (cmpstrn_optab, SImode); if (cmpstr_icode == CODE_FOR_nothing && cmpstrn_icode == CODE_FOR_nothing) @@ -4552,7 +4573,6 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target) rtx arg1_rtx = get_memory_rtx (arg1, NULL); rtx arg2_rtx = get_memory_rtx (arg2, NULL); - rtx result = NULL_RTX; /* Try to call cmpstrsi. */ if (cmpstr_icode != CODE_FOR_nothing) result = expand_cmpstr (cmpstr_icode, target, arg1_rtx, arg2_rtx, @@ -4644,6 +4664,12 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target, POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) return NULL_RTX; + /* Due to the performance benefit, always inline the calls first. */ + rtx result = NULL_RTX; + result = inline_expand_builtin_string_cmp (exp, target, false); + if (result) + return result; + /* If c_strlen can determine an expression for one of the string lengths, and it doesn't have side effects, then emit cmpstrnsi using length MIN(strlen(string)+1, arg3). */ @@ -4706,9 +4732,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target, rtx arg1_rtx = get_memory_rtx (arg1, len); rtx arg2_rtx = get_memory_rtx (arg2, len); rtx arg3_rtx = expand_normal (len); - rtx result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx, - arg2_rtx, TREE_TYPE (len), arg3_rtx, - MIN (arg1_align, arg2_align)); + result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx, + arg2_rtx, TREE_TYPE (len), arg3_rtx, + MIN (arg1_align, arg2_align)); tree fndecl = get_callee_fndecl (exp); if (result) @@ -6722,6 +6748,139 @@ expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore) return target; } +/* Expand a string compare operation using a sequence of char comparison + to get rid of the calling overhead, with result going to TARGET if + that's convenient. + + VAR_STR is the variable string source; + CONST_STR is the constant string source; + LENGTH is the number of chars to compare; + CONST_STR_N indicates which source string is the constant string; + IS_MEMCMP indicates whether it's a memcmp or strcmp. + + to: (assume const_str_n is 2, i.e., arg2 is a constant string) + + target = var_str[0] - const_str[0]; + if (target != 0) + goto ne_label; + ... + target = var_str[length - 2] - const_str[length - 2]; + if (target != 0) + goto ne_label; + target = var_str[length - 1] - const_str[length - 1]; + ne_label: + */ + +static rtx +inline_string_cmp (rtx target, tree var_str, const char* const_str, + unsigned HOST_WIDE_INT length, + int const_str_n, machine_mode mode, + bool is_memcmp) +{ + HOST_WIDE_INT offset = 0; + rtx var_rtx_array + = get_memory_rtx (var_str, build_int_cst (unsigned_type_node,length)); + rtx var_rtx = NULL_RTX; + rtx const_rtx = NULL_RTX; + rtx result = target ? target : gen_reg_rtx (mode); + rtx_code_label *ne_label = gen_label_rtx (); + tree unit_type_node = is_memcmp ? unsigned_char_type_node : char_type_node; + + start_sequence (); + + for (unsigned HOST_WIDE_INT i = 0; i < length; i++) + { + var_rtx + = adjust_address (var_rtx_array, TYPE_MODE (unit_type_node), offset); + const_rtx + = builtin_memcpy_read_str (CONST_CAST (char *, const_str), + offset, + as_a + TYPE_MODE (unit_type_node)); + rtx op0 = (const_str_n == 1) ? const_rtx : var_rtx; + rtx op1 = (const_str_n == 1) ? var_rtx : const_rtx; + + result = expand_simple_binop (mode, MINUS, op0, op1, + result, is_memcmp ? 1 : 0, OPTAB_WIDEN); + if (i < length - 1) + emit_cmp_and_jump_insns (result, CONST0_RTX (mode), NE, NULL_RTX, + mode, true, ne_label); + offset + += GET_MODE_SIZE (as_a TYPE_MODE (unit_type_node)); + } + + emit_label (ne_label); + rtx_insn *insns = get_insns (); + end_sequence (); + emit_insn (insns); + + return result; +} + +/* Inline expansion a call to str(n)cmp, with result going to + TARGET if that's convenient. + If the call is not been inlined, return NULL_RTX. */ +static rtx +inline_expand_builtin_string_cmp (tree exp, rtx target, bool is_memcmp) +{ + tree fndecl = get_callee_fndecl (exp); + enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl); + unsigned HOST_WIDE_INT length = 0; + bool is_ncmp = (fcode == BUILT_IN_STRNCMP || fcode == BUILT_IN_MEMCMP); + + gcc_checking_assert (fcode == BUILT_IN_STRCMP + || fcode == BUILT_IN_STRNCMP + || fcode == BUILT_IN_MEMCMP); + + tree arg1 = CALL_EXPR_ARG (exp, 0); + tree arg2 = CALL_EXPR_ARG (exp, 1); + tree len3_tree = is_ncmp ? CALL_EXPR_ARG (exp, 2) : NULL_TREE; + + unsigned HOST_WIDE_INT len1 = 0; + unsigned HOST_WIDE_INT len2 = 0; + unsigned HOST_WIDE_INT len3 = 0; + + const char *src_str1 = c_getstr (arg1, &len1); + const char *src_str2 = c_getstr (arg2, &len2); + + /* If neither strings is constant string, the call is not qualify. */ + if (!src_str1 && !src_str2) + return NULL_RTX; + + /* For strncmp, if the length is not a const, not qualify. */ + if (is_ncmp && !tree_fits_uhwi_p (len3_tree)) + return NULL_RTX; + + int const_str_n = 0; + if (!len1) + const_str_n = 2; + else if (!len2) + const_str_n = 1; + else if (len2 > len1) + const_str_n = 1; + else + const_str_n = 2; + + gcc_checking_assert (const_str_n > 0); + length = (const_str_n == 1) ? len1 : len2; + + if (is_ncmp && (len3 = tree_to_uhwi (len3_tree)) < length) + length = len3; + + /* If the length of the comparision is larger than the threshold, + do nothing. */ + if (length > (unsigned HOST_WIDE_INT) + PARAM_VALUE (BUILTIN_STRING_CMP_INLINE_LENGTH)) + return NULL_RTX; + + machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); + + /* Now, start inline expansion the call. */ + return inline_string_cmp (target, (const_str_n == 1) ? arg2 : arg1, + (const_str_n == 1) ? src_str1 : src_str2, length, + const_str_n, mode, is_memcmp); +} + /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e0e59f6f2d0..9804808f128 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10594,6 +10594,11 @@ Control the probability of the expression having the specified value. This parameter takes a percentage (i.e. 0 ... 100) as input. The default probability of 90 is obtained empirically. +@item builtin-string-cmp-inline-length +The maximum length of a constant string for a builtin string cmp call +eligible for inlining. +The default value is 3. + @item align-threshold Select fraction of the maximal frequency of executions of a basic block in diff --git a/gcc/params.def b/gcc/params.def index a3906c26881..df3a06b5d9d 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -446,6 +446,10 @@ DEFPARAM(BUILTIN_EXPECT_PROBABILITY, "builtin-expect-probability", "Set the estimated probability in percentage for builtin expect. The default value is 90% probability.", 90, 0, 100) +DEFPARAM(BUILTIN_STRING_CMP_INLINE_LENGTH, + "builtin-string-cmp-inline-length", + "The maximum length of a constant string for a builtin string cmp call eligible for inlining. The default value is 3.", + 3, 0, 100) DEFPARAM(TRACER_DYNAMIC_COVERAGE_FEEDBACK, "tracer-dynamic-coverage-feedback", "The percentage of function, weighted by execution frequency, that must be covered by trace formation. Used when profile feedback is available.", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7b9627f0b95..0c61e976f73 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-07-13 Qing Zhao + + PR middle-end/78809 + * gcc.dg/strcmpopt_5.c: New test. + * gcc.dg/strcmpopt_6.c: New test. + 2018-07-13 Richard Biener PR middle-end/85974 diff --git a/gcc/testsuite/gcc.dg/strcmpopt_5.c b/gcc/testsuite/gcc.dg/strcmpopt_5.c new file mode 100644 index 00000000000..c30fb78eb64 --- /dev/null +++ b/gcc/testsuite/gcc.dg/strcmpopt_5.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O -fdump-rtl-expand" } */ + +typedef struct { char s[8]; int x; } S; +__attribute__ ((noinline)) int +f1 (S * s) +{ + int result = 0; + result += __builtin_strncmp (s->s, "ab", 2); + result += __builtin_strncmp (s->s, "abc", 3); + return result; +} + +__attribute__ ((noinline)) int +f2 (char *p) +{ + int result = 0; + result += __builtin_strncmp (p, "ab", 2); + result += __builtin_strncmp (p, "abc", 3); + return result; +} + +__attribute__ ((noinline)) int +f3 (S * s) +{ + int result = 0; + result += __builtin_strcmp (s->s, "a"); + result += __builtin_strcmp (s->s, "ab"); + return result; +} + +__attribute__ ((noinline)) int +f4 (char *p) +{ + int result = 0; + result += __builtin_strcmp (p, "a"); + result += __builtin_strcmp (p, "ab"); + return result; +} + +__attribute__ ((noinline)) int +f5 (S * s) +{ + int result = 0; + result += __builtin_memcmp (s->s, "ab", 2); + result += __builtin_memcmp (s->s, "abc", 3); + return result; +} + +__attribute__ ((noinline)) int +f6 (char *p) +{ + int result = 0; + result += __builtin_memcmp (p, "ab", 2); + result += __builtin_memcmp (p, "abc", 3); + return result; +} + +int main (void) +{ + S ss = {{'a','b','c'}, 2}; + char *s = "abcd"; + + if (f1 (&ss) != 0 || f2 (s) != 0) + __builtin_abort (); + + if (f3 (&ss) <= 0 || f4 (s) <= 0) + __builtin_abort (); + + if (f5 (&ss) != 0 || f6 (s) != 0) + __builtin_abort (); + + return 0; + +} + + +/* { dg-final { scan-rtl-dump-times "__builtin_strcmp" 0 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "__builtin_strncmp" 0 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "__builtin_memcmp" 0 "expand" } } */ diff --git a/gcc/testsuite/gcc.dg/strcmpopt_6.c b/gcc/testsuite/gcc.dg/strcmpopt_6.c new file mode 100644 index 00000000000..0f8cf87fbd0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/strcmpopt_6.c @@ -0,0 +1,36 @@ +/* When the specified length exceeds one of the arguments of the call to memcmp, + the call to memcmp should NOT be inlined. */ +/* { dg-do run } */ +/* { dg-options "-O -fdump-rtl-expand -Wno-stringop-overflow" } */ + +typedef struct { char s[8]; int x; } S; + +__attribute__ ((noinline)) int +f1 (S * s) +{ + int result = 0; + result += __builtin_memcmp (s->s, "a", 3); + return result; +} + +__attribute__ ((noinline)) int +f2 (char *p) +{ + int result = 0; + result += __builtin_memcmp (p, "a", 3); + return result; +} + +int main (void) +{ + S ss = {{'a','b','c'}, 2}; + char *s = "abcd"; + + if (f1 (&ss) < 0 || f2 (s) < 0) + __builtin_abort (); + + return 0; + +} + +/* { dg-final { scan-rtl-dump-times "__builtin_memcmp" 4 "expand" } } */ -- 2.30.2