From 4148b00dbc46a461915602b2b3489d69fd211c22 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Thu, 16 Aug 2018 22:38:04 +0000 Subject: [PATCH] builtins.c (c_strlen): Add new parameter eltsize. * builtins.c (c_strlen): Add new parameter eltsize. Use it for determining how to count the elements. * builtins.h (c_strlen): Adjust prototype. * expr.c (string_constant): Add new parameter mem_size. Set *mem_size appropriately. * expr.h (string_constant): Adjust prototype. * gimple-fold.c (get_range_strlen): Add new parameter eltsize. * gimple-fold.h (get_range_strlen): Adjust prototype. * gimple-ssa-sprintf.c (get_string_length): Add new parameter eltsize. (format_string): Call get_string_length with eltsize. From-SVN: r263607 --- gcc/ChangeLog | 13 +++++++++ gcc/builtins.c | 63 ++++++++++++++++++++++------------------ gcc/builtins.h | 2 +- gcc/expr.c | 14 ++++++--- gcc/expr.h | 2 +- gcc/gimple-fold.c | 33 ++++++++++++--------- gcc/gimple-fold.h | 2 +- gcc/gimple-ssa-sprintf.c | 11 +++---- 8 files changed, 87 insertions(+), 53 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 146e4e37c3a..897ec9cc91f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2018-08-16 Bernd Edlinger + + * builtins.c (c_strlen): Add new parameter eltsize. Use it + for determining how to count the elements. + * builtins.h (c_strlen): Adjust prototype. + * expr.c (string_constant): Add new parameter mem_size. + Set *mem_size appropriately. + * expr.h (string_constant): Adjust prototype. + * gimple-fold.c (get_range_strlen): Add new parameter eltsize. + * gimple-fold.h (get_range_strlen): Adjust prototype. + * gimple-ssa-sprintf.c (get_string_length): Add new parameter eltsize. + (format_string): Call get_string_length with eltsize. 
+ 2018-08-16 David Malcolm * diagnostic.c (default_diagnostic_start_span_fn): Call pp_string diff --git a/gcc/builtins.c b/gcc/builtins.c index 6716aabfa73..b1a79f3f33f 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -568,41 +568,43 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts) accesses. Note that this implies the result is not going to be emitted into the instruction stream. - The value returned is of type `ssizetype'. + ELTSIZE is 1 for normal single byte character strings, and 2 or + 4 for wide character strings. ELTSIZE is by default 1. - Unfortunately, string_constant can't access the values of const char - arrays with initializers, so neither can we do so here. */ + The value returned is of type `ssizetype'. */ tree -c_strlen (tree src, int only_value) +c_strlen (tree src, int only_value, unsigned eltsize) { + gcc_assert (eltsize == 1 || eltsize == 2 || eltsize == 4); STRIP_NOPS (src); if (TREE_CODE (src) == COND_EXPR && (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0)))) { tree len1, len2; - len1 = c_strlen (TREE_OPERAND (src, 1), only_value); - len2 = c_strlen (TREE_OPERAND (src, 2), only_value); + len1 = c_strlen (TREE_OPERAND (src, 1), only_value, eltsize); + len2 = c_strlen (TREE_OPERAND (src, 2), only_value, eltsize); if (tree_int_cst_equal (len1, len2)) return len1; } if (TREE_CODE (src) == COMPOUND_EXPR && (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0)))) - return c_strlen (TREE_OPERAND (src, 1), only_value); + return c_strlen (TREE_OPERAND (src, 1), only_value, eltsize); location_t loc = EXPR_LOC_OR_LOC (src, input_location); /* Offset from the beginning of the string in bytes. */ tree byteoff; - src = string_constant (src, &byteoff); + tree memsize; + src = string_constant (src, &byteoff, &memsize); if (src == 0) return NULL_TREE; /* Determine the size of the string element. 
*/ - unsigned eltsize - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src)))); + if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src))))) + return NULL_TREE; /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible length of SRC. Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible @@ -613,14 +615,10 @@ c_strlen (tree src, int only_value) HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src); strelts = strelts / eltsize - 1; - HOST_WIDE_INT maxelts = strelts; - tree type = TREE_TYPE (src); - if (tree size = TYPE_SIZE_UNIT (type)) - if (tree_fits_shwi_p (size)) - { - maxelts = tree_to_uhwi (size); - maxelts = maxelts / eltsize - 1; - } + if (!tree_fits_uhwi_p (memsize)) + return NULL_TREE; + + HOST_WIDE_INT maxelts = tree_to_uhwi (memsize) / eltsize - 1; /* PTR can point to the byte representation of any string type, including char* and wchar_t*. */ @@ -628,19 +626,23 @@ c_strlen (tree src, int only_value) if (byteoff && TREE_CODE (byteoff) != INTEGER_CST) { + /* For empty strings the result should be zero. */ + if (maxelts == 0) + return ssize_int (0); + + /* The code below works only for single byte character types. */ + if (eltsize != 1) + return NULL_TREE; + /* If the string has an internal NUL character followed by any non-NUL characters (e.g., "foo\0bar"), we can't compute the offset to the following NUL if we don't know where to start searching for it. */ unsigned len = string_length (ptr, eltsize, strelts); - if (len < strelts) - { - /* Return when an embedded null character is found. */ - return NULL_TREE; - } - if (!maxelts) - return ssize_int (0); + /* Return when an embedded null character is found or none at all. */ + if (len < strelts || len > maxelts) + return NULL_TREE; /* We don't know the starting offset, but we do know that the string has no internal zero bytes. If the offset falls within the bounds @@ -650,8 +652,8 @@ c_strlen (tree src, int only_value) tree offsave = TREE_SIDE_EFFECTS (byteoff) ? 
save_expr (byteoff) : byteoff; offsave = fold_convert (ssizetype, offsave); tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave, - build_int_cst (ssizetype, len * eltsize)); - tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), offsave); + build_int_cst (ssizetype, len)); + tree lenexp = size_diffop_loc (loc, ssize_int (strelts), offsave); return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp, build_zero_cst (ssizetype)); } @@ -684,6 +686,11 @@ c_strlen (tree src, int only_value) return NULL_TREE; } + /* If eltoff is larger than strelts but less than maxelts the + string length is zero, since the excess memory will be zero. */ + if (eltoff > strelts) + return ssize_int (0); + /* Use strlen to search for the first zero byte. Since any strings constructed with build_string will have nulls appended, we win even if we get handed something like (char[4])"abcd". @@ -691,7 +698,7 @@ c_strlen (tree src, int only_value) Since ELTOFF is our starting index into the string, no further calculation is needed. 
*/ unsigned len = string_length (ptr + eltoff * eltsize, eltsize, - maxelts - eltoff); + strelts - eltoff); return ssize_int (len); } diff --git a/gcc/builtins.h b/gcc/builtins.h index 1113bd37f1b..805f1801604 100644 --- a/gcc/builtins.h +++ b/gcc/builtins.h @@ -58,7 +58,7 @@ extern bool get_pointer_alignment_1 (tree, unsigned int *, unsigned HOST_WIDE_INT *); extern unsigned int get_pointer_alignment (tree); extern unsigned string_length (const void*, unsigned, unsigned); -extern tree c_strlen (tree, int); +extern tree c_strlen (tree, int, unsigned = 1); extern void expand_builtin_setjmp_setup (rtx, rtx); extern void expand_builtin_setjmp_receiver (rtx); extern void expand_builtin_update_setjmp_buf (rtx); diff --git a/gcc/expr.c b/gcc/expr.c index e8a4f883128..c071be67783 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -11292,10 +11292,12 @@ is_aligning_offset (const_tree offset, const_tree exp) /* Return the tree node if an ARG corresponds to a string constant or zero if it doesn't. If we return nonzero, set *PTR_OFFSET to the (possibly non-constant) offset in bytes within the string that ARG is accessing. - The type of the offset is sizetype. */ + The type of the offset is sizetype. If MEM_SIZE is non-zero the storage + size of the memory is returned. If MEM_SIZE is zero, the string is + only returned when it is properly zero terminated. */ tree -string_constant (tree arg, tree *ptr_offset) +string_constant (tree arg, tree *ptr_offset, tree *mem_size) { tree array; STRIP_NOPS (arg); @@ -11349,7 +11351,7 @@ string_constant (tree arg, tree *ptr_offset) return NULL_TREE; tree offset; - if (tree str = string_constant (arg0, &offset)) + if (tree str = string_constant (arg0, &offset, mem_size)) { /* Avoid pointers to arrays (see bug 86622). 
*/ if (POINTER_TYPE_P (TREE_TYPE (arg)) @@ -11389,6 +11391,8 @@ string_constant (tree arg, tree *ptr_offset) if (TREE_CODE (array) == STRING_CST) { *ptr_offset = fold_convert (sizetype, offset); + if (mem_size) + *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array)); return array; } @@ -11448,7 +11452,9 @@ string_constant (tree arg, tree *ptr_offset) unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init); length = string_length (TREE_STRING_POINTER (init), charsize, length / charsize); - if (compare_tree_int (array_size, length + 1) < 0) + if (mem_size) + *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init)); + else if (compare_tree_int (array_size, length + 1) < 0) return NULL_TREE; *ptr_offset = offset; diff --git a/gcc/expr.h b/gcc/expr.h index cf047d49e2e..d4d25645f61 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -288,7 +288,7 @@ expand_normal (tree exp) /* Return the tree node and offset if a given argument corresponds to a string constant. */ -extern tree string_constant (tree, tree *); +extern tree string_constant (tree, tree *, tree * = NULL); /* Two different ways of generating switch statements. */ extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability); diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 42673b5e6c9..07341ebe66f 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -1275,11 +1275,13 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *gsi, tree c, tree len) Set *FLEXP to true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound - due to it being used as a poor-man's flexible array member. */ + due to it being used as a poor-man's flexible array member. + ELTSIZE is 1 for normal single byte character strings, and 2 or + 4 for wide character strings. ELTSIZE is by default 1. 
*/ static bool get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, - int fuzzy, bool *flexp) + int fuzzy, bool *flexp, unsigned eltsize = 1) { tree var, val = NULL_TREE; gimple *def_stmt; @@ -1300,8 +1302,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, tree aop0 = TREE_OPERAND (op, 0); if (TREE_CODE (aop0) == INDIRECT_REF && TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME) - return get_range_strlen (TREE_OPERAND (aop0, 0), - length, visited, type, fuzzy, flexp); + return get_range_strlen (TREE_OPERAND (aop0, 0), length, + visited, type, fuzzy, flexp, eltsize); } else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy) { @@ -1329,13 +1331,13 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, return false; } else - val = c_strlen (arg, 1); + val = c_strlen (arg, 1, eltsize); if (!val && fuzzy) { if (TREE_CODE (arg) == ADDR_EXPR) return get_range_strlen (TREE_OPERAND (arg, 0), length, - visited, type, fuzzy, flexp); + visited, type, fuzzy, flexp, eltsize); if (TREE_CODE (arg) == ARRAY_REF) { @@ -1477,7 +1479,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, || gimple_assign_unary_nop_p (def_stmt)) { tree rhs = gimple_assign_rhs1 (def_stmt); - return get_range_strlen (rhs, length, visited, type, fuzzy, flexp); + return get_range_strlen (rhs, length, visited, type, fuzzy, flexp, + eltsize); } else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR) { @@ -1486,7 +1489,7 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, for (unsigned int i = 0; i < 2; i++) if (!get_range_strlen (ops[i], length, visited, type, fuzzy, - flexp)) + flexp, eltsize)) { if (fuzzy == 2) *maxlen = build_all_ones_cst (size_type_node); @@ -1513,7 +1516,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, if (arg == gimple_phi_result (def_stmt)) continue; - if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp)) + if (!get_range_strlen 
(arg, length, visited, type, fuzzy, flexp, + eltsize)) { if (fuzzy == 2) *maxlen = build_all_ones_cst (size_type_node); @@ -1545,10 +1549,13 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, and false if PHIs and COND_EXPRs are to be handled optimistically, if we can determine string length minimum and maximum; it will use the minimum from the ones where it can be determined. - STRICT false should be only used for warning code. */ + STRICT false should be only used for warning code. + + ELTSIZE is 1 for normal single byte character strings, and 2 or + 4 for wide character strings. ELTSIZE is by default 1. */ bool -get_range_strlen (tree arg, tree minmaxlen[2], bool strict) +get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict) { bitmap visited = NULL; @@ -1557,7 +1564,7 @@ get_range_strlen (tree arg, tree minmaxlen[2], bool strict) bool flexarray = false; if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2, - &flexarray)) + &flexarray, eltsize)) { minmaxlen[0] = NULL_TREE; minmaxlen[1] = NULL_TREE; @@ -3500,7 +3507,7 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *gsi) wide_int maxlen; tree lenrange[2]; - if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true) + if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, 1, true) && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST) { diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h index 04e9bfa0851..e3fad83bbfb 100644 --- a/gcc/gimple-fold.h +++ b/gcc/gimple-fold.h @@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. 
If not see extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL); extern tree canonicalize_constructor_val (tree, tree); extern tree get_symbol_constant_value (tree); -extern bool get_range_strlen (tree, tree[2], bool = false); +extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false); extern tree get_maxval_strlen (tree, int); extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree); extern bool fold_stmt (gimple_stmt_iterator *); diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c index 5213e17d68f..2431b52e652 100644 --- a/gcc/gimple-ssa-sprintf.c +++ b/gcc/gimple-ssa-sprintf.c @@ -2125,12 +2125,12 @@ format_floating (const directive &dir, tree arg, vr_values *) Used by the format_string function below. */ static fmtresult -get_string_length (tree str) +get_string_length (tree str, unsigned eltsize) { if (!str) return fmtresult (); - if (tree slen = c_strlen (str, 1)) + if (tree slen = c_strlen (str, 1, eltsize)) { /* Simply return the length of the string. */ fmtresult res (tree_to_shwi (slen)); @@ -2143,7 +2143,7 @@ get_string_length (tree str) aren't known to point any such arrays result in LENRANGE[1] set to SIZE_MAX. */ tree lenrange[2]; - bool flexarray = get_range_strlen (str, lenrange); + bool flexarray = get_range_strlen (str, lenrange, eltsize); if (lenrange [0] || lenrange [1]) { @@ -2195,7 +2195,7 @@ get_string_length (tree str) return res; } - return get_string_length (NULL_TREE); + return fmtresult (); } /* Return the minimum and maximum number of characters formatted @@ -2274,7 +2274,8 @@ format_string (const directive &dir, tree arg, vr_values *) fmtresult res; /* Compute the range the argument's length can be in. */ - fmtresult slen = get_string_length (arg); + int count_by = dir.specifier == 'S' || dir.modifier == FMT_LEN_l ? 4 : 1; + fmtresult slen = get_string_length (arg, count_by); if (slen.range.min == slen.range.max && slen.range.min < HOST_WIDE_INT_MAX) { -- 2.30.2