From: Jakub Jelinek Date: Tue, 17 Mar 2020 12:36:41 +0000 (+0100) Subject: tree-ssa-strlen: Fix up count_nonzero_bytes* [PR94015] X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a9a437ffc4269650e34af92c4fb095b7ed98f94a;p=gcc.git tree-ssa-strlen: Fix up count_nonzero_bytes* [PR94015] As I said already yesterday in another PR, I'm afraid the mixing of apples and oranges (what we are actually computing, whether what bytes are zero or non-zero in the native representation of EXP itself or what EXP points to) in a single function where it performs some handling which must be specific to one or the other case unconditionally and only from time to time determines something based on if nbytes is 0 or not will continue to bite us again and again. So, this patch performs at least a partial cleanup to separate those two cases into two functions. In addition to the separation, the patch uses e.g. ctor_for_folding so that it does handle volatile loads properly and various other checks instead of directly using DECL_INITIAL or does guard native_encode_expr call the way it is guarded elsewhere (that host and target byte sizes are expected). I've left other issues I found as is for now, like the *allnonnul being IMHO wrongly computed (if we don't know anything about the bytes, such as if _1 = MEM[s_2(D)]; MEM[whatever] = _1; where nothing really is known about strlen(s) etc., the code right now clears *nulterm and *allnul, but keeps *allnonnull set), but the callers seem to never use that value for anything (so the question is why is it computed and how exactly should it be defined). Another thing I find quite weird is the distinction between count_nonzero_bytes failing (return false) and when it succeeds, but sets values to a don't know state (the warning is only issued if it succeeds), plus what lenrange[2] is for. The size of the store should be visible already from the store statement. Also the looking at the type of the MEM_REF first operand to determine if it is is_char_store is really weird, because both in user code and through sccvn where pointer conversions are useless the type of the MEM_REF operand doesn't have to have anything to do with what the code actually does. 2020-03-17 Jakub Jelinek PR tree-optimization/94015 * tree-ssa-strlen.c (count_nonzero_bytes): Split portions of the function where EXP is address of the bytes being stored rather than the bytes themselves into count_nonzero_bytes_addr. Punt on zero sized MEM_REF. Use VAR_P macro and handle CONST_DECL like VAR_DECLs. Use ctor_for_folding instead of looking at DECL_INITIAL. Punt before calling native_encode_expr if host or target doesn't have 8-bit chars. Formatting fixes. (count_nonzero_bytes_addr): New function. * gcc.dg/pr94015.c: New test. --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ec3d190bf11..c51606cf53e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2020-03-17 Jakub Jelinek + + PR tree-optimization/94015 + * tree-ssa-strlen.c (count_nonzero_bytes): Split portions of the + function where EXP is address of the bytes being stored rather than + the bytes themselves into count_nonzero_bytes_addr. Punt on zero + sized MEM_REF. Use VAR_P macro and handle CONST_DECL like VAR_DECLs. + Use ctor_for_folding instead of looking at DECL_INITIAL. Punt before + calling native_encode_expr if host or target doesn't have 8-bit + chars. Formatting fixes. + (count_nonzero_bytes_addr): New function. + 2020-03-17 Andre Vieira Mihail Ionescu Srinath Parvathaneni diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d0d93e7744e..a389a6a1a14 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-03-17 Jakub Jelinek + + PR tree-optimization/94015 + * gcc.dg/pr94015.c: New test. + 2020-03-17 Andre Vieira Mihail Ionescu Srinath Parvathaneni diff --git a/gcc/testsuite/gcc.dg/pr94015.c b/gcc/testsuite/gcc.dg/pr94015.c new file mode 100644 index 00000000000..2c91b235834 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr94015.c @@ -0,0 +1,107 @@ +/* PR tree-optimization/94015 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +char buf[10] = "AAAAAAAAA"; + +__attribute__((noipa)) char * +alloc (void) +{ + return buf; +} + +__attribute__((noipa)) void +f1 (void) +{ + char *s = alloc (); + *(char **)s = "1234567"; + s[7] = '\0'; +} + +__attribute__((noipa)) void +f2 (void) +{ + char *s = alloc (); + *(char **)s = "123456"; + s[6] = '\0'; +} + +__attribute__((noipa)) void +f3 (void) +{ + char *s = alloc (); + *(char **)s = "12345"; + s[5] = '\0'; +} + +__attribute__((noipa)) void +f4 (void) +{ + char *s = alloc (); + *(char **)s = "1234"; + s[4] = '\0'; +} + +__attribute__((noipa)) void +f5 (void) +{ + char *s = alloc (); + *(char **)s = "123"; + s[3] = '\0'; +} + +__attribute__((noipa)) void +f6 (void) +{ + char *s = alloc (); + *(char **)s = "12"; + s[2] = '\0'; +} + +__attribute__((noipa)) void +f7 (void) +{ + char *s = alloc (); + *(char **)s = "1"; + s[1] = '\0'; +} + +__attribute__((noipa)) void +f8 (void) +{ + char *s = alloc (); + *(char **)s = ""; + s[0] = '\0'; +} + +int +main () +{ + if (sizeof (char *) > 8) + return 0; + f1 (); + if (buf[7] != 0) + __builtin_abort (); + f2 (); + if (buf[6] != 0) + __builtin_abort (); + f3 (); + if (buf[5] != 0) + __builtin_abort (); + f4 (); + if (buf[4] != 0) + __builtin_abort (); + f5 (); + if (buf[3] != 0) + __builtin_abort (); + f6 (); + if (buf[2] != 0) + __builtin_abort (); + f7 (); + if (buf[1] != 0) + __builtin_abort (); + f8 (); + if (buf[0] != 0) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c index ec33d7c4baf..7fcc6107857 100644 --- a/gcc/tree-ssa-strlen.c +++ b/gcc/tree-ssa-strlen.c @@ -4605,6 +4605,11 @@ int ssa_name_limit_t::next_ssa_name (tree ssa_name) return 0; } +static bool +count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, + unsigned [3], bool *, bool *, bool *, + const vr_values *, ssa_name_limit_t &); + /* Determines the minimum and maximum number of leading non-zero bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1] to each. @@ -4627,102 +4632,6 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, bool *allnul, bool *allnonnul, const vr_values *rvals, ssa_name_limit_t &snlim) { - int idx = get_stridx (exp); - if (idx > 0) - { - strinfo *si = get_strinfo (idx); - if (!si) - return false; - - /* Handle both constant lengths as well non-constant lengths - in some range. */ - unsigned HOST_WIDE_INT minlen, maxlen; - if (tree_fits_shwi_p (si->nonzero_chars)) - minlen = maxlen = tree_to_shwi (si->nonzero_chars); - else if (nbytes - && si->nonzero_chars - && TREE_CODE (si->nonzero_chars) == SSA_NAME) - { - const value_range_equiv *vr - = CONST_CAST (class vr_values *, rvals) - ->get_value_range (si->nonzero_chars); - if (vr->kind () != VR_RANGE - || !range_int_cst_p (vr)) - return false; - - minlen = tree_to_uhwi (vr->min ()); - maxlen = tree_to_uhwi (vr->max ()); - } - else - return false; - - if (maxlen < offset) - return false; - - minlen = minlen < offset ? 0 : minlen - offset; - maxlen -= offset; - if (maxlen + 1 < nbytes) - return false; - - if (!nbytes - && TREE_CODE (si->ptr) == SSA_NAME - && !POINTER_TYPE_P (TREE_TYPE (si->ptr))) - { - /* SI->PTR is an SSA_NAME with a DEF_STMT like - _1 = MEM [(char * {ref-all})s_4(D)]; */ - gimple *stmt = SSA_NAME_DEF_STMT (exp); - if (gimple_assign_single_p (stmt) - && gimple_assign_rhs_code (stmt) == MEM_REF) - { - tree rhs = gimple_assign_rhs1 (stmt); - if (tree refsize = TYPE_SIZE_UNIT (TREE_TYPE (rhs))) - if (tree_fits_uhwi_p (refsize)) - { - nbytes = tree_to_uhwi (refsize); - maxlen = nbytes; - } - } - - if (!nbytes) - return false; - } - - if (nbytes <= minlen) - *nulterm = false; - - if (nbytes < minlen) - { - minlen = nbytes; - if (nbytes < maxlen) - maxlen = nbytes; - } - - if (minlen < lenrange[0]) - lenrange[0] = minlen; - if (lenrange[1] < maxlen) - lenrange[1] = maxlen; - - if (lenrange[2] < nbytes) - lenrange[2] = nbytes; - - /* Since only the length of the string are known and not its contents, - clear ALLNUL and ALLNONNUL purely on the basis of the length. */ - *allnul = false; - if (minlen < nbytes) - *allnonnul = false; - - return true; - } - - if (TREE_CODE (exp) == ADDR_EXPR) - { - /* If the size of the access hasn't been determined yet it's that - of a pointer. */ - if (!nbytes) - nbytes = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (exp))); - exp = TREE_OPERAND (exp, 0); - } - if (TREE_CODE (exp) == SSA_NAME) { /* Handle non-zero single-character stores specially. */ @@ -4778,8 +4687,7 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, tree arg = TREE_OPERAND (exp, 0); tree off = TREE_OPERAND (exp, 1); - if (TREE_CODE (off) != INTEGER_CST - || !tree_fits_uhwi_p (off)) + if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off)) return false; unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off); @@ -4796,15 +4704,17 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, if (!typesize || !tree_fits_uhwi_p (typesize)) return false; nbytes = tree_to_uhwi (typesize); + if (!nbytes) + return false; /* Handle MEM_REF = SSA_NAME types of assignments. */ - return count_nonzero_bytes (arg, offset, nbytes, lenrange, nulterm, - allnul, allnonnul, rvals, snlim); + return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm, + allnul, allnonnul, rvals, snlim); } - if (TREE_CODE (exp) == VAR_DECL && TREE_READONLY (exp)) + if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL) { - exp = DECL_INITIAL (exp); + exp = ctor_for_folding (exp); if (!exp) return false; } @@ -4831,6 +4741,8 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, unsigned char buf[256]; if (!prep) { + if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) + return false; /* If the pointer to representation hasn't been set above for STRING_CST point it at the buffer. */ prep = reinterpret_cast (buf); @@ -4874,8 +4786,8 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, if (n) { /* When the initial number of non-zero bytes N is non-zero, reset - *ALLNUL; if N is less than that the size of the representation - also clear *ALLNONNUL. */ + *ALLNUL; if N is less than that the size of the representation + also clear *ALLNONNUL. */ *allnul = false; if (n < nbytes) *allnonnul = false; @@ -4901,6 +4813,120 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, return true; } +/* Like count_nonzero_bytes, but instead of counting bytes in EXP, count + bytes that are pointed to by EXP, which should be a pointer. */ + +static bool +count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset, + unsigned HOST_WIDE_INT nbytes, + unsigned lenrange[3], bool *nulterm, + bool *allnul, bool *allnonnul, + const vr_values *rvals, ssa_name_limit_t &snlim) +{ + int idx = get_stridx (exp); + if (idx > 0) + { + strinfo *si = get_strinfo (idx); + if (!si) + return false; + + /* Handle both constant lengths as well non-constant lengths + in some range. */ + unsigned HOST_WIDE_INT minlen, maxlen; + if (tree_fits_shwi_p (si->nonzero_chars)) + minlen = maxlen = tree_to_shwi (si->nonzero_chars); + else if (si->nonzero_chars + && TREE_CODE (si->nonzero_chars) == SSA_NAME) + { + vr_values *v = CONST_CAST (vr_values *, rvals); + const value_range_equiv *vr = v->get_value_range (si->nonzero_chars); + if (vr->kind () != VR_RANGE || !range_int_cst_p (vr)) + return false; + + minlen = tree_to_uhwi (vr->min ()); + maxlen = tree_to_uhwi (vr->max ()); + } + else + return false; + + if (maxlen < offset) + return false; + + minlen = minlen < offset ? 0 : minlen - offset; + maxlen -= offset; + if (maxlen + 1 < nbytes) + return false; + + if (nbytes <= minlen) + *nulterm = false; + + if (nbytes < minlen) + { + minlen = nbytes; + if (nbytes < maxlen) + maxlen = nbytes; + } + + if (minlen < lenrange[0]) + lenrange[0] = minlen; + if (lenrange[1] < maxlen) + lenrange[1] = maxlen; + + if (lenrange[2] < nbytes) + lenrange[2] = nbytes; + + /* Since only the length of the string are known and not its contents, + clear ALLNUL and ALLNONNUL purely on the basis of the length. */ + *allnul = false; + if (minlen < nbytes) + *allnonnul = false; + + return true; + } + + if (TREE_CODE (exp) == ADDR_EXPR) + return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes, + lenrange, nulterm, allnul, allnonnul, rvals, + snlim); + + if (TREE_CODE (exp) == SSA_NAME) + { + gimple *stmt = SSA_NAME_DEF_STMT (exp); + if (gimple_code (stmt) == GIMPLE_PHI) + { + /* Avoid processing an SSA_NAME that has already been visited + or if an SSA_NAME limit has been reached. Indicate success + if the former and failure if the latter. */ + if (int res = snlim.next_ssa_name (exp)) + return res > 0; + + /* Determine the minimum and maximum from the PHI arguments. */ + unsigned int n = gimple_phi_num_args (stmt); + for (unsigned i = 0; i != n; i++) + { + tree def = gimple_phi_arg_def (stmt, i); + if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange, + nulterm, allnul, allnonnul, rvals, + snlim)) + return false; + } + + return true; + } + } + + /* Otherwise we don't know anything. */ + lenrange[0] = 0; + if (lenrange[1] < nbytes) + lenrange[1] = nbytes; + if (lenrange[2] < nbytes) + lenrange[2] = nbytes; + *nulterm = false; + *allnul = false; + *allnonnul = false; + return true; +} + /* Same as above except with an implicit SSA_NAME limit. RVALS is used to determine ranges of dynamically computed string lengths (the results of strlen). */