From 467a48205279cab368dbeb02879bbbbe4b721516 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 11 Feb 2021 20:31:28 -0500 Subject: [PATCH] analyzer: fix ICE in print_mem_ref [PR98969] PR analyzer/98969 and PR analyzer/99064 describes ICEs, in both cases within print_mem_ref, when falsely reporting memory leaks - though it is possible to generate the ICE on other diagnostics (which I added in one of the test cases). This patch fixes the ICE, leaving the fix for the leak false positives as followup work. The analyzer uses region_model::get_representative_path_var and region_model::get_representative_tree to map back from its svalue and region classes to the tree type used by the rest of the compiler, and, in particular, for diagnostics. The root cause of the ICE is sloppiness about types within those functions; specifically when casts were stripped off svalues. To track these down I added wrapper functions that verify that the types of the results are correct, and in doing so found various other type-safety issues, which the patch also fixes. Doing so led to various changes in diagnostics messages due to more accurate types, but I felt that these changes weren't desirable. For example, the warning at CVE-2005-1689-minimal.c line 48 which expects: double-'free' of 'inbuf.data' changed fo double-'free' of '(char *)inbuf.data' So I added stripping of top-level casts where necessary to avoid cluttering diagnostics. Finally, the more accurate types led to worse results from readability_comparator, where e.g. the event message at line 50 of sensitive-1.c regressed from the precise: passing sensitive value 'password' in call to 'called_by_test_5' from 'test_5' to the vaguer: calling 'called_by_test_5' from 'test_5' This was due to erroneously picking the initial value of "password" in the caller frame as the best value within the *callee* frame, due to "char *" vs "const char *", which confuses the logic for tracking values that pass along callgraph edges. The patch fixes this by combining the readability tests for tree and stack depth, rather than performing them in sequence, so that it favors the value in the deepest frame. As noted above, the patch fixes the ICEs, but does not fix the leak false positives. gcc/analyzer/ChangeLog: PR analyzer/98969 * engine.cc (readability): Add names for the various arbitrary values. Handle NOP_EXPR and INTEGER_CST. (readability_comparator): Combine the readability tests for tree and stack depth, rather than performing them sequentially. (impl_region_model_context::on_state_leak): Strip off top-level casts. * region-model.cc (region_model::get_representative_path_var): Add type-checking, moving the bulk of the implementation to... (region_model::get_representative_path_var_1): ...here. Respect types in casts by recursing and re-adding the cast, rather than merely stripping them off. Use the correct type when handling region_svalue. (region_model::get_representative_tree): Strip off any top-level cast. (region_model::get_representative_path_var): Add type-checking, moving the bulk of the implementation to... (region_model::get_representative_path_var_1): ...here. * region-model.h (region_model::get_representative_path_var_1): New decl (region_model::get_representative_path_var_1): New decl. * store.cc (append_pathvar_with_type): New. (binding_cluster::get_representative_path_vars): Cast path_vars to the correct type when adding them to *OUT_PVS. gcc/testsuite/ChangeLog: PR analyzer/98969 * g++.dg/analyzer/pr99064.C: New test. * gcc.dg/analyzer/pr98969.c: New test. --- gcc/analyzer/engine.cc | 43 +++++++++-- gcc/analyzer/region-model.cc | 98 +++++++++++++++++++++---- gcc/analyzer/region-model.h | 7 ++ gcc/analyzer/store.cc | 19 ++++- gcc/testsuite/g++.dg/analyzer/pr99064.C | 39 ++++++++++ gcc/testsuite/gcc.dg/analyzer/pr98969.c | 18 +++++ 6 files changed, 200 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/g++.dg/analyzer/pr99064.C create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr98969.c diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 45aed8f0d37..89b3be22ecb 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -456,6 +456,9 @@ private: static int readability (const_tree expr) { + /* Arbitrarily-chosen "high readability" value. */ + const int HIGH_READABILITY = 65536; + gcc_assert (expr); switch (TREE_CODE (expr)) { @@ -479,8 +482,7 @@ readability (const_tree expr) case PARM_DECL: case VAR_DECL: if (DECL_NAME (expr)) - /* Arbitrarily-chosen "high readability" value. */ - return 65536; + return HIGH_READABILITY; else /* We don't want to print temporaries. For example, the C FE prints them as e.g. "" where "xxxx" is the low 16 bits @@ -490,7 +492,17 @@ readability (const_tree expr) case RESULT_DECL: /* Printing "" isn't ideal, but is less awful than trying to print a temporary. */ - return 32768; + return HIGH_READABILITY / 2; + + case NOP_EXPR: + { + /* Impose a moderate readability penalty for casts. */ + const int CAST_PENALTY = 32; + return readability (TREE_OPERAND (expr, 0)) - CAST_PENALTY; + } + + case INTEGER_CST: + return HIGH_READABILITY; default: return 0; @@ -508,14 +520,25 @@ readability_comparator (const void *p1, const void *p2) path_var pv1 = *(path_var const *)p1; path_var pv2 = *(path_var const *)p2; - int r1 = readability (pv1.m_tree); - int r2 = readability (pv2.m_tree); - if (int cmp = r2 - r1) - return cmp; + const int tree_r1 = readability (pv1.m_tree); + const int tree_r2 = readability (pv2.m_tree); /* Favor items that are deeper on the stack and hence more recent; this also favors locals over globals. */ - if (int cmp = pv2.m_stack_depth - pv1.m_stack_depth) + const int COST_PER_FRAME = 64; + const int depth_r1 = pv1.m_stack_depth * COST_PER_FRAME; + const int depth_r2 = pv2.m_stack_depth * COST_PER_FRAME; + + /* Combine the scores from the tree and from the stack depth. + This e.g. lets us have a slightly penalized cast in the most + recent stack frame "beat" an uncast value in an older stack frame. */ + const int sum_r1 = tree_r1 + depth_r1; + const int sum_r2 = tree_r2 + depth_r2; + if (int cmp = sum_r2 - sum_r1) + return cmp; + + /* Otherwise, more readable trees win. */ + if (int cmp = tree_r2 - tree_r1) return cmp; /* Otherwise, if they have the same readability, then impose an @@ -580,6 +603,10 @@ impl_region_model_context::on_state_leak (const state_machine &sm, = m_old_state->m_region_model->get_representative_path_var (sval, &visited); + /* Strip off top-level casts */ + if (leaked_pv.m_tree && TREE_CODE (leaked_pv.m_tree) == NOP_EXPR) + leaked_pv.m_tree = TREE_OPERAND (leaked_pv.m_tree, 0); + /* This might be NULL; the pending_diagnostic subclasses need to cope with this. */ tree leaked_tree = leaked_pv.m_tree; diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index cfe8a391dd9..2fc07c49649 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -2202,25 +2202,33 @@ region_model::eval_condition (tree lhs, return eval_condition (get_rvalue (lhs, ctxt), op, get_rvalue (rhs, ctxt)); } -/* Attempt to return a path_var that represents SVAL, or return NULL_TREE. +/* Implementation of region_model::get_representative_path_var. + Attempt to return a path_var that represents SVAL, or return NULL_TREE. Use VISITED to prevent infinite mutual recursion with the overload for regions. */ path_var -region_model::get_representative_path_var (const svalue *sval, - svalue_set *visited) const +region_model::get_representative_path_var_1 (const svalue *sval, + svalue_set *visited) const { - if (sval == NULL) - return path_var (NULL_TREE, 0); - - if (const svalue *cast_sval = sval->maybe_undo_cast ()) - sval = cast_sval; + gcc_assert (sval); /* Prevent infinite recursion. */ if (visited->contains (sval)) return path_var (NULL_TREE, 0); visited->add (sval); + /* Handle casts by recursion into get_representative_path_var. */ + if (const svalue *cast_sval = sval->maybe_undo_cast ()) + { + path_var result = get_representative_path_var (cast_sval, visited); + tree orig_type = sval->get_type (); + /* If necessary, wrap the result in a cast. */ + if (result.m_tree && orig_type) + result.m_tree = build1 (NOP_EXPR, orig_type, result.m_tree); + return result; + } + auto_vec pvs; m_store.get_representative_path_vars (this, visited, sval, &pvs); @@ -2233,7 +2241,7 @@ region_model::get_representative_path_var (const svalue *sval, const region *reg = ptr_sval->get_pointee (); if (path_var pv = get_representative_path_var (reg, visited)) return path_var (build1 (ADDR_EXPR, - TREE_TYPE (sval->get_type ()), + sval->get_type (), pv.m_tree), pv.m_stack_depth); } @@ -2261,16 +2269,54 @@ region_model::get_representative_path_var (const svalue *sval, return pvs[0]; } -/* Attempt to return a tree that represents SVAL, or return NULL_TREE. */ +/* Attempt to return a path_var that represents SVAL, or return NULL_TREE. + Use VISITED to prevent infinite mutual recursion with the overload for + regions + + This function defers to get_representative_path_var_1 to do the work; + it adds verification that get_representative_path_var_1 returned a tree + of the correct type. */ + +path_var +region_model::get_representative_path_var (const svalue *sval, + svalue_set *visited) const +{ + if (sval == NULL) + return path_var (NULL_TREE, 0); + + tree orig_type = sval->get_type (); + + path_var result = get_representative_path_var_1 (sval, visited); + + /* Verify that the result has the same type as SVAL, if any. */ + if (result.m_tree && orig_type) + gcc_assert (TREE_TYPE (result.m_tree) == orig_type); + + return result; +} + +/* Attempt to return a tree that represents SVAL, or return NULL_TREE. + + Strip off any top-level cast, to avoid messages like + double-free of '(void *)ptr' + from analyzer diagnostics. */ tree region_model::get_representative_tree (const svalue *sval) const { svalue_set visited; - return get_representative_path_var (sval, &visited).m_tree; + tree expr = get_representative_path_var (sval, &visited).m_tree; + + /* Strip off any top-level cast. */ + if (expr && TREE_CODE (expr) == NOP_EXPR) + return TREE_OPERAND (expr, 0); + + return expr; } -/* Attempt to return a path_var that represents REG, or return +/* Implementation of region_model::get_representative_path_var. + + Attempt to return a path_var that represents REG, or return the NULL path_var. For example, a region for a field of a local would be a path_var wrapping a COMPONENT_REF. @@ -2278,8 +2324,8 @@ region_model::get_representative_tree (const svalue *sval) const svalues. */ path_var -region_model::get_representative_path_var (const region *reg, - svalue_set *visited) const +region_model::get_representative_path_var_1 (const region *reg, + svalue_set *visited) const { switch (reg->get_kind ()) { @@ -2411,6 +2457,30 @@ region_model::get_representative_path_var (const region *reg, } } +/* Attempt to return a path_var that represents REG, or return + the NULL path_var. + For example, a region for a field of a local would be a path_var + wrapping a COMPONENT_REF. + Use VISITED to prevent infinite mutual recursion with the overload for + svalues. + + This function defers to get_representative_path_var_1 to do the work; + it adds verification that get_representative_path_var_1 returned a tree + of the correct type. */ + +path_var +region_model::get_representative_path_var (const region *reg, + svalue_set *visited) const +{ + path_var result = get_representative_path_var_1 (reg, visited); + + /* Verify that the result has the same type as REG, if any. */ + if (result.m_tree && reg->get_type ()) + gcc_assert (TREE_TYPE (result.m_tree) == reg->get_type ()); + + return result; +} + /* Update this model for any phis in SNODE, assuming we came from LAST_CFG_SUPEREDGE. */ diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index efd1a09e362..c73e39fddd9 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -581,6 +581,13 @@ class region_model const region *get_lvalue_1 (path_var pv, region_model_context *ctxt); const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt); + path_var + get_representative_path_var_1 (const svalue *sval, + svalue_set *visited) const; + path_var + get_representative_path_var_1 (const region *reg, + svalue_set *visited) const; + void add_any_constraints_from_ssa_def_stmt (tree lhs, enum tree_code op, tree rhs, diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index da5b5adb5b4..53b6e21aa75 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -1375,6 +1375,21 @@ binding_cluster::redundant_p () const && !m_touched); } +/* Add PV to OUT_PVS, casting it to TYPE if if is not already of that type. */ + +static void +append_pathvar_with_type (path_var pv, + tree type, + auto_vec *out_pvs) +{ + gcc_assert (pv.m_tree); + + if (TREE_TYPE (pv.m_tree) != type) + pv.m_tree = build1 (NOP_EXPR, type, pv.m_tree); + + out_pvs->safe_push (pv); +} + /* Find representative path_vars for SVAL within this binding of BASE_REG, appending the results to OUT_PVS. */ @@ -1411,7 +1426,7 @@ binding_cluster::get_representative_path_vars (const region_model *model, if (path_var pv = model->get_representative_path_var (subregion, visited)) - out_pvs->safe_push (pv); + append_pathvar_with_type (pv, sval->get_type (), out_pvs); } } else @@ -1420,7 +1435,7 @@ binding_cluster::get_representative_path_vars (const region_model *model, if (path_var pv = model->get_representative_path_var (skey->get_region (), visited)) - out_pvs->safe_push (pv); + append_pathvar_with_type (pv, sval->get_type (), out_pvs); } } } diff --git a/gcc/testsuite/g++.dg/analyzer/pr99064.C b/gcc/testsuite/g++.dg/analyzer/pr99064.C new file mode 100644 index 00000000000..a002219bf58 --- /dev/null +++ b/gcc/testsuite/g++.dg/analyzer/pr99064.C @@ -0,0 +1,39 @@ +// { dg-do compile { target c++17 } } +// { dg-additional-options "-Wno-analyzer-too-complex" } */ + +template struct iterator_traits; +template struct iterator_traits<_Tp *> { + typedef _Tp &reference; +}; +template struct __normal_iterator { + _Iterator _M_current; + __normal_iterator(_Iterator &__i) : _M_current(__i) {} + typename iterator_traits<_Iterator>::reference operator*() { + return *_M_current; + } +}; +template struct allocator; +template struct allocator_traits; +template struct allocator_traits> { + using pointer = _Tp *; +}; +struct TPkcs11Token; +struct __alloc_traits : allocator_traits> {}; +struct _Vector_base { + typedef __alloc_traits::pointer pointer; + struct { + pointer _M_start; + } _M_impl; +}; +struct : _Vector_base { + __normal_iterator begin() { return _M_impl._M_start; } +} list_tokens_token_list; +struct TPkcs11Token { + int *add_info; +}; +void list_tokens() { + for (__normal_iterator base = list_tokens_token_list.begin();;) { + int *add_info = new int; + (*base).add_info = add_info; // { dg-bogus "leak" "PR analyzer/98969" { xfail *-*-* } } + } +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pr98969.c b/gcc/testsuite/gcc.dg/analyzer/pr98969.c new file mode 100644 index 00000000000..8298f263898 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr98969.c @@ -0,0 +1,18 @@ +struct foo +{ + char *expr; +}; + +void +test_1 (long int i) +{ + struct foo *f = (struct foo *)i; + f->expr = __builtin_malloc (1024); +} /* { dg-bogus "leak" "PR analyzer/98969" { xfail *-*-* } } */ + +void +test_2 (long int i) +{ + __builtin_free (((struct foo *)i)->expr); + __builtin_free (((struct foo *)i)->expr); /* { dg-warning "double-'free' of '\\*\\(\\(struct foo \\*\\)i\\)\\.expr'" } */ +} -- 2.30.2