1 /* An experimental state machine, for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
4 Copyright (C) 2019-2020 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
any later version.
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
28 #include "basic-block.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
35 #include "analyzer/analyzer.h"
36 #include "diagnostic-event-id.h"
37 #include "analyzer/analyzer-logging.h"
38 #include "analyzer/sm.h"
39 #include "analyzer/pending-diagnostic.h"
47 /* An experimental state machine, for tracking "taint": unsanitized uses
48 of data potentially under an attacker's control. */
/* Declaration of the taint state machine.  It derives from state_machine
   and overrides the hooks listed below; the out-of-line ctor registers the
   states "tainted", "has_lb", "has_ub" and "stop".  */
50 class taint_state_machine
: public state_machine
/* LOGGER is forwarded to the state_machine base by the out-of-line ctor.  */
53 taint_state_machine (logger
*logger
);
/* Always answers true for this state machine.  */
55 bool inherited_state_p () const FINAL OVERRIDE
{ return true; }
/* Per-statement hook.  The out-of-line definition looks at calls to
   "fread" and at assignments whose first rhs operand is indexed by a
   tracked value.  */
57 bool on_stmt (sm_context
*sm_ctxt
,
58 const supernode
*node
,
59 const gimple
*stmt
) const FINAL OVERRIDE
;
/* Per-condition hook.  RHS is marked ATTRIBUTE_UNUSED in the out-of-line
   definition.  */
61 void on_condition (sm_context
*sm_ctxt
,
62 const supernode
*node
,
66 tree rhs
) const FINAL OVERRIDE
;
/* The argument S is marked ATTRIBUTE_UNUSED in the out-of-line
   definition.  */
68 bool can_purge_p (state_t s
) const FINAL OVERRIDE
;
/* The comments below describe the four states; the ctor assigns the
   corresponding members m_tainted, m_has_lb, m_has_ub and m_stop.  */
70 /* State for a "tainted" value: unsanitized data potentially under an
71 attacker's control. */
74 /* State for a "tainted" value that has a lower bound. */
77 /* State for a "tainted" value that has an upper bound. */
80 /* Stop state, for a value we don't want to track any more. */
/* Pending diagnostic for the use of a tainted value in an array lookup.
   It is controlled by OPT_Wanalyzer_tainted_array_index and is worded
   according to m_has_bounds.  */
91 class tainted_array_index
92 : public pending_diagnostic_subclass
<tainted_array_index
>
/* Record the owning state machine SM, the tree ARG named in the messages,
   and HAS_BOUNDS, which selects the message wording.  */
95 tainted_array_index (const taint_state_machine
&sm
, tree arg
,
96 enum bounds has_bounds
)
97 : m_sm (sm
), m_arg (arg
), m_has_bounds (has_bounds
) {}
/* Fixed identifying string for this kind of diagnostic.  */
99 const char *get_kind () const FINAL OVERRIDE
{ return "tainted_array_index"; }
/* Equality is decided by same_tree_p on the two m_arg values; the visible
   comparison does not look at m_has_bounds.  */
101 bool operator== (const tainted_array_index
&other
) const
103 return same_tree_p (m_arg
, other
.m_arg
);
/* Issue the warning through warning_meta at RICH_LOC.  The switch on
   m_has_bounds picks one of three wordings: no bounds checking, no
   lower-bounds checking, or no upper-bounds checking.  */
106 bool emit (rich_location
*rich_loc
) FINAL OVERRIDE
108 diagnostic_metadata m
;
110 switch (m_has_bounds
)
115 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
116 "use of tainted value %qE in array lookup"
117 " without bounds checking",
121 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
122 "use of tainted value %qE in array lookup"
123 " without lower-bounds checking",
127 return warning_meta (rich_loc
, m
, OPT_Wanalyzer_tainted_array_index
,
128 "use of tainted value %qE in array lookup"
129 " without upper-bounds checking",
/* Describe a state change along the diagnostic path.  The text depends on
   whether the new state is m_tainted, m_has_lb or m_has_ub of the owning
   state machine.  */
135 label_text
describe_state_change (const evdesc::state_change
&change
)
138 if (change
.m_new_state
== m_sm
.m_tainted
)
/* Two wordings exist for the tainted case; the first is passed both
   m_expr and m_origin.
   NOTE(review): presumably chosen by whether an origin is known --
   confirm.  */
141 return change
.formatted_print ("%qE has an unchecked value here"
143 change
.m_expr
, change
.m_origin
);
145 return change
.formatted_print ("%qE gets an unchecked value here",
148 else if (change
.m_new_state
== m_sm
.m_has_lb
)
149 return change
.formatted_print ("%qE has its lower bound checked here",
151 else if (change
.m_new_state
== m_sm
.m_has_ub
)
152 return change
.formatted_print ("%qE has its upper bound checked here",
/* Any other new state gets an empty label.  */
154 return label_text ();
/* Describe the final event of the path, using the same three wordings as
   emit, again selected by m_has_bounds.  */
157 label_text
describe_final_event (const evdesc::final_event
&ev
) FINAL OVERRIDE
159 switch (m_has_bounds
)
164 return ev
.formatted_print ("use of tainted value %qE in array lookup"
165 " without bounds checking",
168 return ev
.formatted_print ("use of tainted value %qE in array lookup"
169 " without lower-bounds checking",
172 return ev
.formatted_print ("use of tainted value %qE in array lookup"
173 " without upper-bounds checking",
/* The state machine whose states describe_state_change compares
   against.  */
179 const taint_state_machine
&m_sm
;
/* Selects the message wording in emit and describe_final_event.  */
181 enum bounds m_has_bounds
;
184 /* taint_state_machine's ctor. */
/* Names the machine "taint" in the state_machine base, passes LOGGER
   through, and registers the four states with add_state in the order
   "tainted", "has_lb", "has_ub", "stop".  */
186 taint_state_machine::taint_state_machine (logger
*logger
)
187 : state_machine ("taint", logger
)
189 m_tainted
= add_state ("tainted");
190 m_has_lb
= add_state ("has_lb");
191 m_has_ub
= add_state ("has_ub");
192 m_stop
= add_state ("stop");
195 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
/* Two kinds of statement are examined:
   - a call whose fndecl is resolved by SM_CTXT and matches "fread": its
     argument 0 is moved from m_start to m_tainted;
   - an assignment: operand 1 of its first rhs operand is looked up, and if
     its state is m_tainted, m_has_lb or m_has_ub a tainted_array_index
     diagnostic is queued and the value is moved to m_stop.  */
198 taint_state_machine::on_stmt (sm_context
*sm_ctxt
,
199 const supernode
*node
,
200 const gimple
*stmt
) const
202 if (const gcall
*call
= dyn_cast
<const gcall
*> (stmt
))
203 if (tree callee_fndecl
= sm_ctxt
->get_fndecl_for_call (call
))
/* NOTE(review): the trailing 4 is presumably the expected argument count
   of fread -- confirm against is_named_call_p.  */
205 if (is_named_call_p (callee_fndecl
, "fread", call
, 4))
/* Argument 0 of the call is the value that becomes tainted.  */
207 tree arg
= gimple_call_arg (call
, 0);
209 sm_ctxt
->on_transition (node
, stmt
, arg
, m_start
, m_tainted
);
211 /* Dereference an ADDR_EXPR. */
212 // TODO: should the engine do this?
/* When ARG is an ADDR_EXPR, a second transition is requested on its
   operand 0.  */
213 if (TREE_CODE (arg
) == ADDR_EXPR
)
214 sm_ctxt
->on_transition (node
, stmt
, TREE_OPERAND (arg
, 0),
219 // TODO: ...etc; many other sources of untrusted data
221 if (const gassign
*assign
= dyn_cast
<const gassign
*> (stmt
))
223 tree rhs1
= gimple_assign_rhs1 (assign
);
224 enum tree_code op
= gimple_assign_rhs_code (assign
);
226 /* Check array accesses. */
/* ARG is operand 1 of RHS1.
   NOTE(review): presumably the index of an array reference -- confirm the
   guard on OP.  DIAG_ARG is the tree handed to the diagnostic, while ARG
   is the one used for state lookup and update.  */
229 tree arg
= TREE_OPERAND (rhs1
, 1);
230 tree diag_arg
= sm_ctxt
->get_diagnostic_tree (arg
);
232 /* Unsigned types have an implicit lower bound. */
/* Stays false unless ARG has an integral type.  */
233 bool is_unsigned
= false;
234 if (INTEGRAL_TYPE_P (TREE_TYPE (arg
)))
235 is_unsigned
= TYPE_UNSIGNED (TREE_TYPE (arg
));
237 state_t state
= sm_ctxt
->get_state (stmt
, arg
);
238 /* Can't use a switch as the states are non-const. */
239 if (state
== m_tainted
)
241 /* Complain about missing bounds. */
/* The diagnostic is built with either BOUNDS_LOWER or BOUNDS_NONE.
   NOTE(review): presumably selected by is_unsigned -- confirm.  */
242 pending_diagnostic
*d
243 = new tainted_array_index (*this, diag_arg
,
245 ? BOUNDS_LOWER
: BOUNDS_NONE
);
246 sm_ctxt
->warn (node
, stmt
, arg
, d
);
/* After warning, move ARG to m_stop.  */
247 sm_ctxt
->set_next_state (stmt
, arg
, m_stop
);
249 else if (state
== m_has_lb
)
251 /* Complain about missing upper bound. */
252 sm_ctxt
->warn (node
, stmt
, arg
,
253 new tainted_array_index (*this, diag_arg
,
255 sm_ctxt
->set_next_state (stmt
, arg
, m_stop
);
257 else if (state
== m_has_ub
)
259 /* Complain about missing lower bound. */
262 sm_ctxt
->warn (node
, stmt
, arg
,
263 new tainted_array_index (*this, diag_arg
,
265 sm_ctxt
->set_next_state (stmt
, arg
, m_stop
);
274 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
275 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
276 and states 'has_ub' and 'has_lb' to 'stop'. */
/* Every transition below is requested on LHS only; RHS is marked
   ATTRIBUTE_UNUSED.  */
279 taint_state_machine::on_condition (sm_context
*sm_ctxt
,
280 const supernode
*node
,
284 tree rhs ATTRIBUTE_UNUSED
) const
289 // TODO: this doesn't use the RHS; should we make it symmetric?
/* First pair of transitions: from m_tainted and from m_has_ub.
   NOTE(review): per the header comment the targets are presumably m_has_lb
   and m_stop -- confirm.  */
299 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
301 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_ub
,
/* Second pair of transitions: from m_tainted and from m_has_lb.
   NOTE(review): per the header comment the targets are presumably m_has_ub
   and m_stop -- confirm.  */
308 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_tainted
,
310 sm_ctxt
->on_transition (node
, stmt
, lhs
, m_has_lb
,
/* Implementation of the can_purge_p override declared in
   taint_state_machine.  The argument S is marked ATTRIBUTE_UNUSED, so the
   answer does not depend on the state passed in.  */
320 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED
) const
325 } // anonymous namespace
327 /* Internal interface to this file. */
/* Allocate a taint_state_machine with new, constructing it with LOGGER,
   and return the resulting pointer.  */
330 make_taint_state_machine (logger
*logger
)
332 return new taint_state_machine (logger
);
337 #endif /* #if ENABLE_ANALYZER */