analyzer: implement region_model::get_representative_path_var for labels
[gcc.git] / gcc / analyzer / sm-taint.cc
1 /* An experimental state machine, for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
3
4 Copyright (C) 2019-2020 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "gimple.h"
30 #include "options.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
33 #include "function.h"
34 #include "json.h"
35 #include "analyzer/analyzer.h"
36 #include "diagnostic-event-id.h"
37 #include "analyzer/analyzer-logging.h"
38 #include "analyzer/sm.h"
39 #include "analyzer/pending-diagnostic.h"
40
41 #if ENABLE_ANALYZER
42
43 namespace ana {
44
45 namespace {
46
47 /* An experimental state machine, for tracking "taint": unsanitized uses
48 of data potentially under an attacker's control. */
49
50 class taint_state_machine : public state_machine
51 {
52 public:
53 taint_state_machine (logger *logger);
54
55 bool inherited_state_p () const FINAL OVERRIDE { return true; }
56
57 bool on_stmt (sm_context *sm_ctxt,
58 const supernode *node,
59 const gimple *stmt) const FINAL OVERRIDE;
60
61 void on_condition (sm_context *sm_ctxt,
62 const supernode *node,
63 const gimple *stmt,
64 tree lhs,
65 enum tree_code op,
66 tree rhs) const FINAL OVERRIDE;
67
68 bool can_purge_p (state_t s) const FINAL OVERRIDE;
69
70 /* State for a "tainted" value: unsanitized data potentially under an
71 attacker's control. */
72 state_t m_tainted;
73
74 /* State for a "tainted" value that has a lower bound. */
75 state_t m_has_lb;
76
77 /* State for a "tainted" value that has an upper bound. */
78 state_t m_has_ub;
79
80 /* Stop state, for a value we don't want to track any more. */
81 state_t m_stop;
82 };
83
84 enum bounds
85 {
86 BOUNDS_NONE,
87 BOUNDS_UPPER,
88 BOUNDS_LOWER
89 };
90
91 class tainted_array_index
92 : public pending_diagnostic_subclass<tainted_array_index>
93 {
94 public:
95 tainted_array_index (const taint_state_machine &sm, tree arg,
96 enum bounds has_bounds)
97 : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {}
98
99 const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
100
101 bool operator== (const tainted_array_index &other) const
102 {
103 return same_tree_p (m_arg, other.m_arg);
104 }
105
106 bool emit (rich_location *rich_loc) FINAL OVERRIDE
107 {
108 diagnostic_metadata m;
109 m.add_cwe (129);
110 switch (m_has_bounds)
111 {
112 default:
113 gcc_unreachable ();
114 case BOUNDS_NONE:
115 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
116 "use of tainted value %qE in array lookup"
117 " without bounds checking",
118 m_arg);
119 break;
120 case BOUNDS_UPPER:
121 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
122 "use of tainted value %qE in array lookup"
123 " without lower-bounds checking",
124 m_arg);
125 break;
126 case BOUNDS_LOWER:
127 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
128 "use of tainted value %qE in array lookup"
129 " without upper-bounds checking",
130 m_arg);
131 break;
132 }
133 }
134
135 label_text describe_state_change (const evdesc::state_change &change)
136 FINAL OVERRIDE
137 {
138 if (change.m_new_state == m_sm.m_tainted)
139 {
140 if (change.m_origin)
141 return change.formatted_print ("%qE has an unchecked value here"
142 " (from %qE)",
143 change.m_expr, change.m_origin);
144 else
145 return change.formatted_print ("%qE gets an unchecked value here",
146 change.m_expr);
147 }
148 else if (change.m_new_state == m_sm.m_has_lb)
149 return change.formatted_print ("%qE has its lower bound checked here",
150 change.m_expr);
151 else if (change.m_new_state == m_sm.m_has_ub)
152 return change.formatted_print ("%qE has its upper bound checked here",
153 change.m_expr);
154 return label_text ();
155 }
156
157 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
158 {
159 switch (m_has_bounds)
160 {
161 default:
162 gcc_unreachable ();
163 case BOUNDS_NONE:
164 return ev.formatted_print ("use of tainted value %qE in array lookup"
165 " without bounds checking",
166 m_arg);
167 case BOUNDS_UPPER:
168 return ev.formatted_print ("use of tainted value %qE in array lookup"
169 " without lower-bounds checking",
170 m_arg);
171 case BOUNDS_LOWER:
172 return ev.formatted_print ("use of tainted value %qE in array lookup"
173 " without upper-bounds checking",
174 m_arg);
175 }
176 }
177
178 private:
179 const taint_state_machine &m_sm;
180 tree m_arg;
181 enum bounds m_has_bounds;
182 };
183
184 /* taint_state_machine's ctor. */
185
186 taint_state_machine::taint_state_machine (logger *logger)
187 : state_machine ("taint", logger)
188 {
189 m_tainted = add_state ("tainted");
190 m_has_lb = add_state ("has_lb");
191 m_has_ub = add_state ("has_ub");
192 m_stop = add_state ("stop");
193 }
194
195 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
196
197 bool
198 taint_state_machine::on_stmt (sm_context *sm_ctxt,
199 const supernode *node,
200 const gimple *stmt) const
201 {
202 if (const gcall *call = dyn_cast <const gcall *> (stmt))
203 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
204 {
205 if (is_named_call_p (callee_fndecl, "fread", call, 4))
206 {
207 tree arg = gimple_call_arg (call, 0);
208
209 sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
210
211 /* Dereference an ADDR_EXPR. */
212 // TODO: should the engine do this?
213 if (TREE_CODE (arg) == ADDR_EXPR)
214 sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
215 m_start, m_tainted);
216 return true;
217 }
218 }
219 // TODO: ...etc; many other sources of untrusted data
220
221 if (const gassign *assign = dyn_cast <const gassign *> (stmt))
222 {
223 tree rhs1 = gimple_assign_rhs1 (assign);
224 enum tree_code op = gimple_assign_rhs_code (assign);
225
226 /* Check array accesses. */
227 if (op == ARRAY_REF)
228 {
229 tree arg = TREE_OPERAND (rhs1, 1);
230 tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
231
232 /* Unsigned types have an implicit lower bound. */
233 bool is_unsigned = false;
234 if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
235 is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg));
236
237 state_t state = sm_ctxt->get_state (stmt, arg);
238 /* Can't use a switch as the states are non-const. */
239 if (state == m_tainted)
240 {
241 /* Complain about missing bounds. */
242 pending_diagnostic *d
243 = new tainted_array_index (*this, diag_arg,
244 is_unsigned
245 ? BOUNDS_LOWER : BOUNDS_NONE);
246 sm_ctxt->warn (node, stmt, arg, d);
247 sm_ctxt->set_next_state (stmt, arg, m_stop);
248 }
249 else if (state == m_has_lb)
250 {
251 /* Complain about missing upper bound. */
252 sm_ctxt->warn (node, stmt, arg,
253 new tainted_array_index (*this, diag_arg,
254 BOUNDS_LOWER));
255 sm_ctxt->set_next_state (stmt, arg, m_stop);
256 }
257 else if (state == m_has_ub)
258 {
259 /* Complain about missing lower bound. */
260 if (!is_unsigned)
261 {
262 sm_ctxt->warn (node, stmt, arg,
263 new tainted_array_index (*this, diag_arg,
264 BOUNDS_UPPER));
265 sm_ctxt->set_next_state (stmt, arg, m_stop);
266 }
267 }
268 }
269 }
270
271 return false;
272 }
273
274 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
275 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
276 and states 'has_ub' and 'has_lb' to 'stop'. */
277
278 void
279 taint_state_machine::on_condition (sm_context *sm_ctxt,
280 const supernode *node,
281 const gimple *stmt,
282 tree lhs,
283 enum tree_code op,
284 tree rhs ATTRIBUTE_UNUSED) const
285 {
286 if (stmt == NULL)
287 return;
288
289 // TODO: this doesn't use the RHS; should we make it symmetric?
290
291 // TODO
292 switch (op)
293 {
294 //case NE_EXPR:
295 //case EQ_EXPR:
296 case GE_EXPR:
297 case GT_EXPR:
298 {
299 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
300 m_has_lb);
301 sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
302 m_stop);
303 }
304 break;
305 case LE_EXPR:
306 case LT_EXPR:
307 {
308 sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
309 m_has_ub);
310 sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
311 m_stop);
312 }
313 break;
314 default:
315 break;
316 }
317 }
318
319 bool
320 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
321 {
322 return true;
323 }
324
325 } // anonymous namespace
326
327 /* Internal interface to this file. */
328
329 state_machine *
330 make_taint_state_machine (logger *logger)
331 {
332 return new taint_state_machine (logger);
333 }
334
335 } // namespace ana
336
337 #endif /* #if ENABLE_ANALYZER */