PR middle-end/78703 - -fprintf-return-value floating point handling incorrect...
[gcc.git] / gcc / gimple-ssa-sprintf.c
1 /* Copyright (C) 2016-2017 Free Software Foundation, Inc.
2 Contributed by Martin Sebor <msebor@redhat.com>.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the printf-return-value pass. The pass does
21 two things: 1) it analyzes calls to formatted output functions like
22 sprintf looking for possible buffer overflows and calls to bounded
23 functions like snprintf for early truncation (and under the control
24 of the -Wformat-length option issues warnings), and 2) under the
25 control of the -fprintf-return-value option it folds the return
26 value of safe calls into constants, making it possible to eliminate
27 code that depends on the value of those constants.
28
29 For all functions (bounded or not) the pass uses the size of the
30 destination object. That means that it will diagnose calls to
31 snprintf not on the basis of the size specified by the function's
32 second argument but rather on the basis of the size of the object the first
33 argument points to (if possible). For bound-checking built-ins
34 like __builtin___snprintf_chk the pass uses the size typically
35 determined by __builtin_object_size and passed to the built-in
36 by the Glibc inline wrapper.
37
38 The pass handles all forms of standard sprintf format directives,
39 including character, integer, floating point, pointer, and strings,
40 with the standard C flags, widths, and precisions. For integers
41 and strings it computes the length of output itself. For floating
42 point it uses MPFR to format known constants with up and down
43 rounding and uses the resulting range of output lengths. For
44 strings it uses the length of string literals and the sizes of
45 character arrays that a character pointer may point to as a bound
46 on the longest string. */
47
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "intl.h"
69
70 #include "builtins.h"
71 #include "stor-layout.h"
72
73 #include "realmpfr.h"
74 #include "target.h"
75
76 #include "cpplib.h"
77 #include "input.h"
78 #include "toplev.h"
79 #include "substring-locations.h"
80 #include "diagnostic.h"
81
/* An estimate of the largest value MB_LEN_MAX is likely to have on
   the target, chosen to be big enough for UTF-8.  A target hook would
   be the right source for it if it were used for optimization; for
   warnings the estimate is good enough.  */
#define target_mb_len_max()   6

/* The decimal exponent limit (LDBL_MAX_10_EXP) used in the bound
   below.  */
#define IEEE_MAX_10_EXP    4932

/* The largest number of bytes a single non-string directive can
   produce, i.e., the result of printf("%.*Lf", INT_MAX, -LDBL_MAX)
   for the exponent limit above.  */
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
92
93 namespace {
94
95 const pass_data pass_data_sprintf_length = {
96 GIMPLE_PASS, // pass type
97 "printf-return-value", // pass name
98 OPTGROUP_NONE, // optinfo_flags
99 TV_NONE, // tv_id
100 PROP_cfg, // properties_required
101 0, // properties_provided
102 0, // properties_destroyed
103 0, // properties_start
104 0, // properties_finish
105 };
106
/* The warning level in effect for the call being processed: the value
   of warn_format_trunc for bounded functions and the value of
   warn_format_overflow for all others.  */

static int warn_level;

/* Defined below.  */
struct format_result;
114
115 class pass_sprintf_length : public gimple_opt_pass
116 {
117 bool fold_return_value;
118
119 public:
120 pass_sprintf_length (gcc::context *ctxt)
121 : gimple_opt_pass (pass_data_sprintf_length, ctxt),
122 fold_return_value (false)
123 { }
124
125 opt_pass * clone () { return new pass_sprintf_length (m_ctxt); }
126
127 virtual bool gate (function *);
128
129 virtual unsigned int execute (function *);
130
131 void set_pass_param (unsigned int n, bool param)
132 {
133 gcc_assert (n == 0);
134 fold_return_value = param;
135 }
136
137 bool handle_gimple_call (gimple_stmt_iterator *);
138
139 struct call_info;
140 bool compute_format_length (call_info &, format_result *);
141 };
142
143 bool
144 pass_sprintf_length::gate (function *)
145 {
146 /* Run the pass iff -Warn-format-overflow or -Warn-format-truncation
147 is specified and either not optimizing and the pass is being invoked
148 early, or when optimizing and the pass is being invoked during
149 optimization (i.e., "late"). */
150 return ((warn_format_overflow > 0
151 || warn_format_trunc > 0
152 || flag_printf_return_value)
153 && (optimize > 0) == fold_return_value);
154 }
155
156 /* The result of a call to a formatted function. */
157
158 struct format_result
159 {
160 /* Number of characters written by the formatted function, exact,
161 minimum and maximum when an exact number cannot be determined.
162 Setting the minimum to HOST_WIDE_INT_MAX disables all length
163 tracking for the remainder of the format string.
164 Setting either of the other two members to HOST_WIDE_INT_MAX
165 disables the exact or maximum length tracking, respectively,
166 but continues to track the maximum. */
167 unsigned HOST_WIDE_INT number_chars;
168 unsigned HOST_WIDE_INT number_chars_min;
169 unsigned HOST_WIDE_INT number_chars_max;
170
171 /* True when the range given by NUMBER_CHARS_MIN and NUMBER_CHARS_MAX
172 can be relied on for value range propagation, false otherwise.
173 This means that BOUNDED must not be set if the number of bytes
174 produced by any directive is unspecified or implementation-
175 defined (unless the implementation's behavior is known and
176 determined via a target hook).
177 Note that BOUNDED only implies that the length of a function's
178 output is known to be within some range, not that it's constant
179 and a candidate for string folding. BOUNDED is a stronger
180 guarantee than KNOWNRANGE. */
181 bool bounded;
182
183 /* True when the range above is obtained from known values of
184 directive arguments or their bounds and not the result of
185 heuristics that depend on warning levels. It is used to
186 issue stricter diagnostics in cases where strings of unknown
187 lengths are bounded by the arrays they are determined to
188 refer to. KNOWNRANGE must not be used to set the range of
189 the return value of a call. */
190 bool knownrange;
191
192 /* True if no individual directive resulted in more than 4095 bytes
193 of output (the total NUMBER_CHARS might be greater). */
194 bool under4k;
195
196 /* True when a floating point directive has been seen in the format
197 string. */
198 bool floating;
199
200 /* True when an intermediate result has caused a warning. Used to
201 avoid issuing duplicate warnings while finishing the processing
202 of a call. */
203 bool warned;
204
205 /* Preincrement the number of output characters by 1. */
206 format_result& operator++ ()
207 {
208 return *this += 1;
209 }
210
211 /* Postincrement the number of output characters by 1. */
212 format_result operator++ (int)
213 {
214 format_result prev (*this);
215 *this += 1;
216 return prev;
217 }
218
219 /* Increment the number of output characters by N. */
220 format_result& operator+= (unsigned HOST_WIDE_INT);
221 };
222
223 format_result&
224 format_result::operator+= (unsigned HOST_WIDE_INT n)
225 {
226 gcc_assert (n < HOST_WIDE_INT_MAX);
227
228 if (number_chars < HOST_WIDE_INT_MAX)
229 number_chars += n;
230
231 if (number_chars_min < HOST_WIDE_INT_MAX)
232 number_chars_min += n;
233
234 if (number_chars_max < HOST_WIDE_INT_MAX)
235 number_chars_max += n;
236
237 return *this;
238 }
239
240 /* Return the value of INT_MIN for the target. */
241
242 static inline HOST_WIDE_INT
243 target_int_min ()
244 {
245 return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
246 }
247
248 /* Return the value of INT_MAX for the target. */
249
250 static inline unsigned HOST_WIDE_INT
251 target_int_max ()
252 {
253 return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
254 }
255
256 /* Return the value of SIZE_MAX for the target. */
257
258 static inline unsigned HOST_WIDE_INT
259 target_size_max ()
260 {
261 return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
262 }
263
264 /* Return the constant initial value of DECL if available or DECL
265 otherwise. Same as the synonymous function in c/c-typeck.c. */
266
267 static tree
268 decl_constant_value (tree decl)
269 {
270 if (/* Don't change a variable array bound or initial value to a constant
271 in a place where a variable is invalid. Note that DECL_INITIAL
272 isn't valid for a PARM_DECL. */
273 current_function_decl != 0
274 && TREE_CODE (decl) != PARM_DECL
275 && !TREE_THIS_VOLATILE (decl)
276 && TREE_READONLY (decl)
277 && DECL_INITIAL (decl) != 0
278 && TREE_CODE (DECL_INITIAL (decl)) != ERROR_MARK
279 /* This is invalid if initial value is not constant.
280 If it has either a function call, a memory reference,
281 or a variable, then re-evaluating it could give different results. */
282 && TREE_CONSTANT (DECL_INITIAL (decl))
283 /* Check for cases where this is sub-optimal, even though valid. */
284 && TREE_CODE (DECL_INITIAL (decl)) != CONSTRUCTOR)
285 return DECL_INITIAL (decl);
286 return decl;
287 }
288
289 /* Given FORMAT, set *PLOC to the source location of the format string
290 and return the format string if it is known or null otherwise. */
291
292 static const char*
293 get_format_string (tree format, location_t *ploc)
294 {
295 if (VAR_P (format))
296 {
297 /* Pull out a constant value if the front end didn't. */
298 format = decl_constant_value (format);
299 STRIP_NOPS (format);
300 }
301
302 if (integer_zerop (format))
303 {
304 /* FIXME: Diagnose null format string if it hasn't been diagnosed
305 by -Wformat (the latter diagnoses only nul pointer constants,
306 this pass can do better). */
307 return NULL;
308 }
309
310 HOST_WIDE_INT offset = 0;
311
312 if (TREE_CODE (format) == POINTER_PLUS_EXPR)
313 {
314 tree arg0 = TREE_OPERAND (format, 0);
315 tree arg1 = TREE_OPERAND (format, 1);
316 STRIP_NOPS (arg0);
317 STRIP_NOPS (arg1);
318
319 if (TREE_CODE (arg1) != INTEGER_CST)
320 return NULL;
321
322 format = arg0;
323
324 /* POINTER_PLUS_EXPR offsets are to be interpreted signed. */
325 if (!cst_and_fits_in_hwi (arg1))
326 return NULL;
327
328 offset = int_cst_value (arg1);
329 }
330
331 if (TREE_CODE (format) != ADDR_EXPR)
332 return NULL;
333
334 *ploc = EXPR_LOC_OR_LOC (format, input_location);
335
336 format = TREE_OPERAND (format, 0);
337
338 if (TREE_CODE (format) == ARRAY_REF
339 && tree_fits_shwi_p (TREE_OPERAND (format, 1))
340 && (offset += tree_to_shwi (TREE_OPERAND (format, 1))) >= 0)
341 format = TREE_OPERAND (format, 0);
342
343 if (offset < 0)
344 return NULL;
345
346 tree array_init;
347 tree array_size = NULL_TREE;
348
349 if (VAR_P (format)
350 && TREE_CODE (TREE_TYPE (format)) == ARRAY_TYPE
351 && (array_init = decl_constant_value (format)) != format
352 && TREE_CODE (array_init) == STRING_CST)
353 {
354 /* Extract the string constant initializer. Note that this may
355 include a trailing NUL character that is not in the array (e.g.
356 const char a[3] = "foo";). */
357 array_size = DECL_SIZE_UNIT (format);
358 format = array_init;
359 }
360
361 if (TREE_CODE (format) != STRING_CST)
362 return NULL;
363
364 if (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (format))) != char_type_node)
365 {
366 /* Wide format string. */
367 return NULL;
368 }
369
370 const char *fmtstr = TREE_STRING_POINTER (format);
371 unsigned fmtlen = TREE_STRING_LENGTH (format);
372
373 if (array_size)
374 {
375 /* Variable length arrays can't be initialized. */
376 gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
377
378 if (tree_fits_shwi_p (array_size))
379 {
380 HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
381 if (array_size_value > 0
382 && array_size_value == (int) array_size_value
383 && fmtlen > array_size_value)
384 fmtlen = array_size_value;
385 }
386 }
387 if (offset)
388 {
389 if (offset >= fmtlen)
390 return NULL;
391
392 fmtstr += offset;
393 fmtlen -= offset;
394 }
395
396 if (fmtlen < 1 || fmtstr[--fmtlen] != 0)
397 {
398 /* FIXME: Diagnose an unterminated format string if it hasn't been
399 diagnosed by -Wformat. Similarly to a null format pointer,
400 -Wformay diagnoses only nul pointer constants, this pass can
401 do better). */
402 return NULL;
403 }
404
405 return fmtstr;
406 }
407
408 /* The format_warning_at_substring function is not used here in a way
409 that makes using attribute format viable. Suppress the warning. */
410
411 #pragma GCC diagnostic push
412 #pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
413
414 /* For convenience and brevity. */
415
416 static bool
417 (* const fmtwarn) (const substring_loc &, const source_range *,
418 const char *, int, const char *, ...)
419 = format_warning_at_substring;
420
/* The length modifiers a directive can specify.  */

enum format_lengths
{
  /* No length modifier.  */
  FMT_LEN_none,
  /* "hh": char argument.  */
  FMT_LEN_hh,
  /* "h": short.  */
  FMT_LEN_h,
  /* "l": long.  */
  FMT_LEN_l,
  /* "ll": long long.  */
  FMT_LEN_ll,
  /* "L": long double (and GNU long long).  */
  FMT_LEN_L,
  /* "z": size_t.  */
  FMT_LEN_z,
  /* "t": ptrdiff_t.  */
  FMT_LEN_t,
  /* "j": intmax_t.  */
  FMT_LEN_j
};
435
436
437 /* A minimum and maximum number of bytes. */
438
439 struct result_range
440 {
441 unsigned HOST_WIDE_INT min, max;
442 };
443
444 /* Description of the result of conversion either of a single directive
445 or the whole format string. */
446
447 struct fmtresult
448 {
449 /* Construct a FMTRESULT object with all counters initialized
450 to MIN. KNOWNRANGE is set when MIN is valid. */
451 fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
452 : argmin (), argmax (),
453 knownrange (min < HOST_WIDE_INT_MAX),
454 bounded (),
455 nullp ()
456 {
457 range.min = min;
458 range.max = min;
459 }
460
461 /* Construct a FMTRESULT object with all counters initialized
462 to MIN. KNOWNRANGE is set when MIN is valid. */
463 fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max)
464 : argmin (), argmax (),
465 knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
466 bounded (),
467 nullp ()
468 {
469 range.min = min;
470 range.max = max;
471 }
472
473 /* The range a directive's argument is in. */
474 tree argmin, argmax;
475
476 /* The minimum and maximum number of bytes that a directive
477 results in on output for an argument in the range above. */
478 result_range range;
479
480 /* True when the range above is obtained from a known value of
481 a directive's argument or its bounds and not the result of
482 heuristics that depend on warning levels. */
483 bool knownrange;
484
485 /* True when the range is the result of an argument determined
486 to be bounded to a subrange of its type or value (such as by
487 value range propagation or the width of the formt directive),
488 false otherwise. */
489 bool bounded;
490
491 /* True when the argument is a null pointer. */
492 bool nullp;
493 };
494
495 /* Description of a conversion specification. */
496
497 struct directive
498 {
499 /* The 1-based directive number (for debugging). */
500 unsigned dirno;
501
502 /* The first character of the directive and its length. */
503 const char *beg;
504 size_t len;
505
506 /* A bitmap of flags, one for each character. */
507 unsigned flags[256 / sizeof (int)];
508
509 /* The specified width, or -1 if not specified. */
510 HOST_WIDE_INT width;
511 /* The specified precision, or -1 if not specified. */
512 HOST_WIDE_INT prec;
513
514 /* Length modifier. */
515 format_lengths modifier;
516
517 /* Format specifier character. */
518 char specifier;
519
520 /* The argument of the directive or null when the directive doesn't
521 take one or when none is available (such as for vararg functions). */
522 tree arg;
523
524 /* Format conversion function that given a conversion specification
525 and an argument returns the formatting result. */
526 fmtresult (*fmtfunc) (const directive &, tree);
527
528 /* Return True when a the format flag CHR has been used. */
529 bool get_flag (char chr) const
530 {
531 unsigned char c = chr & 0xff;
532 return (flags[c / (CHAR_BIT * sizeof *flags)]
533 & (1U << (c % (CHAR_BIT * sizeof *flags))));
534 }
535
536 /* Make a record of the format flag CHR having been used. */
537 void set_flag (char chr)
538 {
539 unsigned char c = chr & 0xff;
540 flags[c / (CHAR_BIT * sizeof *flags)]
541 |= (1U << (c % (CHAR_BIT * sizeof *flags)));
542 }
543
544 /* Reset the format flag CHR. */
545 void clear_flag (char chr)
546 {
547 unsigned char c = chr & 0xff;
548 flags[c / (CHAR_BIT * sizeof *flags)]
549 &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
550 }
551
552 /* Set the width to VAL. */
553 void set_width (HOST_WIDE_INT val)
554 {
555 width = val;
556 }
557
558 /* Set the width to ARG. */
559 void set_width (tree arg)
560 {
561 if (tree_fits_shwi_p (arg))
562 {
563 width = tree_to_shwi (arg);
564 if (width < 0)
565 {
566 if (width == HOST_WIDE_INT_MIN)
567 {
568 /* Avoid undefined behavior due to negating a minimum.
569 This case will be diagnosed since it will result in
570 more than INT_MAX bytes on output, either by the
571 directive itself (when INT_MAX < HOST_WIDE_INT_MAX)
572 or by the format function itself. */
573 width = HOST_WIDE_INT_MAX;
574 }
575 else
576 width = -width;
577 }
578 }
579 else
580 width = HOST_WIDE_INT_MIN;
581 }
582
583 /* Set the precision to val. */
584 void set_precision (HOST_WIDE_INT val)
585 {
586 prec = val;
587 }
588
589 /* Set the precision to ARG. */
590 void set_precision (tree arg)
591 {
592 if (tree_fits_shwi_p (arg))
593 {
594 prec = tree_to_shwi (arg);
595 if (prec < 0)
596 prec = -1;
597 }
598 else
599 prec = HOST_WIDE_INT_MIN;
600 }
601 };
602
/* Return the number of digits in the representation of X in BASE,
   i.e., one more than the integer logarithm of X in BASE, and 1
   when X is zero.  */

static int
ilog (unsigned HOST_WIDE_INT x, int base)
{
  /* Every value has at least one digit.  */
  int ndigits = 1;

  for (x /= base; x; x /= base)
    ++ndigits;

  return ndigits;
}
616
617 /* Return the number of bytes resulting from converting into a string
618 the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
619 PLUS indicates whether 1 for a plus sign should be added for positive
620 numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
621 ('0x') prefix should be added for nonzero numbers. Return -1 if X cannot
622 be represented. */
623
624 static HOST_WIDE_INT
625 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
626 {
627 unsigned HOST_WIDE_INT absval;
628
629 HOST_WIDE_INT res;
630
631 if (TYPE_UNSIGNED (TREE_TYPE (x)))
632 {
633 if (tree_fits_uhwi_p (x))
634 {
635 absval = tree_to_uhwi (x);
636 res = plus;
637 }
638 else
639 return -1;
640 }
641 else
642 {
643 if (tree_fits_shwi_p (x))
644 {
645 HOST_WIDE_INT i = tree_to_shwi (x);
646 if (HOST_WIDE_INT_MIN == i)
647 {
648 /* Avoid undefined behavior due to negating a minimum. */
649 absval = HOST_WIDE_INT_MAX;
650 res = 1;
651 }
652 else if (i < 0)
653 {
654 absval = -i;
655 res = 1;
656 }
657 else
658 {
659 absval = i;
660 res = plus;
661 }
662 }
663 else
664 return -1;
665 }
666
667 int ndigs = ilog (absval, base);
668
669 res += prec < ndigs ? ndigs : prec;
670
671 if (prefix && absval)
672 {
673 if (base == 8)
674 res += 1;
675 else if (base == 16)
676 res += 2;
677 }
678
679 return res;
680 }
681
682 /* Given the formatting result described by RES and NAVAIL, the number
683 of available in the destination, return the number of bytes remaining
684 in the destination. */
685
686 static inline result_range
687 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
688 {
689 result_range range;
690
691 if (HOST_WIDE_INT_MAX <= navail)
692 {
693 range.min = range.max = navail;
694 return range;
695 }
696
697 if (res.number_chars < navail)
698 {
699 range.min = range.max = navail - res.number_chars;
700 }
701 else if (res.number_chars_min < navail)
702 {
703 range.max = navail - res.number_chars_min;
704 }
705 else
706 range.max = 0;
707
708 if (res.number_chars_max < navail)
709 range.min = navail - res.number_chars_max;
710 else
711 range.min = 0;
712
713 return range;
714 }
715
716 /* Given the formatting result described by RES and NAVAIL, the number
717 of available in the destination, return the minimum number of bytes
718 remaining in the destination. */
719
720 static inline unsigned HOST_WIDE_INT
721 min_bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
722 {
723 if (HOST_WIDE_INT_MAX <= navail)
724 return navail;
725
726 if (warn_format_overflow > 1 || res.knownrange)
727 {
728 /* At level 2, or when all directives output an exact number
729 of bytes or when their arguments were bounded by known
730 ranges, use the greater of the two byte counters if it's
731 valid to compute the result. */
732 if (res.number_chars_max < HOST_WIDE_INT_MAX)
733 navail -= res.number_chars_max;
734 else if (res.number_chars < HOST_WIDE_INT_MAX)
735 navail -= res.number_chars;
736 else if (res.number_chars_min < HOST_WIDE_INT_MAX)
737 navail -= res.number_chars_min;
738 }
739 else
740 {
741 /* At level 1 use the smaller of the byte counters to compute
742 the result. */
743 if (res.number_chars < HOST_WIDE_INT_MAX)
744 navail -= res.number_chars;
745 else if (res.number_chars_min < HOST_WIDE_INT_MAX)
746 navail -= res.number_chars_min;
747 else if (res.number_chars_max < HOST_WIDE_INT_MAX)
748 navail -= res.number_chars_max;
749 }
750
751 if (navail > HOST_WIDE_INT_MAX)
752 navail = 0;
753
754 return navail;
755 }
756
757 /* Description of a call to a formatted function. */
758
759 struct pass_sprintf_length::call_info
760 {
761 /* Function call statement. */
762 gimple *callstmt;
763
764 /* Function called. */
765 tree func;
766
767 /* Called built-in function code. */
768 built_in_function fncode;
769
770 /* Format argument and format string extracted from it. */
771 tree format;
772 const char *fmtstr;
773
774 /* The location of the format argument. */
775 location_t fmtloc;
776
777 /* The destination object size for __builtin___xxx_chk functions
778 typically determined by __builtin_object_size, or -1 if unknown. */
779 unsigned HOST_WIDE_INT objsize;
780
781 /* Number of the first variable argument. */
782 unsigned HOST_WIDE_INT argidx;
783
784 /* True for functions like snprintf that specify the size of
785 the destination, false for others like sprintf that don't. */
786 bool bounded;
787
788 /* True for bounded functions like snprintf that specify a zero-size
789 buffer as a request to compute the size of output without actually
790 writing any. NOWRITE is cleared in response to the %n directive
791 which has side-effects similar to writing output. */
792 bool nowrite;
793
794 /* Return true if the called function's return value is used. */
795 bool retval_used () const
796 {
797 return gimple_get_lhs (callstmt);
798 }
799
800 /* Return the warning option corresponding to the called function. */
801 int warnopt () const
802 {
803 return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
804 }
805 };
806
807 /* Return the result of formatting a no-op directive (such as '%n'). */
808
809 static fmtresult
810 format_none (const directive &, tree)
811 {
812 fmtresult res (0);
813 res.bounded = true;
814 return res;
815 }
816
817 /* Return the result of formatting the '%%' directive. */
818
819 static fmtresult
820 format_percent (const directive &, tree)
821 {
822 fmtresult res (1);
823 res.bounded = true;
824 return res;
825 }
826
827
828 /* Compute intmax_type_node and uintmax_type_node similarly to how
829 tree.c builds size_type_node. */
830
831 static void
832 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
833 {
834 if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
835 {
836 *pintmax = integer_type_node;
837 *puintmax = unsigned_type_node;
838 }
839 else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
840 {
841 *pintmax = long_integer_type_node;
842 *puintmax = long_unsigned_type_node;
843 }
844 else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
845 {
846 *pintmax = long_long_integer_type_node;
847 *puintmax = long_long_unsigned_type_node;
848 }
849 else
850 {
851 for (int i = 0; i < NUM_INT_N_ENTS; i++)
852 if (int_n_enabled_p[i])
853 {
854 char name[50];
855 sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
856
857 if (strcmp (name, UINTMAX_TYPE) == 0)
858 {
859 *pintmax = int_n_trees[i].signed_type;
860 *puintmax = int_n_trees[i].unsigned_type;
861 return;
862 }
863 }
864 gcc_unreachable ();
865 }
866 }
867
868 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
869 argument, due to the conversion from either *ARGMIN or *ARGMAX to
870 the type of the directive's formal argument it's possible for both
871 to result in the same number of bytes or a range of bytes that's
872 less than the number of bytes that would result from formatting
873 some other value in the range [*ARGMIN, *ARGMAX]. This can be
874 determined by checking for the actual argument being in the range
875 of the type of the directive. If it isn't it must be assumed to
876 take on the full range of the directive's type.
877 Return true when the range has been adjusted to the full unsigned
878 range of DIRTYPE, or [0, DIRTYPE_MAX], and false otherwise. */
879
880 static bool
881 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
882 {
883 tree argtype = TREE_TYPE (*argmin);
884 unsigned argprec = TYPE_PRECISION (argtype);
885 unsigned dirprec = TYPE_PRECISION (dirtype);
886
887 /* If the actual argument and the directive's argument have the same
888 precision and sign there can be no overflow and so there is nothing
889 to adjust. */
890 if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
891 return false;
892
893 /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
894 branch in the extract_range_from_unary_expr function in tree-vrp.c. */
895
896 if (TREE_CODE (*argmin) == INTEGER_CST
897 && TREE_CODE (*argmax) == INTEGER_CST
898 && (dirprec >= argprec
899 || integer_zerop (int_const_binop (RSHIFT_EXPR,
900 int_const_binop (MINUS_EXPR,
901 *argmax,
902 *argmin),
903 size_int (dirprec)))))
904 {
905 *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
906 *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
907
908 /* If *ARGMIN is still less than *ARGMAX the conversion above
909 is safe. Otherwise, it has overflowed and would be unsafe. */
910 if (tree_int_cst_le (*argmin, *argmax))
911 return false;
912 }
913
914 tree dirmin = TYPE_MIN_VALUE (dirtype);
915 tree dirmax = TYPE_MAX_VALUE (dirtype);
916
917 if (TYPE_UNSIGNED (dirtype))
918 {
919 *argmin = dirmin;
920 *argmax = dirmax;
921 }
922 else
923 {
924 *argmin = integer_zero_node;
925 *argmax = dirmin;
926 }
927
928 return true;
929 }
930
931 /* Return a range representing the minimum and maximum number of bytes
932 that the conversion specification DIR will write on output for the
933 integer argument ARG when non-null. ARG may be null (for vararg
934 functions). */
935
936 static fmtresult
937 format_integer (const directive &dir, tree arg)
938 {
939 tree intmax_type_node;
940 tree uintmax_type_node;
941
942 /* Set WIDTH and PRECISION based on the specification. */
943 HOST_WIDE_INT width = dir.width;
944 HOST_WIDE_INT prec = dir.prec;
945
946 /* Base to format the number in. */
947 int base;
948
949 /* True when a signed conversion is preceded by a sign or space. */
950 bool maybesign = false;
951
952 /* True for signed conversions (i.e., 'd' and 'i'). */
953 bool sign = false;
954
955 switch (dir.specifier)
956 {
957 case 'd':
958 case 'i':
959 /* Space and '+' are only meaningful for signed conversions. */
960 maybesign = dir.get_flag (' ') | dir.get_flag ('+');
961 sign = true;
962 base = 10;
963 break;
964 case 'u':
965 base = 10;
966 break;
967 case 'o':
968 base = 8;
969 break;
970 case 'X':
971 case 'x':
972 base = 16;
973 break;
974 default:
975 gcc_unreachable ();
976 }
977
978 /* The type of the "formal" argument expected by the directive. */
979 tree dirtype = NULL_TREE;
980
981 /* Determine the expected type of the argument from the length
982 modifier. */
983 switch (dir.modifier)
984 {
985 case FMT_LEN_none:
986 if (dir.specifier == 'p')
987 dirtype = ptr_type_node;
988 else
989 dirtype = sign ? integer_type_node : unsigned_type_node;
990 break;
991
992 case FMT_LEN_h:
993 dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
994 break;
995
996 case FMT_LEN_hh:
997 dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
998 break;
999
1000 case FMT_LEN_l:
1001 dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1002 break;
1003
1004 case FMT_LEN_L:
1005 case FMT_LEN_ll:
1006 dirtype = (sign
1007 ? long_long_integer_type_node
1008 : long_long_unsigned_type_node);
1009 break;
1010
1011 case FMT_LEN_z:
1012 dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1013 break;
1014
1015 case FMT_LEN_t:
1016 dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1017 break;
1018
1019 case FMT_LEN_j:
1020 build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1021 dirtype = sign ? intmax_type_node : uintmax_type_node;
1022 break;
1023
1024 default:
1025 return fmtresult ();
1026 }
1027
1028 /* The type of the argument to the directive, either deduced from
1029 the actual non-constant argument if one is known, or from
1030 the directive itself when none has been provided because it's
1031 a va_list. */
1032 tree argtype = NULL_TREE;
1033
1034 if (!arg)
1035 {
1036 /* When the argument has not been provided, use the type of
1037 the directive's argument as an approximation. This will
1038 result in false positives for directives like %i with
1039 arguments with smaller precision (such as short or char). */
1040 argtype = dirtype;
1041 }
1042 else if (TREE_CODE (arg) == INTEGER_CST)
1043 {
1044 /* When a constant argument has been provided use its value
1045 rather than type to determine the length of the output. */
1046
1047 HOST_WIDE_INT len;
1048
1049 if ((prec == HOST_WIDE_INT_MIN || prec == 0) && integer_zerop (arg))
1050 {
1051 /* As a special case, a precision of zero with a zero argument
1052 results in zero bytes except in base 8 when the '#' flag is
1053 specified, and for signed conversions in base 8 and 10 when
1054 flags when either the space or '+' flag has been specified
1055 when it results in just one byte (with width having the normal
1056 effect). This must extend to the case of a specified precision
1057 with an unknown value because it can be zero. */
1058 len = ((base == 8 && dir.get_flag ('#')) || maybesign);
1059 }
1060 else
1061 {
1062 /* Convert the argument to the type of the directive. */
1063 arg = fold_convert (dirtype, arg);
1064
1065 /* True when a conversion is preceded by a prefix indicating the base
1066 of the argument (octal or hexadecimal). */
1067 bool maybebase = dir.get_flag ('#');
1068 len = tree_digits (arg, base, prec, maybesign, maybebase);
1069 if (len < 1)
1070 len = HOST_WIDE_INT_MAX;
1071 }
1072
1073 if (len < width)
1074 len = width;
1075
1076 /* The minimum and maximum number of bytes produced by the directive. */
1077 fmtresult res;
1078
1079 res.range.min = len;
1080
1081 /* The upper bound of the number of bytes is unlimited when either
1082 width or precision is specified but its value is unknown, and
1083 the same as the lower bound otherwise. */
1084 if (width == HOST_WIDE_INT_MIN || prec == HOST_WIDE_INT_MIN)
1085 {
1086 res.range.max = HOST_WIDE_INT_MAX;
1087 }
1088 else
1089 {
1090 res.range.max = len;
1091 res.bounded = true;
1092 res.knownrange = true;
1093 res.bounded = true;
1094 }
1095
1096 return res;
1097 }
1098 else if (TREE_CODE (TREE_TYPE (arg)) == INTEGER_TYPE
1099 || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1100 /* Determine the type of the provided non-constant argument. */
1101 argtype = TREE_TYPE (arg);
1102 else
1103 /* Don't bother with invalid arguments since they likely would
1104 have already been diagnosed, and disable any further checking
1105 of the format string by returning [-1, -1]. */
1106 return fmtresult ();
1107
1108 fmtresult res;
1109
1110 /* The result is bounded unless width or precision has been specified
1111 whose value is unknown. */
1112 res.bounded = width != HOST_WIDE_INT_MIN && prec != HOST_WIDE_INT_MIN;
1113
1114 /* Using either the range the non-constant argument is in, or its
1115 type (either "formal" or actual), create a range of values that
1116 constrain the length of output given the warning level. */
1117 tree argmin = NULL_TREE;
1118 tree argmax = NULL_TREE;
1119
1120 if (arg
1121 && TREE_CODE (arg) == SSA_NAME
1122 && TREE_CODE (argtype) == INTEGER_TYPE)
1123 {
1124 /* Try to determine the range of values of the integer argument
1125 (range information is not available for pointers). */
1126 wide_int min, max;
1127 enum value_range_type range_type = get_range_info (arg, &min, &max);
1128 if (range_type == VR_RANGE)
1129 {
1130 argmin = build_int_cst (argtype, wi::fits_uhwi_p (min)
1131 ? min.to_uhwi () : min.to_shwi ());
1132 argmax = build_int_cst (argtype, wi::fits_uhwi_p (max)
1133 ? max.to_uhwi () : max.to_shwi ());
1134
1135 /* Set KNOWNRANGE if the argument is in a known subrange
1136 of the directive's type (KNOWNRANGE may be reset below). */
1137 res.knownrange
1138 = (!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1139 || !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax));
1140
1141 res.argmin = argmin;
1142 res.argmax = argmax;
1143 }
1144 else if (range_type == VR_ANTI_RANGE)
1145 {
1146 /* Handle anti-ranges if/when bug 71690 is resolved. */
1147 }
1148 else if (range_type == VR_VARYING)
1149 {
1150 /* The argument here may be the result of promoting the actual
1151 argument to int. Try to determine the type of the actual
1152 argument before promotion and narrow down its range that
1153 way. */
1154 gimple *def = SSA_NAME_DEF_STMT (arg);
1155 if (is_gimple_assign (def))
1156 {
1157 tree_code code = gimple_assign_rhs_code (def);
1158 if (code == INTEGER_CST)
1159 {
1160 arg = gimple_assign_rhs1 (def);
1161 return format_integer (dir, arg);
1162 }
1163
1164 if (code == NOP_EXPR)
1165 {
1166 tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1167 if (TREE_CODE (type) == INTEGER_TYPE
1168 || TREE_CODE (type) == POINTER_TYPE)
1169 argtype = type;
1170 }
1171 }
1172 }
1173 }
1174
1175 if (!argmin)
1176 {
1177 /* For an unknown argument (e.g., one passed to a vararg function)
1178 or one whose value range cannot be determined, create a T_MIN
1179 constant if the argument's type is signed and T_MAX otherwise,
1180 and use those to compute the range of bytes that the directive
1181 can output. When precision is specified but unknown, use zero
1182 as the minimum since it results in no bytes on output (unless
1183 width is specified to be greater than 0). */
1184 argmin = build_int_cst (argtype, prec && prec != HOST_WIDE_INT_MIN);
1185
1186 int typeprec = TYPE_PRECISION (dirtype);
1187 int argprec = TYPE_PRECISION (argtype);
1188
1189 if (argprec < typeprec)
1190 {
1191 if (POINTER_TYPE_P (argtype))
1192 argmax = build_all_ones_cst (argtype);
1193 else if (TYPE_UNSIGNED (argtype))
1194 argmax = TYPE_MAX_VALUE (argtype);
1195 else
1196 argmax = TYPE_MIN_VALUE (argtype);
1197 }
1198 else
1199 {
1200 if (POINTER_TYPE_P (dirtype))
1201 argmax = build_all_ones_cst (dirtype);
1202 else if (TYPE_UNSIGNED (dirtype))
1203 argmax = TYPE_MAX_VALUE (dirtype);
1204 else
1205 argmax = TYPE_MIN_VALUE (dirtype);
1206 }
1207
1208 res.argmin = argmin;
1209 res.argmax = argmax;
1210 }
1211
1212 if (tree_int_cst_lt (argmax, argmin))
1213 {
1214 tree tmp = argmax;
1215 argmax = argmin;
1216 argmin = tmp;
1217 }
1218
1219 /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1220 of the directive. If it has been cleared then since ARGMIN and/or
1221 ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1222 ARGMAX in the result to include in diagnostics. */
1223 if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1224 {
1225 res.knownrange = false;
1226 res.argmin = argmin;
1227 res.argmax = argmax;
1228 }
1229
1230 /* Recursively compute the minimum and maximum from the known range,
1231 taking care to swap them if the lower bound results in longer
1232 output than the upper bound (e.g., in the range [-1, 0]). */
1233
1234 if (TYPE_UNSIGNED (dirtype))
1235 {
1236 /* For unsigned conversions/directives, use the minimum (i.e., 0
1237 or 1) and maximum to compute the shortest and longest output,
1238 respectively. */
1239 res.range.min = format_integer (dir, argmin).range.min;
1240 res.range.max = format_integer (dir, argmax).range.max;
1241 }
1242 else
1243 {
1244 /* For signed conversions/directives, use the maximum (i.e., 0)
1245 to compute the shortest output and the minimum (i.e., TYPE_MIN)
1246 to compute the longest output. This is important when precision
1247 is specified but unknown because otherwise both output lengths
1248 would reflect the largest possible precision (i.e., INT_MAX). */
1249 res.range.min = format_integer (dir, argmax).range.min;
1250 res.range.max = format_integer (dir, argmin).range.max;
1251 }
1252
1253 /* The result is bounded either when the argument is determined to be
1254 (e.g., when it's within some range) or when the minimum and maximum
1255 are the same. That can happen here for example when the specified
1256 width is as wide as the greater of MIN and MAX, as would be the case
1257 with sprintf (d, "%08x", x) with a 32-bit integer x. */
1258 res.bounded |= res.range.min == res.range.max;
1259
1260 if (res.range.max < res.range.min)
1261 {
1262 unsigned HOST_WIDE_INT tmp = res.range.max;
1263 res.range.max = res.range.min;
1264 res.range.min = tmp;
1265 }
1266
1267 return res;
1268 }
1269
1270 /* Return the number of bytes that a format directive consisting of FLAGS,
1271 PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1272 would result for argument X under ideal conditions (i.e., if PREC
1273 weren't excessive). MPFR 3.1 allocates large amounts of memory for
1274 values of PREC with large magnitude and can fail (see MPFR bug #21056).
1275 This function works around those problems. */
1276
1277 static unsigned HOST_WIDE_INT
1278 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1279 char spec, char rndspec)
1280 {
1281 char fmtstr[40];
1282
1283 HOST_WIDE_INT len = strlen (flags);
1284
1285 fmtstr[0] = '%';
1286 memcpy (fmtstr + 1, flags, len);
1287 memcpy (fmtstr + 1 + len, ".*R", 3);
1288 fmtstr[len + 4] = rndspec;
1289 fmtstr[len + 5] = spec;
1290 fmtstr[len + 6] = '\0';
1291
1292 spec = TOUPPER (spec);
1293 if (spec == 'E' || spec == 'F')
1294 {
1295 /* For %e, specify the precision explicitly since mpfr_sprintf
1296 does its own thing just to be different (see MPFR bug 21088). */
1297 if (prec < 0)
1298 prec = 6;
1299 }
1300 else
1301 {
1302 /* Avoid passing negative precisions with larger magnitude to MPFR
1303 to avoid exposing its bugs. (A negative precision is supposed
1304 to be ignored.) */
1305 if (prec < 0)
1306 prec = -1;
1307 }
1308
1309 HOST_WIDE_INT p = prec;
1310
1311 if (spec == 'G')
1312 {
1313 /* For G/g, precision gives the maximum number of significant
1314 digits which is bounded by LDBL_MAX_10_EXP, or, for a 128
1315 bit IEEE extended precision, 4932. Using twice as much
1316 here should be more than sufficient for any real format. */
1317 if ((IEEE_MAX_10_EXP * 2) < prec)
1318 prec = IEEE_MAX_10_EXP * 2;
1319 p = prec;
1320 }
1321 else
1322 {
1323 /* Cap precision arbitrarily at 1KB and add the difference
1324 (if any) to the MPFR result. */
1325 if (1024 < prec)
1326 p = 1024;
1327 }
1328
1329 len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1330
1331 /* Handle the unlikely (impossible?) error by returning more than
1332 the maximum dictated by the function's return type. */
1333 if (len < 0)
1334 return target_dir_max () + 1;
1335
1336 /* Adjust the return value by the difference. */
1337 if (p < prec)
1338 len += prec - p;
1339
1340 return len;
1341 }
1342
1343 /* Return the number of bytes to format using the format specifier
1344 SPEC and the precision PREC the largest value in the real floating
1345 TYPE. */
1346
1347 static unsigned HOST_WIDE_INT
1348 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1349 {
1350 machine_mode mode = TYPE_MODE (type);
1351
1352 /* IBM Extended mode. */
1353 if (MODE_COMPOSITE_P (mode))
1354 mode = DFmode;
1355
1356 /* Get the real type format desription for the target. */
1357 const real_format *rfmt = REAL_MODE_FORMAT (mode);
1358 REAL_VALUE_TYPE rv;
1359
1360 real_maxval (&rv, 0, mode);
1361
1362 /* Convert the GCC real value representation with the precision
1363 of the real type to the mpfr_t format with the GCC default
1364 round-to-nearest mode. */
1365 mpfr_t x;
1366 mpfr_init2 (x, rfmt->p);
1367 mpfr_from_real (x, &rv, GMP_RNDN);
1368
1369 /* Return a value one greater to account for the leading minus sign. */
1370 return 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1371 }
1372
1373 /* Return a range representing the minimum and maximum number of bytes
1374 that the conversion specification DIR will output for any argument
1375 given the WIDTH and PRECISION (extracted from DIR). This function
1376 is used when the directive argument or its value isn't known. */
1377
1378 static fmtresult
1379 format_floating (const directive &dir)
1380 {
1381 tree type;
1382
1383 switch (dir.modifier)
1384 {
1385 case FMT_LEN_l:
1386 case FMT_LEN_none:
1387 type = double_type_node;
1388 break;
1389
1390 case FMT_LEN_L:
1391 type = long_double_type_node;
1392 break;
1393
1394 case FMT_LEN_ll:
1395 type = long_double_type_node;
1396 break;
1397
1398 default:
1399 return fmtresult ();
1400 }
1401
1402 /* The minimum and maximum number of bytes produced by the directive. */
1403 fmtresult res;
1404
1405 /* The result is always bounded (though the range may be all of int). */
1406 res.bounded = true;
1407
1408 /* The minimum output as determined by flags. It's always at least 1. */
1409 int flagmin = (1 /* for the first digit */
1410 + (dir.get_flag ('+') | dir.get_flag (' '))
1411 + (dir.prec == 0 && dir.get_flag ('#')));
1412
1413 if (dir.width == HOST_WIDE_INT_MIN || dir.prec == HOST_WIDE_INT_MIN)
1414 {
1415 /* When either width or precision is specified but unknown
1416 the upper bound is the maximum. Otherwise it will be
1417 computed for each directive below. */
1418 res.range.max = HOST_WIDE_INT_MAX;
1419 }
1420 else
1421 res.range.max = HOST_WIDE_INT_M1U;
1422
1423 switch (dir.specifier)
1424 {
1425 case 'A':
1426 case 'a':
1427 {
1428 res.range.min = flagmin + 5 + (dir.prec > 0 ? dir.prec + 1 : 0);
1429 if (res.range.max == HOST_WIDE_INT_M1U)
1430 {
1431 /* Compute the upper bound for -TYPE_MAX. */
1432 res.range.max = format_floating_max (type, 'a', dir.prec);
1433 }
1434
1435 break;
1436 }
1437
1438 case 'E':
1439 case 'e':
1440 {
1441 /* The minimum output is "[-+]1.234567e+00" regardless
1442 of the value of the actual argument. */
1443 res.range.min = (flagmin
1444 + (dir.prec == HOST_WIDE_INT_MIN
1445 ? 0 : dir.prec < 0 ? 7 : dir.prec ? dir.prec + 1 : 0)
1446 + 2 /* e+ */ + 2);
1447
1448 if (res.range.max == HOST_WIDE_INT_M1U)
1449 {
1450 /* MPFR uses a precision of 16 by default for some reason.
1451 Set it to the C default of 6. */
1452 res.range.max = format_floating_max (type, 'e',
1453 -1 == dir.prec ? 6 : dir.prec);
1454 }
1455 break;
1456 }
1457
1458 case 'F':
1459 case 'f':
1460 {
1461 /* The lower bound when precision isn't specified is 8 bytes
1462 ("1.23456" since precision is taken to be 6). When precision
1463 is zero, the lower bound is 1 byte (e.g., "1"). Otherwise,
1464 when precision is greater than zero, then the lower bound
1465 is 2 plus precision (plus flags). */
1466 res.range.min = (flagmin
1467 + (dir.prec != HOST_WIDE_INT_MIN) /* decimal point */
1468 + (dir.prec == HOST_WIDE_INT_MIN
1469 ? 0 : dir.prec < 0 ? 6 : dir.prec ? dir.prec : -1));
1470
1471 if (res.range.max == HOST_WIDE_INT_M1U)
1472 {
1473 /* Compute the upper bound for -TYPE_MAX. */
1474 res.range.max = format_floating_max (type, 'f', dir.prec);
1475 }
1476 break;
1477 }
1478
1479 case 'G':
1480 case 'g':
1481 {
1482 /* The %g output depends on precision and the exponent of
1483 the argument. Since the value of the argument isn't known
1484 the lower bound on the range of bytes (not counting flags
1485 or width) is 1. */
1486 res.range.min = flagmin;
1487 if (res.range.max == HOST_WIDE_INT_M1U)
1488 {
1489 /* Compute the upper bound for -TYPE_MAX which should be
1490 the lesser of %e and %f. */
1491 res.range.max = format_floating_max (type, 'g', dir.prec);
1492 }
1493 break;
1494 }
1495
1496 default:
1497 return fmtresult ();
1498 }
1499
1500 if (dir.width > 0)
1501 {
1502 /* If width has been specified use it to adjust the range. */
1503 if (res.range.min < (unsigned)dir.width)
1504 res.range.min = dir.width;
1505 if (res.range.max < (unsigned)dir.width)
1506 res.range.max = dir.width;
1507 }
1508
1509 return res;
1510 }
1511
1512 /* Return a range representing the minimum and maximum number of bytes
1513 that the conversion specification DIR will write on output for the
1514 floating argument ARG. */
1515
1516 static fmtresult
1517 format_floating (const directive &dir, tree arg)
1518 {
1519 if (!arg || TREE_CODE (arg) != REAL_CST)
1520 return format_floating (dir);
1521
1522 HOST_WIDE_INT prec = dir.prec;
1523
1524 if (prec < 0 && TOUPPER (dir.specifier) != 'A')
1525 {
1526 /* Specify the precision explicitly since mpfr_sprintf defaults
1527 to zero. */
1528 prec = 6;
1529 }
1530
1531 /* The minimum and maximum number of bytes produced by the directive. */
1532 fmtresult res;
1533
1534 /* Get the real type format desription for the target. */
1535 const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1536 const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1537
1538 char fmtstr [40];
1539 char *pfmt = fmtstr;
1540
1541 /* Append flags. */
1542 for (const char *pf = "-+ #0"; *pf; ++pf)
1543 if (dir.get_flag (*pf))
1544 *pfmt++ = *pf;
1545
1546 *pfmt = '\0';
1547
1548 {
1549 /* Set up an array to easily iterate over. */
1550 unsigned HOST_WIDE_INT* const minmax[] = {
1551 &res.range.min, &res.range.max
1552 };
1553
1554 for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1555 {
1556 /* Convert the GCC real value representation with the precision
1557 of the real type to the mpfr_t format rounding down in the
1558 first iteration that computes the minimm and up in the second
1559 that computes the maximum. This order is arbibtrary because
1560 rounding in either direction can result in longer output. */
1561 mpfr_t mpfrval;
1562 mpfr_init2 (mpfrval, rfmt->p);
1563 mpfr_from_real (mpfrval, rvp, i ? MPFR_RNDU : MPFR_RNDD);
1564
1565 /* Use the MPFR rounding specifier to round down in the first
1566 iteration and then up. In most but not all cases this will
1567 result in the same number of bytes. */
1568 char rndspec = "DU"[i];
1569
1570 /* Format it and store the result in the corresponding member
1571 of the result struct. */
1572 unsigned HOST_WIDE_INT len
1573 = get_mpfr_format_length (mpfrval, fmtstr, prec,
1574 dir.specifier, rndspec);
1575
1576 if (0 < dir.width && len < (unsigned)dir.width)
1577 len = dir.width;
1578
1579 *minmax[i] = len;
1580 }
1581 }
1582
1583 /* Make sure the minimum is less than the maximum (MPFR rounding
1584 in the call to mpfr_snprintf can result in the reverse. */
1585 if (res.range.max < res.range.min)
1586 {
1587 unsigned HOST_WIDE_INT tmp = res.range.min;
1588 res.range.min = res.range.max;
1589 res.range.max = tmp;
1590 }
1591
1592 /* The range of output is known even if the result isn't bounded. */
1593 if (dir.width == HOST_WIDE_INT_MIN)
1594 {
1595 res.knownrange = false;
1596 res.range.max = HOST_WIDE_INT_MAX;
1597 }
1598 else
1599 res.knownrange = true;
1600
1601 /* The output of all directives except "%a" is fully specified
1602 and so the result is bounded unless it exceeds INT_MAX.
1603 For "%a" the output is fully specified only when precision
1604 is explicitly specified. */
1605 res.bounded = (res.knownrange
1606 && (TOUPPER (dir.specifier) != 'A'
1607 || (0 <= dir.prec && (unsigned) dir.prec < target_int_max ()))
1608 && res.range.min < target_int_max ());
1609
1610 return res;
1611 }
1612
1613 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
1614 strings referenced by the expression STR, or (-1, -1) when not known.
1615 Used by the format_string function below. */
1616
1617 static fmtresult
1618 get_string_length (tree str)
1619 {
1620 if (!str)
1621 return fmtresult ();
1622
1623 if (tree slen = c_strlen (str, 1))
1624 {
1625 /* Simply return the length of the string. */
1626 fmtresult res;
1627 res.range.min = res.range.max = tree_to_shwi (slen);
1628 res.bounded = true;
1629 res.knownrange = true;
1630 return res;
1631 }
1632
1633 /* Determine the length of the shortest and longest string referenced
1634 by STR. Strings of unknown lengths are bounded by the sizes of
1635 arrays that subexpressions of STR may refer to. Pointers that
1636 aren't known to point any such arrays result in LENRANGE[1] set
1637 to SIZE_MAX. */
1638 tree lenrange[2];
1639 get_range_strlen (str, lenrange);
1640
1641 if (lenrange [0] || lenrange [1])
1642 {
1643 fmtresult res;
1644
1645 res.range.min = (tree_fits_uhwi_p (lenrange[0])
1646 ? tree_to_uhwi (lenrange[0]) : warn_format_overflow > 1);
1647 res.range.max = (tree_fits_uhwi_p (lenrange[1])
1648 ? tree_to_uhwi (lenrange[1]) : HOST_WIDE_INT_M1U);
1649
1650 /* Set RES.BOUNDED to true if and only if all strings referenced
1651 by STR are known to be bounded (though not necessarily by their
1652 actual length but perhaps by their maximum possible length). */
1653 res.bounded = res.range.max < target_int_max ();
1654 res.knownrange = res.bounded;
1655
1656 return res;
1657 }
1658
1659 return get_string_length (NULL_TREE);
1660 }
1661
1662 /* Return the minimum and maximum number of characters formatted
1663 by the '%c' format directives and its wide character form for
1664 the argument ARG. ARG can be null (for functions such as
1665 vsprinf). */
1666
1667 static fmtresult
1668 format_character (const directive &dir, tree arg)
1669 {
1670 fmtresult res;
1671
1672 /* The maximum number of bytes for an unknown wide character argument
1673 to a "%lc" directive adjusted for precision but not field width.
1674 6 is the longest UTF-8 sequence for a single wide character. */
1675 const unsigned HOST_WIDE_INT max_bytes_for_unknown_wc
1676 = (0 <= dir.prec ? dir.prec : warn_level > 1 ? 6 : 1);
1677
1678 if (dir.modifier == FMT_LEN_l)
1679 {
1680 /* Positive if the argument is a wide NUL character. */
1681 int nul = (arg && TREE_CODE (arg) == INTEGER_CST
1682 ? integer_zerop (arg) : -1);
1683
1684 /* A '%lc' directive is the same as '%ls' for a two element
1685 wide string character with the second element of NUL, so
1686 when the character is unknown the minimum number of bytes
1687 is the smaller of either 0 (at level 1) or 1 (at level 2)
1688 and WIDTH, and the maximum is MB_CUR_MAX in the selected
1689 locale, which is unfortunately, unknown. */
1690 res.range.min = warn_level == 1 ? !nul : nul < 1;
1691 res.range.max = max_bytes_for_unknown_wc;
1692 /* The range above is good enough to issue warnings but not
1693 for value range propagation, so clear BOUNDED. */
1694 res.bounded = false;
1695 }
1696 else
1697 {
1698 /* A plain '%c' directive. Its ouput is exactly 1. */
1699 res.range.min = res.range.max = 1;
1700 res.bounded = true;
1701 res.knownrange = true;
1702 }
1703
1704 /* Adjust the lengths for field width. */
1705 if (0 < dir.width)
1706 {
1707 if (res.range.min < (unsigned HOST_WIDE_INT)dir.width)
1708 res.range.min = dir.width;
1709
1710 if (res.range.max < (unsigned HOST_WIDE_INT)dir.width)
1711 res.range.max = dir.width;
1712
1713 /* Adjust BOUNDED if width happens to make them equal. */
1714 if (res.range.min == res.range.max && res.range.min < target_int_max ())
1715 res.bounded = true;
1716 }
1717
1718 /* When precision is specified the range of characters on output
1719 is known to be bounded by it. */
1720 if (-1 < dir.width && -1 < dir.prec)
1721 res.knownrange = true;
1722
1723 return res;
1724 }
1725
1726 /* Return the minimum and maximum number of characters formatted
1727 by the '%c' and '%s' format directives and ther wide character
1728 forms for the argument ARG. ARG can be null (for functions
1729 such as vsprinf). */
1730
1731 static fmtresult
1732 format_string (const directive &dir, tree arg)
1733 {
1734 fmtresult res;
1735
1736 /* The maximum number of bytes for an unknown string argument to either
1737 a "%s" or "%ls" directive adjusted for precision but not field width. */
1738 const unsigned HOST_WIDE_INT max_bytes_for_unknown_str
1739 = (0 <= dir.prec ? dir.prec : warn_format_overflow > 1);
1740
1741 /* The result is bounded unless overriddden for a non-constant string
1742 of an unknown length. */
1743 bool bounded = true;
1744
1745 /* Compute the range the argument's length can be in. */
1746 fmtresult slen = get_string_length (arg);
1747 if (slen.range.min == slen.range.max
1748 && slen.range.min < HOST_WIDE_INT_MAX)
1749 {
1750 gcc_checking_assert (slen.range.min == slen.range.max);
1751
1752 /* A '%s' directive with a string argument with constant length. */
1753 res.range = slen.range;
1754
1755 /* The output of "%s" and "%ls" directives with a constant
1756 string is in a known range unless width of an unknown value
1757 is specified. For "%s" it is the length of the string. For
1758 "%ls" it is in the range [length, length * MB_LEN_MAX].
1759 (The final range can be further constrained by width and
1760 precision but it's always known.) */
1761 res.knownrange = HOST_WIDE_INT_MIN != dir.width;
1762
1763 if (dir.modifier == FMT_LEN_l)
1764 {
1765 bounded = false;
1766
1767 if (warn_level > 1)
1768 {
1769 /* Leave the minimum number of bytes the wide string
1770 converts to equal to its length and set the maximum
1771 to the worst case length which is the string length
1772 multiplied by MB_LEN_MAX. */
1773
1774 /* It's possible to be smarter about computing the maximum
1775 by scanning the wide string for any 8-bit characters and
1776 if it contains none, using its length for the maximum.
1777 Even though this would be simple to do it's unlikely to
1778 be worth it when dealing with wide characters. */
1779 res.range.max *= target_mb_len_max();
1780 }
1781
1782 /* For a wide character string, use precision as the maximum
1783 even if precision is greater than the string length since
1784 the number of bytes the string converts to may be greater
1785 (due to MB_CUR_MAX). */
1786 if (0 <= dir.prec)
1787 res.range.max = dir.prec;
1788 }
1789 else if (-1 <= dir.width)
1790 {
1791 /* The output of a "%s" directive with a constant argument
1792 and constant or no width is bounded. It is constant if
1793 precision is either not specified or it is specified and
1794 its value is known. */
1795 res.bounded = true;
1796 }
1797 else if (dir.width == HOST_WIDE_INT_MIN)
1798 {
1799 /* Specified but unknown width makes the output unbounded. */
1800 res.range.max = HOST_WIDE_INT_MAX;
1801 }
1802
1803 if (0 <= dir.prec && (unsigned HOST_WIDE_INT)dir.prec < res.range.min)
1804 {
1805 res.range.min = dir.prec;
1806 res.range.max = dir.prec;
1807 }
1808 else if (dir.prec == HOST_WIDE_INT_MIN)
1809 {
1810 /* When precision is specified but not known the lower
1811 bound is assumed to be as low as zero. */
1812 res.range.min = 0;
1813 }
1814 }
1815 else if (arg && integer_zerop (arg))
1816 {
1817 /* Handle null pointer argument. */
1818
1819 fmtresult res (0);
1820 res.nullp = true;
1821 return res;
1822 }
1823 else
1824 {
1825 /* For a '%s' and '%ls' directive with a non-constant string,
1826 the minimum number of characters is the greater of WIDTH
1827 and either 0 in mode 1 or the smaller of PRECISION and 1
1828 in mode 2, and the maximum is PRECISION or -1 to disable
1829 tracking. */
1830
1831 if (0 <= dir.prec)
1832 {
1833 if (slen.range.min >= target_int_max ())
1834 slen.range.min = 0;
1835 else if ((unsigned HOST_WIDE_INT)dir.prec < slen.range.min)
1836 slen.range.min = dir.prec;
1837
1838 if ((unsigned HOST_WIDE_INT)dir.prec < slen.range.max
1839 || slen.range.max >= target_int_max ())
1840 slen.range.max = dir.prec;
1841 }
1842 else if (slen.range.min >= target_int_max ())
1843 {
1844 slen.range.min = max_bytes_for_unknown_str;
1845 slen.range.max = max_bytes_for_unknown_str;
1846 bounded = false;
1847 }
1848
1849 res.range = slen.range;
1850
1851 /* The output is considered bounded when a precision has been
1852 specified to limit the number of bytes or when the number
1853 of bytes is known or contrained to some range. */
1854 res.bounded = 0 <= dir.prec || slen.bounded;
1855 res.knownrange = slen.knownrange;
1856 }
1857
1858 /* Adjust the lengths for field width. */
1859 if (0 < dir.width)
1860 {
1861 if (res.range.min < (unsigned HOST_WIDE_INT)dir.width)
1862 res.range.min = dir.width;
1863
1864 if (res.range.max < (unsigned HOST_WIDE_INT)dir.width)
1865 res.range.max = dir.width;
1866
1867 /* Adjust BOUNDED if width happens to make them equal. */
1868 if (res.range.min == res.range.max && res.range.min < target_int_max ()
1869 && bounded)
1870 res.bounded = true;
1871 }
1872
1873 /* When precision is specified the range of characters on output
1874 is known to be bounded by it. */
1875 if (HOST_WIDE_INT_MIN != dir.width && -1 < dir.prec)
1876 res.knownrange = true;
1877
1878 return res;
1879 }
1880
1881 /* At format string location describe by DIRLOC in a call described
1882 by INFO, issue a warning for a directive DIR whose output may be
1883 in excess of the available space AVAIL_RANGE in the destination
1884 given the formatting result FMTRES. This function does nothing
1885 except decide whether to issue a warning for a possible write
1886 past the end or truncation and, if so, format the warning.
1887 Return true if a warning has been issued. */
1888
1889 static bool
1890 maybe_warn (substring_loc &dirloc, source_range *pargrange,
1891 const pass_sprintf_length::call_info &info,
1892 unsigned HOST_WIDE_INT navail, const result_range &res,
1893 const directive &dir)
1894 {
1895 bool warned = false;
1896
1897 if (res.min < res.max)
1898 {
1899 /* The result is a range (i.e., it's inexact). */
1900 if (!warned)
1901 {
1902 if (navail < res.min)
1903 {
1904 /* The minimum directive output is longer than there is
1905 room in the destination. */
1906 if (res.min == res.max)
1907 {
1908 const char* fmtstr
1909 = (info.bounded
1910 ? G_("%<%.*s%> directive output truncated writing "
1911 "%wu bytes into a region of size %wu")
1912 : G_("%<%.*s%> directive writing %wu bytes "
1913 "into a region of size %wu"));
1914 warned = fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
1915 fmtstr,
1916 (int)dir.len, dir.beg, res.min,
1917 navail);
1918 }
1919 else if (res.max < HOST_WIDE_INT_MAX)
1920 {
1921 const char* fmtstr
1922 = (info.bounded
1923 ? G_("%<%.*s%> directive output truncated writing "
1924 "between %wu and %wu bytes into a region of "
1925 "size %wu")
1926 : G_("%<%.*s%> directive writing between %wu and "
1927 "%wu bytes into a region of size %wu"));
1928 warned = fmtwarn (dirloc, pargrange, NULL,
1929 info.warnopt (), fmtstr,
1930 (int)dir.len, dir.beg,
1931 res.min, res.max, navail);
1932 }
1933 else
1934 {
1935 const char* fmtstr
1936 = (info.bounded
1937 ? G_("%<%.*s%> directive output truncated writing "
1938 "%wu or more bytes into a region of size %wu")
1939 : G_("%<%.*s%> directive writing %wu or more bytes "
1940 "into a region of size %wu"));
1941 warned = fmtwarn (dirloc, pargrange, NULL,
1942 info.warnopt (), fmtstr,
1943 (int)dir.len, dir.beg,
1944 res.min, navail);
1945 }
1946 }
1947 else if (navail < res.max
1948 && (dir.specifier != 's'
1949 || res.max < HOST_WIDE_INT_MAX)
1950 && ((info.bounded
1951 && (!info.retval_used ()
1952 || warn_level > 1))
1953 || (!info.bounded
1954 && (dir.specifier == 's'
1955 || warn_level > 1))))
1956 {
1957 /* The maximum directive output is longer than there is
1958 room in the destination and the output length is either
1959 explicitly constrained by the precision (for strings)
1960 or the warning level is greater than 1. */
1961 if (res.max >= HOST_WIDE_INT_MAX)
1962 {
1963 const char* fmtstr
1964 = (info.bounded
1965 ? G_("%<%.*s%> directive output may be truncated "
1966 "writing %wu or more bytes into a region "
1967 "of size %wu")
1968 : G_("%<%.*s%> directive writing %wu or more bytes "
1969 "into a region of size %wu"));
1970 warned = fmtwarn (dirloc, pargrange, NULL,
1971 info.warnopt (), fmtstr,
1972 (int)dir.len, dir.beg,
1973 res.min, navail);
1974 }
1975 else
1976 {
1977 const char* fmtstr
1978 = (info.bounded
1979 ? G_("%<%.*s%> directive output may be truncated "
1980 "writing between %wu and %wu bytes into a region "
1981 "of size %wu")
1982 : G_("%<%.*s%> directive writing between %wu and %wu "
1983 "bytes into a region of size %wu"));
1984 warned = fmtwarn (dirloc, pargrange, NULL,
1985 info.warnopt (), fmtstr,
1986 (int)dir.len, dir.beg,
1987 res.min, res.max,
1988 navail);
1989 }
1990 }
1991 }
1992 }
1993 else
1994 {
1995 if (!warned && res.min > 0 && navail < res.min)
1996 {
1997 const char* fmtstr
1998 = (info.bounded
1999 ? (1 < res.min
2000 ? G_("%<%.*s%> directive output truncated while writing "
2001 "%wu bytes into a region of size %wu")
2002 : G_("%<%.*s%> directive output truncated while writing "
2003 "%wu byte into a region of size %wu"))
2004 : (1 < res.min
2005 ? G_("%<%.*s%> directive writing %wu bytes "
2006 "into a region of size %wu")
2007 : G_("%<%.*s%> directive writing %wu byte "
2008 "into a region of size %wu")));
2009
2010 warned = fmtwarn (dirloc, pargrange, NULL,
2011 info.warnopt (), fmtstr,
2012 (int)dir.len, dir.beg, res.min,
2013 navail);
2014 }
2015 }
2016
2017 return warned;
2018 }
2019
2020 /* Compute the length of the output resulting from the conversion
2021 specification DIR with the argument ARG in a call described by INFO
2022 and update the overall result of the call in *RES. The format directive
2023 corresponding to DIR starts at CVTBEG and is CVTLEN characters long. */
2024
2025 static bool
2026 format_directive (const pass_sprintf_length::call_info &info,
2027 format_result *res, const directive &dir)
2028 {
2029 const char *cvtbeg = dir.beg;
2030 size_t cvtlen = dir.len;
2031 tree arg = dir.arg;
2032
2033 /* Offset of the beginning of the directive from the beginning
2034 of the format string. */
2035 size_t offset = cvtbeg - info.fmtstr;
2036
2037 /* Create a location for the whole directive from the % to the format
2038 specifier. */
2039 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2040 offset, offset, offset + cvtlen - 1);
2041
2042 /* Also create a location range for the argument if possible.
2043 This doesn't work for integer literals or function calls. */
2044 source_range argrange;
2045 source_range *pargrange;
2046 if (arg && CAN_HAVE_LOCATION_P (arg))
2047 {
2048 argrange = EXPR_LOCATION_RANGE (arg);
2049 pargrange = &argrange;
2050 }
2051 else
2052 pargrange = NULL;
2053
2054 /* Bail when there is no function to compute the output length,
2055 or when minimum length checking has been disabled. */
2056 if (!dir.fmtfunc || res->number_chars_min >= HOST_WIDE_INT_MAX)
2057 return false;
2058
2059 /* Compute the (approximate) length of the formatted output. */
2060 fmtresult fmtres = dir.fmtfunc (dir, arg);
2061
2062 /* The overall result is bounded only if the output of every directive
2063 is bounded. */
2064 res->bounded &= fmtres.bounded;
2065
2066 /* Record whether the output of all directives is known to be
2067 bounded by some maximum, implying that their arguments are
2068 either known exactly or determined to be in a known range
2069 or, for strings, limited by the upper bounds of the arrays
2070 they refer to. */
2071 res->knownrange &= fmtres.knownrange;
2072
2073 if (!fmtres.knownrange)
2074 {
2075 /* Only when the range is known, check it against the host value
2076 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2077 INT_MAX precision, which is the longest possible output of any
2078 single directive). That's the largest valid byte count (though
2079 not valid call to a printf-like function because it can never
2080 return such a count). Otherwise, the range doesn't correspond
2081 to known values of the argument. */
2082 if (fmtres.range.max > target_dir_max ())
2083 {
2084 /* Normalize the MAX counter to avoid having to deal with it
2085 later. The counter can be less than HOST_WIDE_INT_M1U
2086 when compiling for an ILP32 target on an LP64 host. */
2087 fmtres.range.max = HOST_WIDE_INT_M1U;
2088 /* Disable exact and maximum length checking after a failure
2089 to determine the maximum number of characters (for example
2090 for wide characters or wide character strings) but continue
2091 tracking the minimum number of characters. */
2092 res->number_chars_max = HOST_WIDE_INT_M1U;
2093 res->number_chars = HOST_WIDE_INT_M1U;
2094 }
2095
2096 if (fmtres.range.min > target_dir_max ())
2097 {
2098 /* Disable exact length checking after a failure to determine
2099 even the minimum number of characters (it shouldn't happen
2100 except in an error) but keep tracking the minimum and maximum
2101 number of characters. */
2102 res->number_chars = HOST_WIDE_INT_M1U;
2103 return true;
2104 }
2105 }
2106
2107 if (fmtres.nullp)
2108 {
2109 fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
2110 "%<%.*s%> directive argument is null",
2111 (int)cvtlen, cvtbeg);
2112
2113 /* Don't bother processing the rest of the format string. */
2114 res->warned = true;
2115 res->number_chars = HOST_WIDE_INT_M1U;
2116 res->number_chars_min = res->number_chars_max = res->number_chars;
2117 return false;
2118 }
2119
2120 /* Compute the number of available bytes in the destination. There
2121 must always be at least one byte of space for the terminating
2122 NUL that's appended after the format string has been processed. */
2123 unsigned HOST_WIDE_INT navail = min_bytes_remaining (info.objsize, *res);
2124
2125 bool warned = res->warned;
2126
2127 if (!warned)
2128 warned = maybe_warn (dirloc, pargrange, info, navail,
2129 fmtres.range, dir);
2130
2131 if (fmtres.range.min < fmtres.range.max)
2132 {
2133 /* Disable exact length checking but adjust the minimum and maximum. */
2134 res->number_chars = HOST_WIDE_INT_M1U;
2135 if (res->number_chars_max < HOST_WIDE_INT_MAX
2136 && fmtres.range.max < HOST_WIDE_INT_MAX)
2137 res->number_chars_max += fmtres.range.max;
2138
2139 res->number_chars_min += fmtres.range.min;
2140 }
2141 else
2142 *res += fmtres.range.min;
2143
2144 /* Has the minimum directive output length exceeded the maximum
2145 of 4095 bytes required to be supported? */
2146 bool minunder4k = fmtres.range.min < 4096;
2147 if (!minunder4k || fmtres.range.max > 4095)
2148 res->under4k = false;
2149
2150 if (!warned && warn_level > 1
2151 && (!minunder4k || fmtres.range.max > 4095))
2152 {
2153 /* The directive output may be longer than the maximum required
2154 to be handled by an implementation according to 7.21.6.1, p15
2155 of C11. Warn on this only at level 2 but remember this and
2156 prevent folding the return value when done. This allows for
2157 the possibility of the actual libc call failing due to ENOMEM
2158 (like Glibc does under some conditions). */
2159
2160 if (fmtres.range.min == fmtres.range.max)
2161 warned = fmtwarn (dirloc, pargrange, NULL,
2162 info.warnopt (),
2163 "%<%.*s%> directive output of %wu bytes exceeds "
2164 "minimum required size of 4095",
2165 (int)cvtlen, cvtbeg, fmtres.range.min);
2166 else
2167 {
2168 const char *fmtstr
2169 = (minunder4k
2170 ? G_("%<%.*s%> directive output between %qu and %wu "
2171 "bytes may exceed minimum required size of 4095")
2172 : G_("%<%.*s%> directive output between %qu and %wu "
2173 "bytes exceeds minimum required size of 4095"));
2174
2175 warned = fmtwarn (dirloc, pargrange, NULL,
2176 info.warnopt (), fmtstr,
2177 (int)cvtlen, cvtbeg,
2178 fmtres.range.min, fmtres.range.max);
2179 }
2180 }
2181
2182 /* Has the minimum directive output length exceeded INT_MAX? */
2183 bool exceedmin = res->number_chars_min > target_int_max ();
2184
2185 if (!warned
2186 && (exceedmin
2187 || (warn_level > 1
2188 && res->number_chars_max > target_int_max ())))
2189 {
2190 /* The directive output causes the total length of output
2191 to exceed INT_MAX bytes. */
2192
2193 if (fmtres.range.min == fmtres.range.max)
2194 warned = fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
2195 "%<%.*s%> directive output of %wu bytes causes "
2196 "result to exceed %<INT_MAX%>",
2197 (int)cvtlen, cvtbeg, fmtres.range.min);
2198 else
2199 {
2200 const char *fmtstr
2201 = (exceedmin
2202 ? G_ ("%<%.*s%> directive output between %wu and %wu "
2203 "bytes causes result to exceed %<INT_MAX%>")
2204 : G_ ("%<%.*s%> directive output between %wu and %wu "
2205 "bytes may cause result to exceed %<INT_MAX%>"));
2206 warned = fmtwarn (dirloc, pargrange, NULL,
2207 info.warnopt (), fmtstr,
2208 (int)cvtlen, cvtbeg,
2209 fmtres.range.min, fmtres.range.max);
2210 }
2211 }
2212
2213 if (warned && fmtres.argmin)
2214 {
2215 if (fmtres.argmin == fmtres.argmax)
2216 inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
2217 else if (fmtres.knownrange)
2218 inform (info.fmtloc, "directive argument in the range [%E, %E]",
2219 fmtres.argmin, fmtres.argmax);
2220 else
2221 inform (info.fmtloc,
2222 "using the range [%E, %E] for directive argument",
2223 fmtres.argmin, fmtres.argmax);
2224 }
2225
2226 res->warned |= warned;
2227
2228 if (dump_file && *dir.beg)
2229 {
2230 fprintf (dump_file, " Result: %lli, %lli "
2231 "(%lli, %lli, %lli)\n",
2232 (long long)fmtres.range.min,
2233 (long long)fmtres.range.max,
2234 (long long)res->number_chars,
2235 (long long)res->number_chars_min,
2236 (long long)res->number_chars_max);
2237 }
2238
2239 return true;
2240 }
2241
2242 /* Account for the number of bytes between BEG and END (or between
2243 BEG + strlen (BEG) when END is null) in the format string in a call
2244 to a formatted output function described by INFO. Reflect the count
2245 in RES and issue warnings as appropriate. */
2246
2247 static void
2248 add_bytes (const pass_sprintf_length::call_info &info,
2249 const char *beg, const char *end, format_result *res)
2250 {
2251 if (res->number_chars_min >= HOST_WIDE_INT_MAX)
2252 return;
2253
2254 /* The number of bytes to output is the number of bytes between
2255 the end of the last directive and the beginning of the next
2256 one if it exists, otherwise the number of characters remaining
2257 in the format string plus 1 for the terminating NUL. */
2258 size_t nbytes = end ? end - beg : strlen (beg) + 1;
2259
2260 /* Return if there are no bytes to add at this time but there are
2261 directives remaining in the format string. */
2262 if (!nbytes)
2263 return;
2264
2265 /* Compute the range of available bytes in the destination. There
2266 must always be at least one byte left for the terminating NUL
2267 that's appended after the format string has been processed. */
2268 result_range avail_range = bytes_remaining (info.objsize, *res);
2269
2270 /* If issuing a diagnostic (only when one hasn't already been issued),
2271 distinguish between a possible overflow ("may write") and a certain
2272 overflow somewhere "past the end." (Ditto for truncation.)
2273 KNOWNRANGE is used to warn even at level 1 about possibly writing
2274 past the end or truncation due to strings of unknown lengths that
2275 are bounded by the arrays they are known to refer to. */
2276 if (!res->warned
2277 && (avail_range.max < nbytes
2278 || ((res->knownrange || warn_level > 1)
2279 && avail_range.min < nbytes)))
2280 {
2281 /* Set NAVAIL to the number of available bytes used to decide
2282 whether or not to issue a warning below. The exact kind of
2283 warning will depend on AVAIL_RANGE. */
2284 unsigned HOST_WIDE_INT navail = avail_range.max;
2285 if (nbytes <= navail && avail_range.min < HOST_WIDE_INT_MAX
2286 && (res->knownrange || warn_level > 1))
2287 navail = avail_range.min;
2288
2289 /* Compute the offset of the first format character that is beyond
2290 the end of the destination region and the length of the rest of
2291 the format string from that point on. */
2292 unsigned HOST_WIDE_INT off
2293 = (unsigned HOST_WIDE_INT)(beg - info.fmtstr) + navail;
2294
2295 size_t len = strlen (info.fmtstr + off);
2296
2297 /* Create a location that underscores the substring of the format
2298 string that is or may be written past the end (or is or may be
2299 truncated), pointing the caret at the first character of the
2300 substring. */
2301 substring_loc loc
2302 (info.fmtloc, TREE_TYPE (info.format), off, len ? off : 0,
2303 off + len - !!len);
2304
2305 /* Is the output of the last directive the result of the argument
2306 being within a range whose lower bound would fit in the buffer
2307 but the upper bound would not? If so, use the word "may" to
2308 indicate that the overflow/truncation may (but need not) happen. */
2309 bool boundrange
2310 = (res->number_chars_min < res->number_chars_max
2311 && res->number_chars_min + nbytes <= info.objsize);
2312
2313 if (!end && ((nbytes - navail) == 1 || boundrange))
2314 {
2315 /* There is room for the rest of the format string but none
2316 for the terminating nul. */
2317 const char *text
2318 = (info.bounded // Snprintf and the like.
2319 ? (boundrange
2320 ? G_("output may be truncated before the last format character"
2321 : "output truncated before the last format character"))
2322 : (boundrange
2323 ? G_("may write a terminating nul past the end "
2324 "of the destination")
2325 : G_("writing a terminating nul past the end "
2326 "of the destination")));
2327
2328 if (!info.bounded
2329 || !boundrange
2330 || !info.retval_used ()
2331 || warn_level > 1)
2332 res->warned = fmtwarn (loc, NULL, NULL, info.warnopt (), text);
2333 }
2334 else
2335 {
2336 /* There isn't enough room for 1 or more characters that remain
2337 to copy from the format string. */
2338 const char *text
2339 = (info.bounded // Snprintf and the like.
2340 ? (boundrange
2341 ? G_("output may be truncated at or before format character "
2342 "%qc at offset %wu")
2343 : G_("output truncated at format character %qc at offset %wu"))
2344 : (res->number_chars >= HOST_WIDE_INT_MAX
2345 ? G_("may write format character %#qc at offset %wu past "
2346 "the end of the destination")
2347 : G_("writing format character %#qc at offset %wu past "
2348 "the end of the destination")));
2349
2350 if (!info.bounded
2351 || !boundrange
2352 || !info.retval_used ()
2353 || warn_level > 1)
2354 res->warned = fmtwarn (loc, NULL, NULL, info.warnopt (),
2355 text, info.fmtstr[off], off);
2356 }
2357 }
2358
2359 if (res->warned && !end && info.objsize < HOST_WIDE_INT_MAX)
2360 {
2361 /* If a warning has been issued for buffer overflow or truncation
2362 (but not otherwise) help the user figure out how big a buffer
2363 they need. */
2364
2365 location_t callloc = gimple_location (info.callstmt);
2366
2367 unsigned HOST_WIDE_INT min = res->number_chars_min;
2368 unsigned HOST_WIDE_INT max = res->number_chars_max;
2369 unsigned HOST_WIDE_INT exact
2370 = (res->number_chars < HOST_WIDE_INT_MAX
2371 ? res->number_chars : res->number_chars_min);
2372
2373 if (min < max && max < HOST_WIDE_INT_MAX)
2374 inform (callloc,
2375 "format output between %wu and %wu bytes into "
2376 "a destination of size %wu",
2377 min + nbytes, max + nbytes, info.objsize);
2378 else
2379 inform (callloc,
2380 (nbytes + exact == 1
2381 ? G_("format output %wu byte into a destination of size %wu")
2382 : G_("format output %wu bytes into a destination of size %wu")),
2383 nbytes + exact, info.objsize);
2384 }
2385
2386 /* Add the number of bytes and then check for INT_MAX overflow. */
2387 *res += nbytes;
2388
2389 /* Has the minimum output length minus the terminating nul exceeded
2390 INT_MAX? */
2391 bool exceedmin = (res->number_chars_min - !end) > target_int_max ();
2392
2393 if (!res->warned
2394 && (exceedmin
2395 || (warn_level > 1
2396 && (res->number_chars_max - !end) > target_int_max ())))
2397 {
2398 /* The function's output exceeds INT_MAX bytes. */
2399
2400 /* Set NAVAIL to the number of available bytes used to decide
2401 whether or not to issue a warning below. The exact kind of
2402 warning will depend on AVAIL_RANGE. */
2403 unsigned HOST_WIDE_INT navail = avail_range.max;
2404 if (nbytes <= navail && avail_range.min < HOST_WIDE_INT_MAX
2405 && (res->bounded || warn_level > 1))
2406 navail = avail_range.min;
2407
2408 /* Compute the offset of the first format character that is beyond
2409 the end of the destination region and the length of the rest of
2410 the format string from that point on. */
2411 unsigned HOST_WIDE_INT off = (unsigned HOST_WIDE_INT)(beg - info.fmtstr);
2412 if (navail < HOST_WIDE_INT_MAX)
2413 off += navail;
2414
2415 size_t len = strlen (info.fmtstr + off);
2416
2417 substring_loc loc
2418 (info.fmtloc, TREE_TYPE (info.format), off - !len, len ? off : 0,
2419 off + len - !!len);
2420
2421 if (res->number_chars_min == res->number_chars_max)
2422 res->warned = fmtwarn (loc, NULL, NULL, info.warnopt (),
2423 "output of %wu bytes causes "
2424 "result to exceed %<INT_MAX%>",
2425 res->number_chars_min - !end);
2426 else
2427 {
2428 const char *text
2429 = (exceedmin
2430 ? G_ ("output between %wu and %wu bytes causes "
2431 "result to exceed %<INT_MAX%>")
2432 : G_ ("output between %wu and %wu bytes may cause "
2433 "result to exceed %<INT_MAX%>"));
2434 res->warned = fmtwarn (loc, NULL, NULL, info.warnopt (), text,
2435 res->number_chars_min - !end,
2436 res->number_chars_max - !end);
2437 }
2438 }
2439 }
2440
2441 #pragma GCC diagnostic pop
2442
2443 /* Parse a format directive in function call described by INFO starting
2444 at STR and populate DIR structure. Bump up *ARGNO by the number of
2445 arguments extracted for the directive. Return the length of
2446 the directive. */
2447
2448 static size_t
2449 parse_directive (pass_sprintf_length::call_info &info,
2450 directive &dir, format_result *res,
2451 const char *str, unsigned *argno)
2452 {
2453 const char *pcnt = strchr (str, '%');
2454 dir.beg = str;
2455
2456 if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
2457 {
2458 /* This directive is either a plain string or the terminating nul
2459 (which isn't really a directive but it simplifies things to
2460 handle it as if it were). */
2461 dir.len = len;
2462 dir.fmtfunc = NULL;
2463
2464 if (dump_file)
2465 {
2466 fprintf (dump_file, " Directive %u at offset %zu: \"%.*s\", "
2467 "length = %zu\n",
2468 dir.dirno, (size_t)(dir.beg - info.fmtstr),
2469 (int)dir.len, dir.beg, dir.len);
2470 }
2471
2472 return len - !*str;
2473 }
2474
2475 const char *pf = pcnt + 1;
2476
2477 /* POSIX numbered argument index or zero when none. */
2478 unsigned dollar = 0;
2479
2480 /* With and precision. -1 when not specified, HOST_WIDE_INT_MIN
2481 when given by a va_list argument, and a non-negative value
2482 when specified in the format string itself. */
2483 HOST_WIDE_INT width = -1;
2484 HOST_WIDE_INT precision = -1;
2485
2486 /* Width specified via the asterisk. Need not be INTEGER_CST.
2487 For vararg functions set to void_node. */
2488 tree star_width = NULL_TREE;
2489
2490 /* Width specified via the asterisk. Need not be INTEGER_CST.
2491 For vararg functions set to void_node. */
2492 tree star_precision = NULL_TREE;
2493
2494 if (ISDIGIT (*pf))
2495 {
2496 /* This could be either a POSIX positional argument, the '0'
2497 flag, or a width, depending on what follows. Store it as
2498 width and sort it out later after the next character has
2499 been seen. */
2500 char *end;
2501 width = strtol (pf, &end, 10);
2502 pf = end;
2503 }
2504 else if ('*' == *pf)
2505 {
2506 /* Similarly to the block above, this could be either a POSIX
2507 positional argument or a width, depending on what follows. */
2508 if (*argno < gimple_call_num_args (info.callstmt))
2509 star_width = gimple_call_arg (info.callstmt, (*argno)++);
2510 else
2511 star_width = void_node;
2512 ++pf;
2513 }
2514
2515 if (*pf == '$')
2516 {
2517 /* Handle the POSIX dollar sign which references the 1-based
2518 positional argument number. */
2519 if (width != -1)
2520 dollar = width + info.argidx;
2521 else if (star_width
2522 && TREE_CODE (star_width) == INTEGER_CST)
2523 dollar = width + tree_to_shwi (star_width);
2524
2525 /* Bail when the numbered argument is out of range (it will
2526 have already been diagnosed by -Wformat). */
2527 if (dollar == 0
2528 || dollar == info.argidx
2529 || dollar > gimple_call_num_args (info.callstmt))
2530 return false;
2531
2532 --dollar;
2533
2534 star_width = NULL_TREE;
2535 width = -1;
2536 ++pf;
2537 }
2538
2539 if (dollar || !star_width)
2540 {
2541 if (width != -1)
2542 {
2543 if (width == 0)
2544 {
2545 /* The '0' that has been interpreted as a width above is
2546 actually a flag. Reset HAVE_WIDTH, set the '0' flag,
2547 and continue processing other flags. */
2548 width = -1;
2549 dir.set_flag ('0');
2550 }
2551 else if (!dollar)
2552 {
2553 /* (Non-zero) width has been seen. The next character
2554 is either a period or a digit. */
2555 goto start_precision;
2556 }
2557 }
2558 /* When either '$' has been seen, or width has not been seen,
2559 the next field is the optional flags followed by an optional
2560 width. */
2561 for ( ; ; ) {
2562 switch (*pf)
2563 {
2564 case ' ':
2565 case '0':
2566 case '+':
2567 case '-':
2568 case '#':
2569 dir.set_flag (*pf++);
2570 break;
2571
2572 default:
2573 goto start_width;
2574 }
2575 }
2576
2577 start_width:
2578 if (ISDIGIT (*pf))
2579 {
2580 char *end;
2581 width = strtol (pf, &end, 10);
2582 pf = end;
2583 }
2584 else if ('*' == *pf)
2585 {
2586 if (*argno < gimple_call_num_args (info.callstmt))
2587 star_width = gimple_call_arg (info.callstmt, (*argno)++);
2588 else
2589 {
2590 /* This is (likely) a va_list. It could also be an invalid
2591 call with insufficient arguments. */
2592 star_width = void_node;
2593 }
2594 ++pf;
2595 }
2596 else if ('\'' == *pf)
2597 {
2598 /* The POSIX apostrophe indicating a numeric grouping
2599 in the current locale. Even though it's possible to
2600 estimate the upper bound on the size of the output
2601 based on the number of digits it probably isn't worth
2602 continuing. */
2603 return 0;
2604 }
2605 }
2606
2607 start_precision:
2608 if ('.' == *pf)
2609 {
2610 ++pf;
2611
2612 if (ISDIGIT (*pf))
2613 {
2614 char *end;
2615 precision = strtol (pf, &end, 10);
2616 pf = end;
2617 }
2618 else if ('*' == *pf)
2619 {
2620 if (*argno < gimple_call_num_args (info.callstmt))
2621 star_precision = gimple_call_arg (info.callstmt, (*argno)++);
2622 else
2623 {
2624 /* This is (likely) a va_list. It could also be an invalid
2625 call with insufficient arguments. */
2626 star_precision = void_node;
2627 }
2628 ++pf;
2629 }
2630 else
2631 {
2632 /* The decimal precision or the asterisk are optional.
2633 When neither is dirified it's taken to be zero. */
2634 precision = 0;
2635 }
2636 }
2637
2638 switch (*pf)
2639 {
2640 case 'h':
2641 if (pf[1] == 'h')
2642 {
2643 ++pf;
2644 dir.modifier = FMT_LEN_hh;
2645 }
2646 else
2647 dir.modifier = FMT_LEN_h;
2648 ++pf;
2649 break;
2650
2651 case 'j':
2652 dir.modifier = FMT_LEN_j;
2653 ++pf;
2654 break;
2655
2656 case 'L':
2657 dir.modifier = FMT_LEN_L;
2658 ++pf;
2659 break;
2660
2661 case 'l':
2662 if (pf[1] == 'l')
2663 {
2664 ++pf;
2665 dir.modifier = FMT_LEN_ll;
2666 }
2667 else
2668 dir.modifier = FMT_LEN_l;
2669 ++pf;
2670 break;
2671
2672 case 't':
2673 dir.modifier = FMT_LEN_t;
2674 ++pf;
2675 break;
2676
2677 case 'z':
2678 dir.modifier = FMT_LEN_z;
2679 ++pf;
2680 break;
2681 }
2682
2683 switch (*pf)
2684 {
2685 /* Handle a sole '%' character the same as "%%" but since it's
2686 undefined prevent the result from being folded. */
2687 case '\0':
2688 --pf;
2689 res->bounded = false;
2690 /* FALLTHRU */
2691 case '%':
2692 dir.fmtfunc = format_percent;
2693 break;
2694
2695 case 'a':
2696 case 'A':
2697 case 'e':
2698 case 'E':
2699 case 'f':
2700 case 'F':
2701 case 'g':
2702 case 'G':
2703 res->floating = true;
2704 dir.fmtfunc = format_floating;
2705 break;
2706
2707 case 'd':
2708 case 'i':
2709 case 'o':
2710 case 'u':
2711 case 'x':
2712 case 'X':
2713 dir.fmtfunc = format_integer;
2714 break;
2715
2716 case 'p':
2717 /* The %p output is implementation-defined. It's possible
2718 to determine this format but due to extensions (edirially
2719 those of the Linux kernel -- see bug 78512) the first %p
2720 in the format string disables any further processing. */
2721 return false;
2722
2723 case 'n':
2724 /* %n has side-effects even when nothing is actually printed to
2725 any buffer. */
2726 info.nowrite = false;
2727 dir.fmtfunc = format_none;
2728 break;
2729
2730 case 'c':
2731 dir.fmtfunc = format_character;
2732 break;
2733
2734 case 'S':
2735 case 's':
2736 dir.fmtfunc = format_string;
2737 break;
2738
2739 default:
2740 /* Unknown conversion specification. */
2741 return 0;
2742 }
2743
2744 dir.specifier = *pf++;
2745
2746 if (star_width)
2747 {
2748 if (TREE_CODE (TREE_TYPE (star_width)) == INTEGER_TYPE)
2749 dir.set_width (star_width);
2750 else
2751 {
2752 /* Width specified by a va_list takes on the range [0, -INT_MIN]
2753 (width is the absolute value of that specified). */
2754 dir.width = HOST_WIDE_INT_MIN;
2755 }
2756 }
2757 else
2758 dir.set_width (width);
2759
2760 if (star_precision)
2761 {
2762 if (TREE_CODE (TREE_TYPE (star_precision)) == INTEGER_TYPE)
2763 dir.set_precision (star_precision);
2764 else
2765 {
2766 /* Precision specified by a va_list takes on the range [-1, INT_MAX]
2767 (unlike width, negative precision is ignored). */
2768 dir.prec = HOST_WIDE_INT_MIN;
2769 }
2770 }
2771 else
2772 dir.set_precision (precision);
2773
2774 /* Extract the argument if the directive takes one and if it's
2775 available (e.g., the function doesn't take a va_list). Treat
2776 missing arguments the same as va_list, even though they will
2777 have likely already been diagnosed by -Wformat. */
2778 if (dir.specifier != '%'
2779 && *argno < gimple_call_num_args (info.callstmt))
2780 dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
2781
2782 /* Return the length of the format directive. */
2783 dir.len = pf - pcnt;
2784
2785 if (dump_file)
2786 {
2787 fprintf (dump_file, " Directive %u at offset %zu: \"%.*s\"",
2788 dir.dirno, (size_t)(dir.beg - info.fmtstr),
2789 (int)dir.len, dir.beg);
2790 if (star_width)
2791 fprintf (dump_file, ", width = %lli", (long long)dir.width);
2792
2793 if (star_precision)
2794 fprintf (dump_file, ", precision = %lli", (long long)dir.prec);
2795
2796 fputc ('\n', dump_file);
2797 }
2798
2799 return dir.len;
2800 }
2801
2802 /* Compute the length of the output resulting from the call to a formatted
2803 output function described by INFO and store the result of the call in
2804 *RES. Issue warnings for detected past the end writes. Return true
2805 if the complete format string has been processed and *RES can be relied
2806 on, false otherwise (e.g., when a unknown or unhandled directive was seen
2807 that caused the processing to be terminated early). */
2808
2809 bool
2810 pass_sprintf_length::compute_format_length (call_info &info,
2811 format_result *res)
2812 {
2813 if (dump_file)
2814 {
2815 location_t callloc = gimple_location (info.callstmt);
2816 fprintf (dump_file, "%s:%i: ",
2817 LOCATION_FILE (callloc), LOCATION_LINE (callloc));
2818 print_generic_expr (dump_file, info.func, dump_flags);
2819
2820 fprintf (dump_file, ": objsize = %llu, fmtstr = \"%s\"\n",
2821 (unsigned long long)info.objsize, info.fmtstr);
2822 }
2823
2824 /* Reset exact, minimum, and maximum character counters. */
2825 res->number_chars = res->number_chars_min = res->number_chars_max = 0;
2826
2827 /* No directive has been seen yet so the length of output is bounded
2828 by the known range [0, 0] (with no conversion producing more than
2829 4K bytes) until determined otherwise. */
2830 res->bounded = true;
2831 res->knownrange = true;
2832 res->under4k = true;
2833 res->floating = false;
2834 res->warned = false;
2835
2836 /* 1-based directive counter. */
2837 unsigned dirno = 1;
2838
2839 /* The variadic argument counter. */
2840 unsigned argno = info.argidx;
2841
2842 for (const char *pf = info.fmtstr; ; ++dirno)
2843 {
2844 directive dir = directive ();
2845 dir.dirno = dirno;
2846
2847 size_t n = parse_directive (info, dir, res, pf, &argno);
2848
2849 if (dir.fmtfunc)
2850 {
2851 /* Return failure if the format function fails. */
2852 if (!format_directive (info, res, dir))
2853 return false;
2854 }
2855 else
2856 {
2857 /* Add the number of bytes between the end of the last directive
2858 and either the next if one exists, or the end of the format
2859 string. */
2860 add_bytes (info, pf, n ? pf + n : NULL, res);
2861 }
2862
2863 /* Return success the directive is zero bytes long and it's
2864 the last think in the format string (i.e., it's the terminating
2865 nul, which isn't really a directive but handling it as one makes
2866 things simpler). */
2867 if (!n)
2868 return *pf == '\0';
2869
2870 pf += n;
2871 }
2872
2873 /* Complete format string was processed (with or without warnings). */
2874 return true;
2875 }
2876
2877 /* Return the size of the object referenced by the expression DEST if
2878 available, or -1 otherwise. */
2879
2880 static unsigned HOST_WIDE_INT
2881 get_destination_size (tree dest)
2882 {
2883 /* Initialize object size info before trying to compute it. */
2884 init_object_sizes ();
2885
2886 /* Use __builtin_object_size to determine the size of the destination
2887 object. When optimizing, determine the smallest object (such as
2888 a member array as opposed to the whole enclosing object), otherwise
2889 use type-zero object size to determine the size of the enclosing
2890 object (the function fails without optimization in this type). */
2891
2892 int ost = optimize > 0;
2893 unsigned HOST_WIDE_INT size;
2894 if (compute_builtin_object_size (dest, ost, &size))
2895 return size;
2896
2897 return HOST_WIDE_INT_M1U;
2898 }
2899
2900 /* Given a suitable result RES of a call to a formatted output function
2901 described by INFO, substitute the result for the return value of
2902 the call. The result is suitable if the number of bytes it represents
2903 is known and exact. A result that isn't suitable for substitution may
2904 have its range set to the range of return values, if that is known.
2905 Return true if the call is removed and gsi_next should not be performed
2906 in the caller. */
2907
2908 static bool
2909 try_substitute_return_value (gimple_stmt_iterator *gsi,
2910 const pass_sprintf_length::call_info &info,
2911 const format_result &res)
2912 {
2913 if (!res.bounded)
2914 return false;
2915
2916 tree lhs = gimple_get_lhs (info.callstmt);
2917
2918 /* Set to true when the entire call has been removed. */
2919 bool removed = false;
2920
2921 /* The minumum return value. */
2922 unsigned HOST_WIDE_INT minretval = res.number_chars_min;
2923
2924 /* The maximum return value. */
2925 unsigned HOST_WIDE_INT maxretval = res.number_chars_max;
2926
2927 /* Adjust the number of bytes which includes the terminating nul
2928 to reflect the return value of the function which does not.
2929 Because the valid range of the function is [INT_MIN, INT_MAX],
2930 a valid range before the adjustment below is [0, INT_MAX + 1]
2931 (the functions only return negative values on error or undefined
2932 behavior). */
2933 if (minretval <= target_int_max () + 1)
2934 --minretval;
2935 if (maxretval <= target_int_max () + 1)
2936 --maxretval;
2937
2938 /* Avoid the return value optimization when the behavior of the call
2939 is undefined either because any directive may have produced 4K or
2940 more of output, or the return value exceeds INT_MAX, or because
2941 the output overflows the destination object (but leave it enabled
2942 when the function is bounded because then the behavior is well-
2943 defined). */
2944 if (res.under4k
2945 && minretval == maxretval
2946 && (info.bounded || minretval < info.objsize)
2947 && minretval <= target_int_max ()
2948 /* Not prepared to handle possibly throwing calls here; they shouldn't
2949 appear in non-artificial testcases, except when the __*_chk routines
2950 are badly declared. */
2951 && !stmt_ends_bb_p (info.callstmt))
2952 {
2953 tree cst = build_int_cst (integer_type_node, minretval);
2954
2955 if (lhs == NULL_TREE
2956 && info.nowrite)
2957 {
2958 /* Remove the call to the bounded function with a zero size
2959 (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs. */
2960 unlink_stmt_vdef (info.callstmt);
2961 gsi_remove (gsi, true);
2962 removed = true;
2963 }
2964 else if (info.nowrite)
2965 {
2966 /* Replace the call to the bounded function with a zero size
2967 (e.g., snprintf(0, 0, "%i", 123) with the constant result
2968 of the function. */
2969 if (!update_call_from_tree (gsi, cst))
2970 gimplify_and_update_call_from_tree (gsi, cst);
2971 gimple *callstmt = gsi_stmt (*gsi);
2972 update_stmt (callstmt);
2973 }
2974 else if (lhs)
2975 {
2976 /* Replace the left-hand side of the call with the constant
2977 result of the formatted function. */
2978 gimple_call_set_lhs (info.callstmt, NULL_TREE);
2979 gimple *g = gimple_build_assign (lhs, cst);
2980 gsi_insert_after (gsi, g, GSI_NEW_STMT);
2981 update_stmt (info.callstmt);
2982 }
2983
2984 if (dump_file)
2985 {
2986 if (removed)
2987 fprintf (dump_file, " Removing call statement.");
2988 else
2989 {
2990 fprintf (dump_file, " Substituting ");
2991 print_generic_expr (dump_file, cst, dump_flags);
2992 fprintf (dump_file, " for %s.\n",
2993 info.nowrite ? "statement" : "return value");
2994 }
2995 }
2996 }
2997 else if (lhs)
2998 {
2999 bool setrange = false;
3000
3001 if ((info.bounded || maxretval < info.objsize)
3002 && res.under4k
3003 && (minretval < target_int_max ()
3004 && maxretval < target_int_max ()))
3005 {
3006 /* If the result is in a valid range bounded by the size of
3007 the destination set it so that it can be used for subsequent
3008 optimizations. */
3009 int prec = TYPE_PRECISION (integer_type_node);
3010
3011 wide_int min = wi::shwi (minretval, prec);
3012 wide_int max = wi::shwi (maxretval, prec);
3013 set_range_info (lhs, VR_RANGE, min, max);
3014
3015 setrange = true;
3016 }
3017
3018 if (dump_file)
3019 {
3020 const char *inbounds
3021 = (minretval < info.objsize
3022 ? (maxretval < info.objsize
3023 ? "in" : "potentially out-of")
3024 : "out-of");
3025
3026 const char *what = setrange ? "Setting" : "Discarding";
3027 if (minretval != maxretval)
3028 fprintf (dump_file,
3029 " %s %s-bounds return value range [%llu, %llu].\n",
3030 what, inbounds,
3031 (unsigned long long)minretval,
3032 (unsigned long long)maxretval);
3033 else
3034 fprintf (dump_file, " %s %s-bounds return value %llu.\n",
3035 what, inbounds, (unsigned long long)minretval);
3036 }
3037 }
3038
3039 if (dump_file)
3040 fputc ('\n', dump_file);
3041
3042 return removed;
3043 }
3044
3045 /* Determine if a GIMPLE CALL is to one of the sprintf-like built-in
3046 functions and if so, handle it. Return true if the call is removed
3047 and gsi_next should not be performed in the caller. */
3048
3049 bool
3050 pass_sprintf_length::handle_gimple_call (gimple_stmt_iterator *gsi)
3051 {
3052 call_info info = call_info ();
3053
3054 info.callstmt = gsi_stmt (*gsi);
3055 if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3056 return false;
3057
3058 info.func = gimple_call_fndecl (info.callstmt);
3059 info.fncode = DECL_FUNCTION_CODE (info.func);
3060
3061 /* The size of the destination as in snprintf(dest, size, ...). */
3062 unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3063
3064 /* The size of the destination determined by __builtin_object_size. */
3065 unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3066
3067 /* Buffer size argument number (snprintf and vsnprintf). */
3068 unsigned HOST_WIDE_INT idx_dstsize = HOST_WIDE_INT_M1U;
3069
3070 /* Object size argument number (snprintf_chk and vsnprintf_chk). */
3071 unsigned HOST_WIDE_INT idx_objsize = HOST_WIDE_INT_M1U;
3072
3073 /* Format string argument number (valid for all functions). */
3074 unsigned idx_format;
3075
3076 switch (info.fncode)
3077 {
3078 case BUILT_IN_SPRINTF:
3079 // Signature:
3080 // __builtin_sprintf (dst, format, ...)
3081 idx_format = 1;
3082 info.argidx = 2;
3083 break;
3084
3085 case BUILT_IN_SPRINTF_CHK:
3086 // Signature:
3087 // __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3088 idx_objsize = 2;
3089 idx_format = 3;
3090 info.argidx = 4;
3091 break;
3092
3093 case BUILT_IN_SNPRINTF:
3094 // Signature:
3095 // __builtin_snprintf (dst, size, format, ...)
3096 idx_dstsize = 1;
3097 idx_format = 2;
3098 info.argidx = 3;
3099 info.bounded = true;
3100 break;
3101
3102 case BUILT_IN_SNPRINTF_CHK:
3103 // Signature:
3104 // __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3105 idx_dstsize = 1;
3106 idx_objsize = 3;
3107 idx_format = 4;
3108 info.argidx = 5;
3109 info.bounded = true;
3110 break;
3111
3112 case BUILT_IN_VSNPRINTF:
3113 // Signature:
3114 // __builtin_vsprintf (dst, size, format, va)
3115 idx_dstsize = 1;
3116 idx_format = 2;
3117 info.argidx = -1;
3118 info.bounded = true;
3119 break;
3120
3121 case BUILT_IN_VSNPRINTF_CHK:
3122 // Signature:
3123 // __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3124 idx_dstsize = 1;
3125 idx_objsize = 3;
3126 idx_format = 4;
3127 info.argidx = -1;
3128 info.bounded = true;
3129 break;
3130
3131 case BUILT_IN_VSPRINTF:
3132 // Signature:
3133 // __builtin_vsprintf (dst, format, va)
3134 idx_format = 1;
3135 info.argidx = -1;
3136 break;
3137
3138 case BUILT_IN_VSPRINTF_CHK:
3139 // Signature:
3140 // __builtin___vsprintf_chk (dst, ost, objsize, format, va)
3141 idx_format = 3;
3142 idx_objsize = 2;
3143 info.argidx = -1;
3144 break;
3145
3146 default:
3147 return false;
3148 }
3149
3150 /* Set the global warning level for this function. */
3151 warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
3152
3153 /* The first argument is a pointer to the destination. */
3154 tree dstptr = gimple_call_arg (info.callstmt, 0);
3155
3156 info.format = gimple_call_arg (info.callstmt, idx_format);
3157
3158 if (idx_dstsize == HOST_WIDE_INT_M1U)
3159 {
3160 /* For non-bounded functions like sprintf, determine the size
3161 of the destination from the object or pointer passed to it
3162 as the first argument. */
3163 dstsize = get_destination_size (dstptr);
3164 }
3165 else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
3166 {
3167 /* For bounded functions try to get the size argument. */
3168
3169 if (TREE_CODE (size) == INTEGER_CST)
3170 {
3171 dstsize = tree_to_uhwi (size);
3172 /* No object can be larger than SIZE_MAX bytes (half the address
3173 space) on the target.
3174 The functions are defined only for output of at most INT_MAX
3175 bytes. Specifying a bound in excess of that limit effectively
3176 defeats the bounds checking (and on some implementations such
3177 as Solaris cause the function to fail with EINVAL). */
3178 if (dstsize > target_size_max () / 2)
3179 {
3180 /* Avoid warning if -Wstringop-overflow is specified since
3181 it also warns for the same thing though only for the
3182 checking built-ins. */
3183 if ((idx_objsize == HOST_WIDE_INT_M1U
3184 || !warn_stringop_overflow))
3185 warning_at (gimple_location (info.callstmt), info.warnopt (),
3186 "specified bound %wu exceeds maximum object size "
3187 "%wu",
3188 dstsize, target_size_max () / 2);
3189 }
3190 else if (dstsize > target_int_max ())
3191 warning_at (gimple_location (info.callstmt), info.warnopt (),
3192 "specified bound %wu exceeds %<INT_MAX %>",
3193 dstsize);
3194 }
3195 else if (TREE_CODE (size) == SSA_NAME)
3196 {
3197 /* Try to determine the range of values of the argument
3198 and use the greater of the two at -Wformat-level 1 and
3199 the smaller of them at level 2. */
3200 wide_int min, max;
3201 enum value_range_type range_type
3202 = get_range_info (size, &min, &max);
3203 if (range_type == VR_RANGE)
3204 {
3205 dstsize
3206 = (warn_level < 2
3207 ? wi::fits_uhwi_p (max) ? max.to_uhwi () : max.to_shwi ()
3208 : wi::fits_uhwi_p (min) ? min.to_uhwi () : min.to_shwi ());
3209 }
3210 }
3211 }
3212
3213 if (idx_objsize != HOST_WIDE_INT_M1U)
3214 if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
3215 if (tree_fits_uhwi_p (size))
3216 objsize = tree_to_uhwi (size);
3217
3218 if (info.bounded && !dstsize)
3219 {
3220 /* As a special case, when the explicitly specified destination
3221 size argument (to a bounded function like snprintf) is zero
3222 it is a request to determine the number of bytes on output
3223 without actually producing any. Pretend the size is
3224 unlimited in this case. */
3225 info.objsize = HOST_WIDE_INT_MAX;
3226 info.nowrite = true;
3227 }
3228 else
3229 {
3230 /* For calls to non-bounded functions or to those of bounded
3231 functions with a non-zero size, warn if the destination
3232 pointer is null. */
3233 if (integer_zerop (dstptr))
3234 {
3235 /* This is diagnosed with -Wformat only when the null is a constant
3236 pointer. The warning here diagnoses instances where the pointer
3237 is not constant. */
3238 location_t loc = gimple_location (info.callstmt);
3239 warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
3240 info.warnopt (), "null destination pointer");
3241 return false;
3242 }
3243
3244 /* Set the object size to the smaller of the two arguments
3245 of both have been specified and they're not equal. */
3246 info.objsize = dstsize < objsize ? dstsize : objsize;
3247
3248 if (info.bounded
3249 && dstsize < target_size_max () / 2 && objsize < dstsize
3250 /* Avoid warning if -Wstringop-overflow is specified since
3251 it also warns for the same thing though only for the
3252 checking built-ins. */
3253 && (idx_objsize == HOST_WIDE_INT_M1U
3254 || !warn_stringop_overflow))
3255 {
3256 warning_at (gimple_location (info.callstmt), info.warnopt (),
3257 "specified bound %wu exceeds the size %wu "
3258 "of the destination object", dstsize, objsize);
3259 }
3260 }
3261
3262 if (integer_zerop (info.format))
3263 {
3264 /* This is diagnosed with -Wformat only when the null is a constant
3265 pointer. The warning here diagnoses instances where the pointer
3266 is not constant. */
3267 location_t loc = gimple_location (info.callstmt);
3268 warning_at (EXPR_LOC_OR_LOC (info.format, loc),
3269 info.warnopt (), "null format string");
3270 return false;
3271 }
3272
3273 info.fmtstr = get_format_string (info.format, &info.fmtloc);
3274 if (!info.fmtstr)
3275 return false;
3276
3277 /* The result is the number of bytes output by the formatted function,
3278 including the terminating NUL. */
3279 format_result res = format_result ();
3280
3281 bool success = compute_format_length (info, &res);
3282
3283 /* When optimizing and the printf return value optimization is enabled,
3284 attempt to substitute the computed result for the return value of
3285 the call. Avoid this optimization when -frounding-math is in effect
3286 and the format string contains a floating point directive. */
3287 if (success
3288 && optimize > 0
3289 && flag_printf_return_value
3290 && (!flag_rounding_math || !res.floating))
3291 return try_substitute_return_value (gsi, info, res);
3292
3293 return false;
3294 }
3295
3296 /* Execute the pass for function FUN. */
3297
3298 unsigned int
3299 pass_sprintf_length::execute (function *fun)
3300 {
3301 basic_block bb;
3302 FOR_EACH_BB_FN (bb, fun)
3303 {
3304 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); )
3305 {
3306 /* Iterate over statements, looking for function calls. */
3307 gimple *stmt = gsi_stmt (si);
3308
3309 if (is_gimple_call (stmt) && handle_gimple_call (&si))
3310 /* If handle_gimple_call returns true, the iterator is
3311 already pointing to the next statement. */
3312 continue;
3313
3314 gsi_next (&si);
3315 }
3316 }
3317
3318 /* Clean up object size info. */
3319 fini_object_sizes ();
3320
3321 return 0;
3322 }
3323
3324 } /* Unnamed namespace. */
3325
3326 /* Return a pointer to a pass object newly constructed from the context
3327 CTXT. */
3328
3329 gimple_opt_pass *
3330 make_pass_sprintf_length (gcc::context *ctxt)
3331 {
3332 return new pass_sprintf_length (ctxt);
3333 }