From bbcbd744b80c2cd40d7ef41d32dbd6ee7400701f Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Fri, 17 Aug 2018 04:01:14 +0000 Subject: [PATCH] re PR tree-optimization/86853 (sprintf optimization for wide strings doesn't account for conversion failure) gcc/ChangeLog: PR tree-optimization/86853 * gimple-ssa-sprintf.c (struct format_result): Rename member. (struct fmtresult): Add member and initialize it in ctors. (format_character): Handle %C. Extend range to NUL. Set MAYFAIL. (format_string): Handle %S the same as %ls. Set MAYFAIL. (format_directive): Set POSUNDER4K when MAYFAIL is set. (parse_directive): Handle %C same as %c. (sprintf_dom_walker::compute_format_length): Adjust. (is_call_safe): Adjust. gcc/testsuite/ChangeLog: PR tree-optimization/86853 * gcc.dg/tree-ssa/builtin-sprintf-10.c: New test. * gcc.dg/tree-ssa/builtin-sprintf-11.c: New test. * gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Adjust. From-SVN: r263612 --- gcc/ChangeLog | 12 ++ gcc/gimple-ssa-sprintf.c | 73 ++++++++--- gcc/testsuite/ChangeLog | 7 ++ .../gcc.dg/tree-ssa/builtin-sprintf-10.c | 119 ++++++++++++++++++ .../gcc.dg/tree-ssa/builtin-sprintf-11.c | 65 ++++++++++ .../gcc.dg/tree-ssa/builtin-sprintf-warn-18.c | 5 +- 6 files changed, 259 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 897ec9cc91f..9af07fc3a2f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2018-08-16 Martin Sebor + + PR tree-optimization/86853 + * gimple-ssa-sprintf.c (struct format_result): Rename member. + (struct fmtresult): Add member and initialize it in ctors. + (format_character): Handle %C. Extend range to NUL. Set MAYFAIL. + (format_string): Handle %S the same as %ls. Set MAYFAIL. + (format_directive): Set POSUNDER4K when MAYFAIL is set. + (parse_directive): Handle %C same as %c. + (sprintf_dom_walker::compute_format_length): Adjust. + (is_call_safe): Adjust. + 2018-08-16 Bernd Edlinger * builtins.c (c_strlen): Add new parameter eltsize. Use it diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c index 2431b52e652..b5e1a08a88b 100644 --- a/gcc/gimple-ssa-sprintf.c +++ b/gcc/gimple-ssa-sprintf.c @@ -211,12 +211,14 @@ struct format_result the return value optimization. */ bool knownrange; - /* True if no individual directive resulted in more than 4095 bytes - of output (the total NUMBER_CHARS_{MIN,MAX} might be greater). - Implementations are not required to handle directives that produce - more than 4K bytes (leading to undefined behavior) and so when one - is found it disables the return value optimization. */ - bool under4k; + /* True if no individual directive could fail or result in more than + 4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be + greater). Implementations are not required to handle directives + that produce more than 4K bytes (leading to undefined behavior) + and so when one is found it disables the return value optimization. + Similarly, directives that can fail (such as wide character + directives) disable the optimization. */ + bool posunder4k; /* True when a floating point directive has been seen in the format string. */ @@ -651,7 +653,7 @@ struct fmtresult fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX) : argmin (), argmax (), knownrange (min < HOST_WIDE_INT_MAX), - nullp () + mayfail (), nullp () { range.min = min; range.max = min; @@ -665,7 +667,7 @@ struct fmtresult unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX) : argmin (), argmax (), knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX), - nullp () + mayfail (), nullp () { range.min = min; range.max = max; @@ -695,6 +697,10 @@ struct fmtresult heuristics that depend on warning levels. */ bool knownrange; + /* True for a directive that may fail (such as wide character + directives). */ + bool mayfail; + /* True when the argument is a null pointer. */ bool nullp; }; @@ -2210,7 +2216,8 @@ format_character (const directive &dir, tree arg, vr_values *vr_values) res.knownrange = true; - if (dir.modifier == FMT_LEN_l) + if (dir.specifier == 'C' + || dir.modifier == FMT_LEN_l) { /* A wide character can result in as few as zero bytes. */ res.range.min = 0; @@ -2225,13 +2232,20 @@ format_character (const directive &dir, tree arg, vr_values *vr_values) res.range.likely = 0; res.range.unlikely = 0; } - else if (min > 0 && min < 128) + else if (min >= 0 && min < 128) { + /* Be conservative if the target execution character set + is not a 1-to-1 mapping to the source character set or + if the source set is not ASCII. */ + bool one_2_one_ascii + = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97); + /* A wide character in the ASCII range most likely results in a single byte, and only unlikely in up to MB_LEN_MAX. */ - res.range.max = 1; + res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();; res.range.likely = 1; res.range.unlikely = target_mb_len_max (); + res.mayfail = !one_2_one_ascii; } else { @@ -2240,6 +2254,8 @@ format_character (const directive &dir, tree arg, vr_values *vr_values) res.range.max = target_mb_len_max (); res.range.likely = 2; res.range.unlikely = res.range.max; + /* Converting such a character may fail. */ + res.mayfail = true; } } else @@ -2249,6 +2265,7 @@ format_character (const directive &dir, tree arg, vr_values *vr_values) res.range.max = target_mb_len_max (); res.range.likely = 2; res.range.unlikely = res.range.max; + res.mayfail = true; } } else @@ -2285,7 +2302,8 @@ format_string (const directive &dir, tree arg, vr_values *) /* A '%s' directive with a string argument with constant length. */ res.range = slen.range; - if (dir.modifier == FMT_LEN_l) + if (dir.specifier == 'S' + || dir.modifier == FMT_LEN_l) { /* In the worst case the length of output of a wide string S is bounded by MB_LEN_MAX * wcslen (S). */ @@ -2311,6 +2329,10 @@ format_string (const directive &dir, tree arg, vr_values *) /* Even a non-empty wide character string need not convert into any bytes. */ res.range.min = 0; + + /* A non-empty wide character conversion may fail. */ + if (slen.range.max > 0) + res.mayfail = true; } else { @@ -2349,7 +2371,8 @@ format_string (const directive &dir, tree arg, vr_values *) at level 2. This result is adjust upward for width (if it's specified). */ - if (dir.modifier == FMT_LEN_l) + if (dir.specifier == 'S' + || dir.modifier == FMT_LEN_l) { /* A wide character converts to as few as zero bytes. */ slen.range.min = 0; @@ -2361,6 +2384,10 @@ format_string (const directive &dir, tree arg, vr_values *) if (slen.range.likely < target_int_max ()) slen.range.unlikely *= target_mb_len_max (); + + /* A non-empty wide character conversion may fail. */ + if (slen.range.max > 0) + res.mayfail = true; } res.range = slen.range; @@ -2913,11 +2940,14 @@ format_directive (const sprintf_dom_walker::call_info &info, of 4095 bytes required to be supported? */ bool minunder4k = fmtres.range.min < 4096; bool maxunder4k = fmtres.range.max < 4096; - /* Clear UNDER4K in the overall result if the maximum has exceeded - the 4k (this is necessary to avoid the return valuye optimization + /* Clear POSUNDER4K in the overall result if the maximum has exceeded + the 4k (this is necessary to avoid the return value optimization that may not be safe in the maximum case). */ if (!maxunder4k) - res->under4k = false; + res->posunder4k = false; + /* Also clear POSUNDER4K if the directive may fail. */ + if (fmtres.mayfail) + res->posunder4k = false; if (!warned /* Only warn at level 2. */ @@ -3363,12 +3393,15 @@ parse_directive (sprintf_dom_walker::call_info &info, dir.fmtfunc = format_none; break; + case 'C': case 'c': + /* POSIX wide character and C/POSIX narrow character. */ dir.fmtfunc = format_character; break; case 'S': case 's': + /* POSIX wide string and C/POSIX narrow character string. */ dir.fmtfunc = format_string; break; @@ -3526,10 +3559,10 @@ sprintf_dom_walker::compute_format_length (call_info &info, res->range.min = res->range.max = 0; /* No directive has been seen yet so the length of output is bounded - by the known range [0, 0] (with no conversion producing more than - 4K bytes) until determined otherwise. */ + by the known range [0, 0] (with no conversion resulting in a failure + or producing more than 4K bytes) until determined otherwise. */ res->knownrange = true; - res->under4k = true; + res->posunder4k = true; res->floating = false; res->warned = false; @@ -3597,7 +3630,7 @@ is_call_safe (const sprintf_dom_walker::call_info &info, const format_result &res, bool under4k, unsigned HOST_WIDE_INT retval[2]) { - if (under4k && !res.under4k) + if (under4k && !res.posunder4k) return false; /* The minimum return value. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ad2de8468ec..abe919ebbb3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-08-16 Martin Sebor + + PR tree-optimization/86853 + * gcc.dg/tree-ssa/builtin-sprintf-10.c: New test. + * gcc.dg/tree-ssa/builtin-sprintf-11.c: New test. + * gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Adjust. + 2018-08-16 David Malcolm * gcc.dg/missing-header-fixit-3.c: New test. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c new file mode 100644 index 00000000000..837b6f40c99 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c @@ -0,0 +1,119 @@ +/* PR tree-optimization/86853 - sprintf optimization for wide strings + doesn't account for conversion failure​ + { dg-do compile } + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __WCHAR_TYPE__ wchar_t; + +extern int snprintf (char*, size_t, const char*, ...); + +#define CONCAT(x, y) x ## y +#define CAT(x, y) CONCAT (x, y) +#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__) + +#define FAIL(name) do { \ + extern void FAILNAME (name) (void); \ + FAILNAME (name)(); \ + } while (0) + +/* Macro to emit a call to funcation named + call_in_true_branch_not_eliminated_on_line_NNN() + for each call that's expected to be eliminated. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that no such call appears in output. */ +#define ELIM(expr) \ + if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0 + +/* Macro to emit a call to a function named + call_made_in_{true,false}_branch_on_line_NNN() + for each call that's expected to be retained. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that the expected number of both kinds of calls appears in output + (a pair for each line with the invocation of the KEEP() macro. */ +#define KEEP(expr) \ + if (expr) \ + FAIL (made_in_true_branch); \ + else \ + FAIL (made_in_false_branch) + + +extern wchar_t wc; +extern wchar_t ws[]; + +const wchar_t ws3[] = L"12\xff"; + +/* Verify that the following calls are eliminated. */ + +void elim_wide_char_call (void) +{ + ELIM (snprintf (0, 0, "%lc", L'\0')); + ELIM (snprintf (0, 0, "%lc", L'1')); + ELIM (snprintf (0, 0, "%lc", L'a')); + ELIM (snprintf (0, 0, "%lc", ws3[0])); + ELIM (snprintf (0, 0, "%lc", ws3[1])); + ELIM (snprintf (0, 0, "%lc", ws3[3])); + + ELIM (snprintf (0, 0, "%C", L'\0')); + ELIM (snprintf (0, 0, "%C", L'9')); + ELIM (snprintf (0, 0, "%C", L'z')); + ELIM (snprintf (0, 0, "%C", ws3[0])); + ELIM (snprintf (0, 0, "%C", ws3[1])); + ELIM (snprintf (0, 0, "%C", ws3[3])); + + /* Verify an unknown character value within the ASCII range. */ + if (wc < 1 || 127 < wc) + wc = 0; + + ELIM (snprintf (0, 0, "%C", wc)); + ELIM (snprintf (0, 0, "%C", wc)); +} + +void elim_wide_string_call (void) +{ + ELIM (snprintf (0, 0, "%ls", L"")); +} + + +#line 1000 + + /* Verify that the following calls are not eliminated. */ + +void keep_wide_char_call (void) +{ + KEEP (snprintf (0, 0, "%lc", L'\xff')); + KEEP (snprintf (0, 0, "%lc", L'\xffff')); + KEEP (snprintf (0, 0, "%lc", wc)); + KEEP (snprintf (0, 0, "%lc", ws3[2])); + + KEEP (snprintf (0, 0, "%C", L'\xff')); + KEEP (snprintf (0, 0, "%C", L'\xffff')); + KEEP (snprintf (0, 0, "%C", wc)); + KEEP (snprintf (0, 0, "%C", ws3[2])); + + /* Verify an unknown character value outside the ASCII range + (with 128 being the only one). */ + if (wc < 32 || 128 < wc) + wc = 32; + + KEEP (snprintf (0, 0, "%lc", wc)); + KEEP (snprintf (0, 0, "%C", wc)); +} + +void keep_wide_string_call (void) +{ + KEEP (snprintf (0, 0, "%ls", L"\xff")); + KEEP (snprintf (0, 0, "%ls", L"\xffff")); + KEEP (snprintf (0, 0, "%ls", ws)); + KEEP (snprintf (0, 0, "%ls", ws3)); + + KEEP (snprintf (0, 0, "%S", L"\xff")); + KEEP (snprintf (0, 0, "%S", L"\xffff")); + KEEP (snprintf (0, 0, "%S", ws)); + KEEP (snprintf (0, 0, "%S", ws3)); +} + +/* { dg-final { scan-tree-dump-times "call_made_in_true_branch_not_eliminated" 0 "optimized" } } + + { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 18 "optimized" } } + { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 18 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c new file mode 100644 index 00000000000..e1effe68341 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c @@ -0,0 +1,65 @@ +/* PR tree-optimization/86853 - sprintf optimization for wide strings + doesn't account for conversion failure​ + Exercise wide character handling in an EBCDIC execution charset. + { dg-do compile } + { dg-require-iconv "IBM1047" } + { dg-options "-O2 -Wall -Wno-format -Wformat-overflow -fexec-charset=IBM1047 -fdump-tree-optimized" } */ + +typedef __WCHAR_TYPE__ wchar_t; + +/* Exercise wide character constants. */ + +void test_lc_cst (void) +{ + /* IBM1047 0x30 maps to ASCII 0x94 which neeed not be representable + in the current locale (and the snprintf() call may fail). Verify + that snprintf() doesn't assume it is. */ + wchar_t wc = 0x30; + + int n = __builtin_snprintf (0, 0, "%lc", wc); + if (n < 0) + __builtin_abort (); +} + +void test_C_cst (void) +{ + /* Same as above but for %C and 0x31 which maps to 0x95. */ + wchar_t wc = 0x31; + + int n = __builtin_snprintf (0, 0, "%C", wc); + if (n < 0) + __builtin_abort (); +} + +/* Exercise wide character values in known ranges. */ + +void test_lc_range (wchar_t wc) +{ + if (wc < 0x40 || 0x49 < wc) + wc = 0x40; + + int n = __builtin_snprintf (0, 0, "%lc", wc); + if (n < 0) + __builtin_abort (); +} + +void test_C_range (wchar_t wc) +{ + if (wc < 0x41 || 0x48 < wc) + wc = 0x41; + + int n = __builtin_snprintf (0, 0, "%C", wc); + if (n < 0) + __builtin_abort (); +} + +/* Exercise unknown wide character values. */ + +void test_var (wchar_t wc) +{ + int n = __builtin_snprintf (0, 0, "%lc", wc); + if (n < 0) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-times "abort" 5 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c index 961fa487bb9..7064f8a6d58 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c @@ -18,7 +18,7 @@ void test_characters () T ("%A", 0.0); /* { dg-warning ".%A. directive writing between 6 and 20 " } */ T ("%a", 0.0); /* { dg-warning ".%a. directive writing between 6 and 20 " } */ - T ("%C", 'a'); /* { dg-warning ".%C. directive writing 1 byte" "bug 80537" { xfail *-*-* } } */ + T ("%C", L'a'); /* { dg-warning ".%C. directive writing up to 6 bytes" } */ T ("%c", 'a'); /* { dg-warning ".%c. directive writing 1 byte" } */ T ("%d", 12); /* { dg-warning ".%d. directive writing 2 bytes" } */ @@ -93,7 +93,8 @@ void test_characters () T ("%x", 1234); /* { dg-warning ".%x. directive writing 3 bytes" } */ T ("%#X", 1235); /* { dg-warning ".%#X. directive writing 5 bytes" } */ - T ("%S", L"1"); /* { dg-warning ".%S. directive writing 1 byte" } */ + T ("%S", L"1"); /* { dg-warning ".%S. directive writing up to 6 bytes" } */ + T ("%ls", L"12"); /* { dg-warning ".%ls. directive writing up to 12 bytes" } */ T ("%-s", "1"); /* { dg-warning ".%-s. directive writing 1 byte" } */ /* Verify that characters in the source character set appear in -- 2.30.2