From: David Malcolm Date: Thu, 15 Dec 2016 18:05:05 +0000 (+0000) Subject: Fix use-after-free lexing unterminated raw strings (PR preprocessor/78811) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a3998c2fb1630638db83defcd2c680111d65b973;p=gcc.git Fix use-after-free lexing unterminated raw strings (PR preprocessor/78811) gcc/ChangeLog: PR preprocessor/78680 PR preprocessor/78811 * input.c (struct selftest::lexer_test): Add field m_implicitly_expect_EOF. (selftest::lexer_error_sink): New class. (selftest::lexer_error_sink::s_singleton): New global. (selftest::lexer_test::lexer_test): Initialize new field "m_implicitly_expect_EOF". (selftest::lexer_test::~lexer_test): Conditionalize the check for the EOF token on the new field. (selftest::test_lexer_string_locations_raw_string_unterminated): New function. (selftest::input_c_tests): Call the new test. libcpp/ChangeLog: PR preprocessor/78680 PR preprocessor/78811 * lex.c (_cpp_lex_direct): Only determine the end-location of the token and build a range for non-reserved start locations. Do not do it for EOF tokens. From-SVN: r243721 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0d31d0d7a8d..f8b7fed4b1f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2016-12-15 David Malcolm + + PR preprocessor/78680 + PR preprocessor/78811 + * input.c (struct selftest::lexer_test): Add field + m_implicitly_expect_EOF. + (selftest::lexer_error_sink): New class. + (selftest::lexer_error_sink::s_singleton): New global. + (selftest::lexer_test::lexer_test): Initialize new field + "m_implicitly_expect_EOF". + (selftest::lexer_test::~lexer_test): Conditionalize the + check for the EOF token on the new field. + (selftest::test_lexer_string_locations_raw_string_unterminated): + New function. + (selftest::input_c_tests): Call the new test. + 2016-12-15 Wilco Dijkstra * config/arm/arm.h (TARGET_BACKTRACE): Use crtl->is_leaf. 
diff --git a/gcc/input.c b/gcc/input.c index 1c7228a0080..dcb51016a04 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -1985,6 +1985,7 @@ struct lexer_test cpp_reader_ptr m_parser; temp_source_file m_tempfile; string_concat_db m_concats; + bool m_implicitly_expect_EOF; }; /* Use an EBCDIC encoding for the execution charset, specifically @@ -2046,6 +2047,54 @@ class ebcdic_execution_charset : public lexer_test_options ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; +/* A lexer_test_options subclass that records a list of error + messages emitted by the lexer. */ + +class lexer_error_sink : public lexer_test_options +{ + public: + lexer_error_sink () + { + gcc_assert (s_singleton == NULL); + s_singleton = this; + } + ~lexer_error_sink () + { + gcc_assert (s_singleton == this); + s_singleton = NULL; + + int i; + char *str; + FOR_EACH_VEC_ELT (m_errors, i, str) + free (str); + } + + void apply (lexer_test &test) FINAL OVERRIDE + { + cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); + callbacks->error = on_error; + } + + static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED, + int level ATTRIBUTE_UNUSED, + int reason ATTRIBUTE_UNUSED, + rich_location *richloc ATTRIBUTE_UNUSED, + const char *msgid, va_list *ap) + ATTRIBUTE_FPTR_PRINTF(5,0) + { + char *msg = xvasprintf (msgid, *ap); + s_singleton->m_errors.safe_push (msg); + return true; + } + + auto_vec <char *> m_errors; + + private: + static lexer_error_sink *s_singleton; +}; + +lexer_error_sink *lexer_error_sink::s_singleton; + /* Constructor. Override line_table with a new instance based on CASE_, and write CONTENT to a tempfile. Create a cpp_reader, and use it to start parsing the tempfile. */ @@ -2056,7 +2105,8 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content, m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), /* Create a tempfile and write the text to it. 
*/ m_tempfile (SELFTEST_LOCATION, ".c", content), - m_concats () + m_concats (), + m_implicitly_expect_EOF (true) { if (options) options->apply (*this); @@ -2069,16 +2119,19 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content, ASSERT_NE (fname, NULL); } -/* Destructor. Verify that the next token in m_parser is EOF. */ +/* Destructor. By default, verify that the next token in m_parser is EOF. */ lexer_test::~lexer_test () { location_t loc; const cpp_token *tok; - tok = cpp_get_token_with_location (m_parser, &loc); - ASSERT_NE (tok, NULL); - ASSERT_EQ (tok->type, CPP_EOF); + if (m_implicitly_expect_EOF) + { + tok = cpp_get_token_with_location (m_parser, &loc); + ASSERT_NE (tok, NULL); + ASSERT_EQ (tok->type, CPP_EOF); + } } /* Get the next token from m_parser. */ @@ -3247,6 +3300,31 @@ test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) "range endpoints are on different lines"); } +/* Test of parsing an unterminated raw string. */ + +static void +test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_) +{ + const char *content = "R\"ouch()ouCh\" /* etc */"; + + lexer_error_sink errors; + lexer_test test (case_, content, &errors); + test.m_implicitly_expect_EOF = false; + + /* Attempt to parse the raw string. */ + const cpp_token *tok = test.get_token (); + ASSERT_EQ (tok->type, CPP_EOF); + + ASSERT_EQ (1, errors.m_errors.length ()); + /* We expect the message "unterminated raw string" + in the "cpplib" translation domain. + It's not clear that dgettext is available on all supported hosts, + so this assertion is commented-out for now. + ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"), + errors.m_errors[0]); + */ +} + /* Test of lexing char constants. 
*/ static void @@ -3390,6 +3468,7 @@ input_c_tests () for_each_line_table_case (test_lexer_string_locations_long_line); for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); + for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated); for_each_line_table_case (test_lexer_char_constants); test_reading_source_line (); diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 63681220187..ba90cfe8018 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,11 @@ +2016-12-15 David Malcolm + + PR preprocessor/78680 + PR preprocessor/78811 + * lex.c (_cpp_lex_direct): Only determine the end-location of + the token and build a range for non-reserved start locations. + Do not do it for EOF tokens. + 2016-12-12 David Malcolm PR preprocessor/78680 diff --git a/libcpp/lex.c b/libcpp/lex.c index ae458926b75..9b1bdf8ba30 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -3089,25 +3089,27 @@ _cpp_lex_direct (cpp_reader *pfile) break; } - /* Ensure that any line notes are processed, so that we have the - correct physical line/column for the end-point of the token even - when a logical line is split via one or more backslashes. */ - if (buffer->cur >= buffer->notes[buffer->cur_note].pos - && !pfile->overlaid_buffer) - _cpp_process_line_notes (pfile, false); - - source_range tok_range; - tok_range.m_start = result->src_loc; - if (result->src_loc >= RESERVED_LOCATION_COUNT) - tok_range.m_finish - = linemap_position_for_column (pfile->line_table, - CPP_BUF_COLUMN (buffer, buffer->cur)); - else - tok_range.m_finish = tok_range.m_start; - - result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table, - result->src_loc, - tok_range, NULL); + /* Potentially convert the location of the token to a range. 
*/ + if (result->src_loc >= RESERVED_LOCATION_COUNT + && result->type != CPP_EOF) + { + /* Ensure that any line notes are processed, so that we have the + correct physical line/column for the end-point of the token even + when a logical line is split via one or more backslashes. */ + if (buffer->cur >= buffer->notes[buffer->cur_note].pos + && !pfile->overlaid_buffer) + _cpp_process_line_notes (pfile, false); + + source_range tok_range; + tok_range.m_start = result->src_loc; + tok_range.m_finish + = linemap_position_for_column (pfile->line_table, + CPP_BUF_COLUMN (buffer, buffer->cur)); + + result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table, + result->src_loc, + tok_range, NULL); + } return result; }