From b224c3763e018e8bdd0047b3eb283992fb655ce0 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 8 May 2020 11:06:49 -0700 Subject: [PATCH] preprocessor: Reimplement directives only processing, support raw literals. The existing directives-only code (a) punched a hole through the libcpp interface and (b) didn't support raw string literals. This reimplements this preprocessing mode. I added a proper callback interface, and adjusted c-ppoutput to use it. Sadly I cannot get rid of the libcpp/internal.h include for unrelated reasons. The new scanner is in lex.c, and works by doing some backwards scanning when it finds a character of interest. This reduces the number of cases one has to deal with in forward scanning. It may have different failure modes than forward scanning on bad tokenization. Finally, I moved some cpp tests from the C-specific gcc.dg/cpp directory to the shared c-c++-common/cpp directory. libcpp/ * directives-only.c: Delete. * Makefile.in (libcpp_a_OBJS, libcpp_a_SOURCES): Remove it. * include/cpplib.h (enum CPP_DO_task): New enum. (cpp_directive_only_process): Declare. * internal.h (_cpp_dir_only_callbacks): Delete. (_cpp_preprocess_dir_only): Delete. * lex.c (do_peek_backslash, do_peek_next, do_peek_prev): New. (cpp_directive_only_process): New implementation. gcc/c-family/ Reimplement directives only processing. * c-ppoutput.c (token_streamer): New. (directives_only_cb): New. Swallow ... (print_lines_directives_only): ... this. (scan_translation_unit_directives_only): Reimplement using the published interface. gcc/testsuite/ * gcc.dg/cpp/counter-[23].c: Move to c-c++-common/cpp. * gcc.dg/cpp/dir-only-*: Likewise. * c-c++-common/cpp/dir-only-[78].c: New. 
--- gcc/c-family/ChangeLog | 9 + gcc/c-family/c-ppoutput.c | 183 ++++++- gcc/testsuite/ChangeLog | 6 + .../{gcc.dg => c-c++-common}/cpp/counter-2.c | 0 .../{gcc.dg => c-c++-common}/cpp/counter-3.c | 0 .../{gcc.dg => c-c++-common}/cpp/dir-only-1.c | 0 .../{gcc.dg => c-c++-common}/cpp/dir-only-1.h | 0 .../{gcc.dg => c-c++-common}/cpp/dir-only-2.c | 0 .../{gcc.dg => c-c++-common}/cpp/dir-only-3.c | 0 .../cpp/dir-only-3a.h | 0 .../cpp/dir-only-3b.h | 0 .../{gcc.dg => c-c++-common}/cpp/dir-only-4.c | 0 .../{gcc.dg => c-c++-common}/cpp/dir-only-5.c | 3 +- .../{gcc.dg => c-c++-common}/cpp/dir-only-6.c | 0 gcc/testsuite/c-c++-common/cpp/dir-only-7.c | 35 ++ gcc/testsuite/c-c++-common/cpp/dir-only-8.c | 38 ++ libcpp/ChangeLog | 12 + libcpp/Makefile.in | 4 +- libcpp/directives-only.c | 240 --------- libcpp/include/cpplib.h | 12 + libcpp/internal.h | 11 - libcpp/lex.c | 482 ++++++++++++++++++ 22 files changed, 770 insertions(+), 265 deletions(-) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/counter-2.c (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/counter-3.c (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-1.c (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-1.h (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-2.c (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-3.c (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-3a.h (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-3b.h (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-4.c (100%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-5.c (77%) rename gcc/testsuite/{gcc.dg => c-c++-common}/cpp/dir-only-6.c (100%) create mode 100644 gcc/testsuite/c-c++-common/cpp/dir-only-7.c create mode 100644 gcc/testsuite/c-c++-common/cpp/dir-only-8.c delete mode 100644 libcpp/directives-only.c diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 711da91e598..0550e534942 100644 --- 
a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,12 @@ +2020-05-08 Nathan Sidwell + + Reimplement directives only processing. + * c-ppoutput.c (token_streamer): New. + (directives_only_cb): New. Swallow ... + (print_lines_directives_only): ... this. + (scan_translation_unit_directives_only): Reimplement using the + published interface. + 2020-05-07 Marek Polacek * c-format.c (badwords): Add "nonstatic". diff --git a/gcc/c-family/c-ppoutput.c b/gcc/c-family/c-ppoutput.c index 57e7ae02af9..1e2b32b46ac 100644 --- a/gcc/c-family/c-ppoutput.c +++ b/gcc/c-family/c-ppoutput.c @@ -52,7 +52,6 @@ static macro_queue *define_queue, *undef_queue; /* General output routines. */ static void scan_translation_unit (cpp_reader *); -static void print_lines_directives_only (int, const void *, size_t); static void scan_translation_unit_directives_only (cpp_reader *); static void scan_translation_unit_trad (cpp_reader *); static void account_for_newlines (const unsigned char *, size_t); @@ -165,6 +164,141 @@ init_pp_output (FILE *out_stream) print.prev_was_system_token = false; } +// FIXME: Ideally we'd just turn the entirety of the print struct into +// an encapsulated streamer ... 
+ +class token_streamer +{ + bool avoid_paste; + bool do_line_adjustments; + bool in_pragma; + bool line_marker_emitted; + + public: + token_streamer (cpp_reader *pfile) + :avoid_paste (false), + do_line_adjustments (cpp_get_options (pfile)->lang != CLK_ASM + && !flag_no_line_commands), + in_pragma (false), + line_marker_emitted (false) + { + } + + void begin_pragma () + { + in_pragma = true; + } + + void stream (cpp_reader *pfile, const cpp_token *tok, location_t); +}; + +void +token_streamer::stream (cpp_reader *pfile, const cpp_token *token, + location_t loc) +{ + if (token->type == CPP_PADDING) + { + avoid_paste = true; + if (print.source == NULL + || (!(print.source->flags & PREV_WHITE) + && token->val.source == NULL)) + print.source = token->val.source; + return; + } + + if (token->type == CPP_EOF) + return; + + /* Subtle logic to output a space if and only if necessary. */ + if (avoid_paste) + { + int src_line = LOCATION_LINE (loc); + + if (print.source == NULL) + print.source = token; + + if (src_line != print.src_line + && do_line_adjustments + && !in_pragma) + { + line_marker_emitted = do_line_change (pfile, token, loc, false); + putc (' ', print.outf); + print.printed = true; + } + else if (print.source->flags & PREV_WHITE + || (print.prev + && cpp_avoid_paste (pfile, print.prev, token)) + || (print.prev == NULL && token->type == CPP_HASH)) + { + putc (' ', print.outf); + print.printed = true; + } + } + else if (token->flags & PREV_WHITE) + { + int src_line = LOCATION_LINE (loc); + + if (src_line != print.src_line + && do_line_adjustments + && !in_pragma) + line_marker_emitted = do_line_change (pfile, token, loc, false); + putc (' ', print.outf); + print.printed = true; + } + + avoid_paste = false; + print.source = NULL; + print.prev = token; + if (token->type == CPP_PRAGMA) + { + const char *space; + const char *name; + + line_marker_emitted = maybe_print_line (token->src_loc); + fputs ("#pragma ", print.outf); + c_pp_lookup_pragma (token->val.pragma, 
&space, &name); + if (space) + fprintf (print.outf, "%s %s", space, name); + else + fprintf (print.outf, "%s", name); + print.printed = true; + in_pragma = true; + } + else if (token->type == CPP_PRAGMA_EOL) + { + maybe_print_line (UNKNOWN_LOCATION); + in_pragma = false; + } + else + { + if (cpp_get_options (parse_in)->debug) + linemap_dump_location (line_table, token->src_loc, print.outf); + + if (do_line_adjustments + && !in_pragma + && !line_marker_emitted + && print.prev_was_system_token != !!in_system_header_at (loc) + && !is_location_from_builtin_token (loc)) + /* The system-ness of this token is different from the one of + the previous token. Let's emit a line change to mark the + new system-ness before we emit the token. */ + { + do_line_change (pfile, token, loc, false); + print.prev_was_system_token = !!in_system_header_at (loc); + } + cpp_output_token (token, print.outf); + line_marker_emitted = false; + print.printed = true; + } + + /* CPP_COMMENT tokens and raw-string literal tokens can have + embedded new-line characters. Rather than enumerating all the + possible token types just check if token uses val.str union + member. */ + if (cpp_token_val_index (token) == CPP_TOKEN_FLD_STR) + account_for_newlines (token->val.str.text, token->val.str.len); +} + /* Writes out the preprocessed file, handling spacing and paste avoidance issues. */ static void @@ -288,10 +422,41 @@ scan_translation_unit (cpp_reader *pfile) } static void -print_lines_directives_only (int lines, const void *buf, size_t size) +directives_only_cb (cpp_reader *pfile, CPP_DO_task task, void *data_, ...) 
{ - print.src_line += lines; - fwrite (buf, 1, size, print.outf); + va_list args; + va_start (args, data_); + + token_streamer *streamer = reinterpret_cast (data_); + switch (task) + { + default: + gcc_unreachable (); + + case CPP_DO_print: + { + print.src_line += va_arg (args, unsigned); + + const void *buf = va_arg (args, const void *); + size_t size = va_arg (args, size_t); + fwrite (buf, 1, size, print.outf); + } + break; + + case CPP_DO_location: + maybe_print_line (va_arg (args, location_t)); + break; + + case CPP_DO_token: + { + const cpp_token *token = va_arg (args, const cpp_token *); + location_t spelling_loc = va_arg (args, location_t); + streamer->stream (pfile, token, spelling_loc); + } + break; + } + + va_end (args); } /* Writes out the preprocessed file, handling spacing and paste @@ -299,12 +464,8 @@ print_lines_directives_only (int lines, const void *buf, size_t size) static void scan_translation_unit_directives_only (cpp_reader *pfile) { - struct _cpp_dir_only_callbacks cb; - - cb.print_lines = print_lines_directives_only; - cb.maybe_print_line = maybe_print_line; - - _cpp_preprocess_dir_only (pfile, &cb); + token_streamer streamer (pfile); + cpp_directive_only_process (pfile, &streamer, directives_only_cb); } /* Adjust print.src_line for newlines embedded in output. */ @@ -396,7 +557,7 @@ print_line_1 (location_t src_loc, const char *special_flags, FILE *stream) putc ('\n', stream); print.printed = false; - if (!flag_no_line_commands) + if (src_loc != UNKNOWN_LOCATION && !flag_no_line_commands) { const char *file_path = LOCATION_FILE (src_loc); int sysp; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9618eaab2ab..2381a1c80f9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-05-08 Nathan Sidwell + + * gcc.dg/cpp/counter-[23].c: Move to c-c+_-common/cpp. + * gcc.dg/cpp/dir-only-*: Likewise. + * c-c++-common/cpp/dir-only-[78].c: New. 
+ 2020-05-08 Richard Biener * gcc.dg/vect/bb-slp-pr68892.c: Adjust for not supported diff --git a/gcc/testsuite/gcc.dg/cpp/counter-2.c b/gcc/testsuite/c-c++-common/cpp/counter-2.c similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/counter-2.c rename to gcc/testsuite/c-c++-common/cpp/counter-2.c diff --git a/gcc/testsuite/gcc.dg/cpp/counter-3.c b/gcc/testsuite/c-c++-common/cpp/counter-3.c similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/counter-3.c rename to gcc/testsuite/c-c++-common/cpp/counter-3.c diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-1.c b/gcc/testsuite/c-c++-common/cpp/dir-only-1.c similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-1.c rename to gcc/testsuite/c-c++-common/cpp/dir-only-1.c diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-1.h b/gcc/testsuite/c-c++-common/cpp/dir-only-1.h similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-1.h rename to gcc/testsuite/c-c++-common/cpp/dir-only-1.h diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-2.c b/gcc/testsuite/c-c++-common/cpp/dir-only-2.c similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-2.c rename to gcc/testsuite/c-c++-common/cpp/dir-only-2.c diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-3.c b/gcc/testsuite/c-c++-common/cpp/dir-only-3.c similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-3.c rename to gcc/testsuite/c-c++-common/cpp/dir-only-3.c diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-3a.h b/gcc/testsuite/c-c++-common/cpp/dir-only-3a.h similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-3a.h rename to gcc/testsuite/c-c++-common/cpp/dir-only-3a.h diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-3b.h b/gcc/testsuite/c-c++-common/cpp/dir-only-3b.h similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-3b.h rename to gcc/testsuite/c-c++-common/cpp/dir-only-3b.h diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-4.c b/gcc/testsuite/c-c++-common/cpp/dir-only-4.c similarity index 100% rename from 
gcc/testsuite/gcc.dg/cpp/dir-only-4.c rename to gcc/testsuite/c-c++-common/cpp/dir-only-4.c diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-5.c b/gcc/testsuite/c-c++-common/cpp/dir-only-5.c similarity index 77% rename from gcc/testsuite/gcc.dg/cpp/dir-only-5.c rename to gcc/testsuite/c-c++-common/cpp/dir-only-5.c index ad8e11e5937..747006a682b 100644 --- a/gcc/testsuite/gcc.dg/cpp/dir-only-5.c +++ b/gcc/testsuite/c-c++-common/cpp/dir-only-5.c @@ -1,6 +1,7 @@ /* Copyright 2007 Free Software Foundation, Inc. Contributed by Ollie Wild . */ -/* { dg-do preprocess } */ +/* C++ silently ignores traditional! */ +/* { dg-do preprocess { target c } } */ /* { dg-options "-fdirectives-only -traditional" } */ /* { dg-error "'-fdirectives-only' is incompatible with '-traditional'\n" "'-traditional' check" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.dg/cpp/dir-only-6.c b/gcc/testsuite/c-c++-common/cpp/dir-only-6.c similarity index 100% rename from gcc/testsuite/gcc.dg/cpp/dir-only-6.c rename to gcc/testsuite/c-c++-common/cpp/dir-only-6.c diff --git a/gcc/testsuite/c-c++-common/cpp/dir-only-7.c b/gcc/testsuite/c-c++-common/cpp/dir-only-7.c new file mode 100644 index 00000000000..b9fb4d88528 --- /dev/null +++ b/gcc/testsuite/c-c++-common/cpp/dir-only-7.c @@ -0,0 +1,35 @@ +// { dg-do preprocess } +// { dg-options "-std=c++11" { target c++ } } +// { dg-options "-std=gnu99" { target c } } +// { dg-additional-options -fdirectives-only } + +R"stuff( +)nope" +#error in raw literal +)stuff" +// comment +#define bob 1 +// " comment +#if !bob +#error "no bob" +#endif + +bob\ +\ +R"regular string not an erroneous raw one" + +"regular"R"***(not a raw string" +#define HERE 1 + //)***" +#ifndef HERE +#error "oops no HERE" +#endif + /* comment */ + + +0e+R"*(not a raw string" +#define CPP_NUM 1 + //)*" +#ifndef CPP_NUM +#error "oops no CPP_NUM" +#endif diff --git a/gcc/testsuite/c-c++-common/cpp/dir-only-8.c b/gcc/testsuite/c-c++-common/cpp/dir-only-8.c new file mode 100644 index 
00000000000..b0e00f5b542 --- /dev/null +++ b/gcc/testsuite/c-c++-common/cpp/dir-only-8.c @@ -0,0 +1,38 @@ +// { dg-do preprocess { target c++ } } +// { dg-options "-std=c++14" } +// { dg-additional-options -fdirectives-only } + +012'bcd +#define A 1 +// ' +#ifndef A +#error Fell into first char const +#endif +enum { A = 195'936'478 }; 'a' +#define AA 1 + // 'a +#ifndef AA +#error Fell into early char const +#endif + +012\ +'bcd +#define B 1 +// ' +#ifndef B +#error Fell into second char const +#endif + +.012'b +#define C 1 +// ' +#ifndef C +#error Fell into third char const +#endif + +.0e+12'b +#define D 1 +// ' +#ifndef D +#error Fell into fourth char const +#endif diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 307cf3add94..a1b78bb7194 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,15 @@ +2020-05-08 Nathan Sidwell + + Reimplement directives only processing, support raw literals. + * directives-only.c: Delete. + * Makefile.in (libcpp_a_OBJS, libcpp_a_SOURCES): Remove it. + * include/cpplib.h (enum CPP_DO_task): New enum. + (cpp_directive_only_process): Declare. + * internal.h (_cpp_dir_only_callbacks): Delete. + (_cpp_preprocess_dir_only): Delete. + * lex.c (do_peek_backslash, do_peek_next, do_peek_prev): New. + (cpp_directive_only_process): New implementation. 
+ 2020-02-14 Jakub Jelinek Partially implement P1042R1: __VA_OPT__ wording clarifications diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in index 8f8c8f65eb3..3d9ca0baaf6 100644 --- a/libcpp/Makefile.in +++ b/libcpp/Makefile.in @@ -83,11 +83,11 @@ COMPILER_FLAGS = $(ALL_CXXFLAGS) DEPMODE = $(CXXDEPMODE) -libcpp_a_OBJS = charset.o directives.o directives-only.o errors.o \ +libcpp_a_OBJS = charset.o directives.o errors.o \ expr.o files.o identifiers.o init.o lex.o line-map.o macro.o \ mkdeps.o pch.o symtab.o traditional.o -libcpp_a_SOURCES = charset.c directives.c directives-only.c errors.c \ +libcpp_a_SOURCES = charset.c directives.c errors.c \ expr.c files.c identifiers.c init.c lex.c line-map.c macro.c \ mkdeps.c pch.c symtab.c traditional.c diff --git a/libcpp/directives-only.c b/libcpp/directives-only.c deleted file mode 100644 index 5eac118d824..00000000000 --- a/libcpp/directives-only.c +++ /dev/null @@ -1,240 +0,0 @@ -/* CPP Library - directive only preprocessing for distributed compilation. - Copyright (C) 2007-2020 Free Software Foundation, Inc. - Contributed by Ollie Wild . - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING3. If not see -. */ - -#include "config.h" -#include "system.h" -#include "cpplib.h" -#include "internal.h" - -/* DO (Directive only) flags. */ -#define DO_BOL (1 << 0) /* At the beginning of a logical line. */ -#define DO_STRING (1 << 1) /* In a string constant. 
*/ -#define DO_CHAR (1 << 2) /* In a character constant. */ -#define DO_BLOCK_COMMENT (1 << 3) /* In a block comment. */ -#define DO_LINE_COMMENT (1 << 4) /* In a single line "//-style" comment. */ - -#define DO_LINE_SPECIAL (DO_STRING | DO_CHAR | DO_LINE_COMMENT) -#define DO_SPECIAL (DO_LINE_SPECIAL | DO_BLOCK_COMMENT) - -/* Writes out the preprocessed file, handling spacing and paste - avoidance issues. */ -void -_cpp_preprocess_dir_only (cpp_reader *pfile, - const struct _cpp_dir_only_callbacks *cb) -{ - struct cpp_buffer *buffer; - const unsigned char *cur, *base, *next_line, *rlimit; - cppchar_t c, last_c; - unsigned flags; - linenum_type lines; - int col; - location_t loc; - - restart: - /* Buffer initialization ala _cpp_clean_line(). */ - buffer = pfile->buffer; - buffer->cur_note = buffer->notes_used = 0; - buffer->cur = buffer->line_base = buffer->next_line; - buffer->need_line = false; - - /* This isn't really needed. It prevents a compiler warning, though. */ - loc = pfile->line_table->highest_line; - - /* Scan initialization. */ - next_line = cur = base = buffer->cur; - rlimit = buffer->rlimit; - flags = DO_BOL; - lines = 0; - col = 1; - - for (last_c = '\n', c = *cur; cur < rlimit; last_c = c, c = *++cur, ++col) - { - /* Skip over escaped newlines. */ - if (__builtin_expect (c == '\\', false)) - { - const unsigned char *tmp = cur + 1; - - while (is_nvspace (*tmp) && tmp < rlimit) - tmp++; - if (*tmp == '\r') - tmp++; - if (*tmp == '\n' && tmp < rlimit) - { - CPP_INCREMENT_LINE (pfile, 0); - lines++; - col = 0; - cur = tmp; - c = last_c; - continue; - } - } - - if (__builtin_expect (last_c == '#', false) && !(flags & DO_SPECIAL)) - { - if (c != '#' && (flags & DO_BOL)) - { - class line_maps *line_table; - - if (!pfile->state.skipping && next_line != base) - cb->print_lines (lines, base, next_line - base); - - /* Prep things for directive handling. 
*/ - buffer->next_line = cur; - buffer->need_line = true; - _cpp_get_fresh_line (pfile); - - /* Ensure proper column numbering for generated error messages. */ - buffer->line_base -= col - 1; - - _cpp_handle_directive (pfile, false /* ignore indented */); - - /* Sanitize the line settings. Duplicate #include's can mess - things up. */ - line_table = pfile->line_table; - line_table->highest_location = line_table->highest_line; - - /* The if block prevents us from outputing line information when - the file ends with a directive and no newline. Note that we - must use pfile->buffer, not buffer. */ - if (pfile->buffer->next_line < pfile->buffer->rlimit) - cb->maybe_print_line (pfile->line_table->highest_line); - - goto restart; - } - - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - else if (__builtin_expect (last_c == '/', false) \ - && !(flags & DO_SPECIAL) && c != '*' && c != '/') - { - /* If a previous slash is not starting a block comment, clear the - DO_BOL flag. */ - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - - switch (c) - { - case '/': - if ((flags & DO_BLOCK_COMMENT) && last_c == '*') - { - flags &= ~DO_BLOCK_COMMENT; - c = 0; - } - else if (!(flags & DO_SPECIAL) && last_c == '/') - flags |= DO_LINE_COMMENT; - else if (!(flags & DO_SPECIAL)) - /* Mark the position for possible error reporting. */ - loc = linemap_position_for_column (pfile->line_table, col); - - break; - - case '*': - if (!(flags & DO_SPECIAL)) - { - if (last_c == '/') - flags |= DO_BLOCK_COMMENT; - else - { - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - } - - break; - - case '\'': - case '"': - { - unsigned state = (c == '"') ? 
DO_STRING : DO_CHAR; - - if (!(flags & DO_SPECIAL)) - { - flags |= state; - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - else if ((flags & state) && last_c != '\\') - flags &= ~state; - - break; - } - - case '\\': - { - if ((flags & (DO_STRING | DO_CHAR)) && last_c == '\\') - c = 0; - - if (!(flags & DO_SPECIAL)) - { - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - - break; - } - - case '\n': - CPP_INCREMENT_LINE (pfile, 0); - lines++; - col = 0; - flags &= ~DO_LINE_SPECIAL; - if (!(flags & DO_SPECIAL)) - flags |= DO_BOL; - break; - - case '#': - next_line = cur; - /* Don't update DO_BOL yet. */ - break; - - case ' ': case '\t': case '\f': case '\v': case '\0': - break; - - default: - if (!(flags & DO_SPECIAL)) - { - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - break; - } - } - - if (flags & DO_BLOCK_COMMENT) - cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0, "unterminated comment"); - - if (!pfile->state.skipping && cur != base) - { - /* If the file was not newline terminated, add rlimit, which is - guaranteed to point to a newline, to the end of our range. 
*/ - if (cur[-1] != '\n') - { - cur++; - CPP_INCREMENT_LINE (pfile, 0); - lines++; - } - - cb->print_lines (lines, base, cur - base); - } - - _cpp_pop_buffer (pfile); - if (pfile->buffer) - goto restart; -} diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 03cc72a12e2..2b1e33f94ae 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1304,6 +1304,18 @@ extern int cpp_read_state (cpp_reader *, const char *, FILE *, /* In lex.c */ extern void cpp_force_token_locations (cpp_reader *, location_t); extern void cpp_stop_forcing_token_locations (cpp_reader *); +enum CPP_DO_task +{ + CPP_DO_print, + CPP_DO_location, + CPP_DO_token +}; + +extern void cpp_directive_only_process (cpp_reader *pfile, + void *data, + void (*cb) (cpp_reader *, + CPP_DO_task, + void *data, ...)); /* In expr.c */ extern enum cpp_ttype cpp_userdef_string_remove_type diff --git a/libcpp/internal.h b/libcpp/internal.h index 97d9bdbea77..11b6469dccd 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -747,17 +747,6 @@ extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *, extern void _cpp_pop_buffer (cpp_reader *); extern char *_cpp_bracket_include (cpp_reader *); -/* In directives.c */ -struct _cpp_dir_only_callbacks -{ - /* Called to print a block of lines. */ - void (*print_lines) (int, const void *, size_t); - bool (*maybe_print_line) (location_t); -}; - -extern void _cpp_preprocess_dir_only (cpp_reader *, - const struct _cpp_dir_only_callbacks *); - /* In traditional.c. */ extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *, bool); extern bool _cpp_read_logical_line_trad (cpp_reader *); diff --git a/libcpp/lex.c b/libcpp/lex.c index 56ac3a1dd73..3bcf073710e 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -3826,3 +3826,485 @@ cpp_stop_forcing_token_locations (cpp_reader *r) { r->forced_token_location = 0; } + +/* We're looking at \, if it's escaping EOL, look past it. If at + LIMIT, don't advance. 
*/ + +static const unsigned char * +do_peek_backslash (const unsigned char *peek, const unsigned char *limit) +{ + const unsigned char *probe = peek; + + if (__builtin_expect (peek[1] == '\n', true)) + { + eol: + probe += 2; + if (__builtin_expect (probe < limit, true)) + { + peek = probe; + if (*peek == '\\') + /* The user might be perverse. */ + return do_peek_backslash (peek, limit); + } + } + else if (__builtin_expect (peek[1] == '\r', false)) + { + if (probe[2] == '\n') + probe++; + goto eol; + } + + return peek; +} + +static const unsigned char * +do_peek_next (const unsigned char *peek, const unsigned char *limit) +{ + if (__builtin_expect (*peek == '\\', false)) + peek = do_peek_backslash (peek, limit); + return peek; +} + +static const unsigned char * +do_peek_prev (const unsigned char *peek, const unsigned char *bound) +{ + if (peek == bound) + return NULL; /* Nothing precedes BOUND. */ + + unsigned char c = *--peek; + if (__builtin_expect (c == '\n', false) + || __builtin_expect (c == '\r', false)) /* LF, or a bare CR line ending. */ + { + if (peek == bound) + return peek; + int ix = -1; + if (c == '\n' && peek[ix] == '\r') /* CRLF: step over the CR too. */ + { + if (peek + ix == bound) + return peek; + ix--; + } + + if (peek[ix] == '\\') /* Escaped newline: keep looking before the backslash. */ + return do_peek_prev (peek + ix, bound); + + return peek; + } + else + return peek; +} + +/* Directives-only scanning. Somewhat more relaxed than correct + parsing -- some ill-formed programs will not be rejected. */ + +void +cpp_directive_only_process (cpp_reader *pfile, + void *data, + void (*cb) (cpp_reader *, CPP_DO_task, void *, ...)) +{ + do + { + restart: + /* Buffer initialization, but no line cleaning. */ + cpp_buffer *buffer = pfile->buffer; + buffer->cur_note = buffer->notes_used = 0; + buffer->cur = buffer->line_base = buffer->next_line; + buffer->need_line = false; + /* Files always end in a newline. We rely on this for + character peeking safety. 
*/ + gcc_assert (buffer->rlimit[-1] == '\n'); + + const unsigned char *base = buffer->cur; + unsigned line_count = 0; + const unsigned char *line_start = base; + + bool bol = true; + bool raw = false; + + const unsigned char *lwm = base; + for (const unsigned char *pos = base, *limit = buffer->rlimit; + pos < limit;) + { + unsigned char c = *pos++; + /* This matches the switch in _cpp_lex_direct. */ + switch (c) + { + case ' ': case '\t': case '\f': case '\v': + /* Whitespace, do nothing. */ + break; + + case '\r': /* MAC line ending, or Windows \r\n */ + if (*pos == '\n') + pos++; + /* FALLTHROUGH */ + + case '\n': + bol = true; + + next_line: + CPP_INCREMENT_LINE (pfile, 0); + line_count++; + line_start = pos; + break; + + case '\\': + /* <backslash><newline> is removed, and doesn't undo any + preceding escape or whatnot. */ + if (*pos == '\n') + { + pos++; + goto next_line; + } + else if (*pos == '\r') + { + if (pos[1] == '\n') + pos++; + pos++; + goto next_line; + } + goto dflt; + + case '#': + if (bol) + { + /* Line directive. */ + if (pos - 1 > base && !pfile->state.skipping) + cb (pfile, CPP_DO_print, data, + line_count, base, pos - 1 - base); + + /* Prep things for directive handling. */ + buffer->next_line = pos; + buffer->need_line = true; + _cpp_get_fresh_line (pfile); + + /* Ensure proper column numbering for generated + error messages. */ + buffer->line_base -= pos - line_start; + + _cpp_handle_directive (pfile, line_start + 1 != pos); + + /* Sanitize the line settings. Duplicate #include's can + mess things up. */ + // FIXME: Necessary? 
+ pfile->line_table->highest_location + = pfile->line_table->highest_line; + + if (!pfile->state.skipping + && pfile->buffer->next_line < pfile->buffer->rlimit) + cb (pfile, CPP_DO_location, data, + pfile->line_table->highest_line); + + goto restart; + } + goto dflt; + + case '/': + { + const unsigned char *peek = do_peek_next (pos, limit); + if (!(*peek == '/' || *peek == '*')) + goto dflt; + + /* Line or block comment */ + bool is_block = *peek == '*'; + bool star = false; + bool esc = false; + location_t sloc + = linemap_position_for_column (pfile->line_table, + pos - line_start); + + while (pos < limit) + { + char c = *pos++; + switch (c) + { + case '\\': + esc = true; + break; + + case '\r': + if (*pos == '\n') + pos++; + /* FALLTHROUGH */ + + case '\n': + { + CPP_INCREMENT_LINE (pfile, 0); + line_count++; + line_start = pos; + if (!esc && !is_block) + { + bol = true; + goto done_comment; + } + } + if (!esc) + star = false; + esc = false; + break; + + case '*': + if (pos > peek && !esc) + star = is_block; + esc = false; + break; + + case '/': + if (star) + goto done_comment; + /* FALLTHROUGH */ + + default: + star = false; + esc = false; + break; + } + } + cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0, + "unterminated comment"); + done_comment: + lwm = pos; + break; + } + + case '\'': + if (!CPP_OPTION (pfile, digit_separators)) + goto delimited_string; + + /* Possibly a number punctuator. */ + if (!ISIDNUM (*do_peek_next (pos, limit))) + goto delimited_string; + + goto quote_peek; + + case '\"': + if (!CPP_OPTION (pfile, rliterals)) + goto delimited_string; + + quote_peek: + { + /* For ' see if it's a number punctuator + \.?<digit>(<digit>|<identifier-nondigit> + |'<digit>|'<identifier-nondigit>|[eEpP]<sign>|\.)* */ + /* For " see if it's a raw string + {U,L,u,u8}R. This includes CPP_NUMBER detection, + because that could be 0e+R. 
*/ + const unsigned char *peek = pos - 1; + bool quote_first = c == '"'; + bool quote_eight = false; + bool maybe_number_start = false; + bool want_number = false; + + while ((peek = do_peek_prev (peek, lwm))) + { + unsigned char p = *peek; + if (quote_first) + { + if (!raw) + { + if (p != 'R') + break; + raw = true; + continue; + } + + quote_first = false; + if (p == 'L' || p == 'U' || p == 'u') + ; + else if (p == '8') + quote_eight = true; + else + goto second_raw; + } + else if (quote_eight) + { + if (p != 'u') + { + raw = false; + break; + } + quote_eight = false; + } + else if (c == '"') + { + second_raw:; + if (!want_number && ISIDNUM (p)) + { + raw = false; + break; + } + } + + if (ISDIGIT (p)) + maybe_number_start = true; + else if (p == '.') + want_number = true; + else if (ISIDNUM (p)) + maybe_number_start = false; + else if (p == '+' || p == '-') + { + if (const unsigned char *peek_prev + = do_peek_prev (peek, lwm)) + { + p = *peek_prev; + if (p == 'e' || p == 'E' + || p == 'p' || p == 'P') + { + want_number = true; + maybe_number_start = false; + } + else + break; + } + else + break; + } + else if (p == '\'' || p == '\"') + { + /* If this is lwm, this must be the end of a + previous string. So this is a trailing + literal type, (a) if those are allowed, + and (b) maybe_start is false. Otherwise + this must be a CPP_NUMBER because we've + met another ', and we'd have checked that + in its own right. */ + if (peek == lwm && CPP_OPTION (pfile, uliterals)) + { + if (!maybe_number_start && !want_number) + /* Must be a literal type. */ + raw = false; + } + else if (p == '\'' + && CPP_OPTION (pfile, digit_separators)) + maybe_number_start = true; + break; + } + else if (c == '\'') + break; + else if (!quote_first && !quote_eight) + break; + } + + if (maybe_number_start) + { + if (c == '\'') + /* A CPP NUMBER. */ + goto dflt; + raw = false; + } + + goto delimited_string; + } + + delimited_string: + { + /* (Possibly raw) string or char literal. 
*/ + unsigned char end = c; + int delim_len = -1; + const unsigned char *delim = NULL; + location_t sloc = linemap_position_for_column (pfile->line_table, + pos - line_start); + int esc = 0; + + if (raw) + { + /* There can be no line breaks in the delimiter. */ + delim = pos; + for (delim_len = 0; (c = *pos++) != '('; delim_len++) + { + if (delim_len == 16) + { + cpp_error_with_line (pfile, CPP_DL_ERROR, + sloc, 0, + "raw string delimiter" + " longer than %d" + " characters", + delim_len); + raw = false; + pos = delim; + break; + } + if (strchr (") \\\t\v\f\n", c)) + { + cpp_error_with_line (pfile, CPP_DL_ERROR, + sloc, 0, + "invalid character '%c'" + " in raw string" + " delimiter", c); + raw = false; + pos = delim; + break; + } + if (pos >= limit) + goto bad_string; + } + } + + while (pos < limit) + { + char c = *pos++; + switch (c) + { + case '\\': + if (!raw) + esc++; + break; + + case '\r': + if (*pos == '\n') + pos++; + /* FALLTHROUGH */ + + case '\n': + { + CPP_INCREMENT_LINE (pfile, 0); + line_count++; + line_start = pos; + } + if (esc) + esc--; + break; + + case ')': + if (raw + && pos + delim_len + 1 < limit + && pos[delim_len] == end + && !memcmp (delim, pos, delim_len)) + { + pos += delim_len + 1; + raw = false; + goto done_string; + } + break; + + default: + if (!raw && !(esc & 1) && c == end) + goto done_string; + esc = 0; + break; + } + } + bad_string: + cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0, + "unterminated literal"); + + done_string: + raw = false; + lwm = pos - 1; + } + goto dflt; + + default: + dflt: + bol = false; + pfile->mi_valid = false; + break; + } + } + + if (buffer->rlimit > base && !pfile->state.skipping) + cb (pfile, CPP_DO_print, data, line_count, base, buffer->rlimit - base); + + _cpp_pop_buffer (pfile); + } + while (pfile->buffer); +} -- 2.30.2