X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=libcpp%2Flex.c;h=07d5a4ff4668853a230c00f53dd48b0691276b09;hb=6a6926635c36e0ef2598b5399afdbfc2dbd4bf1f;hp=3bcf073710eb91984265db1fee4a3d862eba60a6;hpb=b224c3763e018e8bdd0047b3eb283992fb655ce0;p=gcc.git diff --git a/libcpp/lex.c b/libcpp/lex.c index 3bcf073710e..07d5a4ff466 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -531,11 +531,11 @@ init_vectorized_lexer (void) search_line_fast = impl; } -#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__) +#elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__) /* A vection of the fast scanner using AltiVec vectorized byte compares and VSX unaligned loads (when VSX is available). This is otherwise - the same as the pre-GCC 5 version. */ + the same as the AltiVec version. */ ATTRIBUTE_NO_SANITIZE_UNDEFINED static const uchar * @@ -1062,7 +1062,7 @@ _cpp_clean_line (cpp_reader *pfile) d = (uchar *) s; /* Handle DOS line endings. */ - if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') + if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n') s++; } @@ -1370,9 +1370,9 @@ maybe_va_opt_error (cpp_reader *pfile) { /* __VA_OPT__ should not be accepted at all, but allow it in system headers. */ - if (!cpp_in_system_header (pfile)) + if (!_cpp_in_system_header (pfile)) cpp_error (pfile, CPP_DL_PEDWARN, - "__VA_OPT__ is not available until C++2a"); + "__VA_OPT__ is not available until C++20"); } else if (!pfile->state.va_args_ok) { @@ -1380,7 +1380,7 @@ maybe_va_opt_error (cpp_reader *pfile) variadic macro. */ cpp_error (pfile, CPP_DL_PEDWARN, "__VA_OPT__ can only appear in the expansion" - " of a C++2a variadic macro"); + " of a C++20 variadic macro"); } } @@ -1577,44 +1577,90 @@ static void create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, unsigned int len, enum cpp_ttype type) { - uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); - - memcpy (dest, base, len); - dest[len] = '\0'; token->type = type; token->val.str.len = len; - token->val.str.text = dest; + token->val.str.text = cpp_alloc_token_string (pfile, base, len); } +const uchar * +cpp_alloc_token_string (cpp_reader *pfile, + const unsigned char *ptr, unsigned len) +{ + uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); + + dest[len] = 0; + memcpy (dest, ptr, len); + return dest; +} + +/* A pair of raw buffer pointers. The currently open one is [1], the + first one is [0]. Used for string literal lexing. */ +struct lit_accum { + _cpp_buff *first; + _cpp_buff *last; + const uchar *rpos; + size_t accum; + + lit_accum () + : first (NULL), last (NULL), rpos (0), accum (0) + { + } + + void append (cpp_reader *, const uchar *, size_t); + + void read_begin (cpp_reader *); + bool reading_p () const + { + return rpos != NULL; + } + char read_char () + { + char c = *rpos++; + if (rpos == BUFF_FRONT (last)) + rpos = NULL; + return c; + } +}; + /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer sequence from *FIRST_BUFF_P to LAST_BUFF_P. */ -static void -bufring_append (cpp_reader *pfile, const uchar *base, size_t len, - _cpp_buff **first_buff_p, _cpp_buff **last_buff_p) +void +lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len) { - _cpp_buff *first_buff = *first_buff_p; - _cpp_buff *last_buff = *last_buff_p; - - if (first_buff == NULL) - first_buff = last_buff = _cpp_get_buff (pfile, len); - else if (len > BUFF_ROOM (last_buff)) + if (!last) + /* Starting. */ + first = last = _cpp_get_buff (pfile, len); + else if (len > BUFF_ROOM (last)) { - size_t room = BUFF_ROOM (last_buff); - memcpy (BUFF_FRONT (last_buff), base, room); - BUFF_FRONT (last_buff) += room; + /* There is insufficient room in the buffer. Copy what we can, + and then either extend or create a new one. */ + size_t room = BUFF_ROOM (last); + memcpy (BUFF_FRONT (last), base, room); + BUFF_FRONT (last) += room; base += room; len -= room; - last_buff = _cpp_append_extend_buff (pfile, last_buff, len); - } + accum += room; - memcpy (BUFF_FRONT (last_buff), base, len); - BUFF_FRONT (last_buff) += len; + gcc_checking_assert (!rpos); - *first_buff_p = first_buff; - *last_buff_p = last_buff; + last = _cpp_append_extend_buff (pfile, last, len); + } + + memcpy (BUFF_FRONT (last), base, len); + BUFF_FRONT (last) += len; + accum += len; } +void +lit_accum::read_begin (cpp_reader *pfile) +{ + /* We never accumulate more than 4 chars to read. */ + if (BUFF_ROOM (last) < 4) + + last = _cpp_append_extend_buff (pfile, last, 4); + rpos = BUFF_FRONT (last); +} /* Returns true if a macro has been defined. This might not work if compile with -save-temps, @@ -1657,267 +1703,256 @@ is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base) return is_macro (pfile, base); } -/* Lexes a raw string. The stored string contains the spelling, including - double quotes, delimiter string, '(' and ')', any leading - 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the - literal, or CPP_OTHER if it was not properly terminated. +/* Lexes a raw string. The stored string contains the spelling, + including double quotes, delimiter string, '(' and ')', any leading + 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains + the type of the literal, or CPP_OTHER if it was not properly + terminated. + + BASE is the start of the token. Updates pfile->buffer->cur to just + after the lexed string. The spelling is NUL-terminated, but it is not guaranteed that this is the first NUL since embedded NULs are preserved. */ static void -lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, - const uchar *cur) +lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) { - uchar raw_prefix[17]; - uchar temp_buffer[18]; - const uchar *orig_base; - unsigned int raw_prefix_len = 0, raw_suffix_len = 0; - enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX }; - raw_str_phase phase = RAW_STR_PREFIX; - enum cpp_ttype type; - size_t total_len = 0; - /* Index into temp_buffer during phases other than RAW_STR, - during RAW_STR phase 17 to tell BUF_APPEND that nothing should - be appended to temp_buffer. */ - size_t temp_buffer_len = 0; - _cpp_buff *first_buff = NULL, *last_buff = NULL; - size_t raw_prefix_start; + const uchar *pos = base; + + /* 'tis a pity this information isn't passed down from the lexer's + initial categorization of the token. */ + enum cpp_ttype type = CPP_STRING; + + if (*pos == 'L') + { + type = CPP_WSTRING; + pos++; + } + else if (*pos == 'U') + { + type = CPP_STRING32; + pos++; + } + else if (*pos == 'u') + { + if (pos[1] == '8') + { + type = CPP_UTF8STRING; + pos++; + } + else + type = CPP_STRING16; + pos++; + } + + gcc_checking_assert (pos[0] == 'R' && pos[1] == '"'); + pos += 2; + _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; - type = (*base == 'L' ? CPP_WSTRING : - *base == 'U' ? CPP_STRING32 : - *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) - : CPP_STRING); - -#define BUF_APPEND(STR,LEN) \ - do { \ - bufring_append (pfile, (const uchar *)(STR), (LEN), \ - &first_buff, &last_buff); \ - total_len += (LEN); \ - if (__builtin_expect (temp_buffer_len < 17, 0) \ - && (const uchar *)(STR) != base \ - && (LEN) <= 2) \ - { \ - memcpy (temp_buffer + temp_buffer_len, \ - (const uchar *)(STR), (LEN)); \ - temp_buffer_len += (LEN); \ - } \ - } while (0) - - orig_base = base; - ++cur; - raw_prefix_start = cur - base; + /* Skip notes before the ". */ + while (note->pos < pos) + ++note; + + lit_accum accum; + + uchar prefix[17]; + unsigned prefix_len = 0; + enum Phase + { + PHASE_PREFIX = -2, + PHASE_NONE = -1, + PHASE_SUFFIX = 0 + } phase = PHASE_PREFIX; + for (;;) { - cppchar_t c; - - /* If we previously performed any trigraph or line splicing - transformations, undo them in between the opening and closing - double quote. */ - while (note->pos < cur) - ++note; - for (; note->pos == cur; ++note) - { - switch (note->type) - { - case '\\': - case ' ': - /* Restore backslash followed by newline. */ - BUF_APPEND (base, cur - base); - base = cur; - BUF_APPEND ("\\", 1); - after_backslash: - if (note->type == ' ') - { - /* GNU backslash whitespace newline extension. FIXME - could be any sequence of non-vertical space. When we - can properly restore any such sequence, we should mark - this note as handled so _cpp_process_line_notes - doesn't warn. */ - BUF_APPEND (" ", 1); - } + gcc_checking_assert (note->pos >= pos); - BUF_APPEND ("\n", 1); - break; + /* Undo any escaped newlines and trigraphs. */ + if (!accum.reading_p () && note->pos == pos) + switch (note->type) + { + case '\\': + case ' ': + /* Restore backslash followed by newline. */ + accum.append (pfile, base, pos - base); + base = pos; + accum.read_begin (pfile); + accum.append (pfile, UC"\\", 1); + + after_backslash: + if (note->type == ' ') + /* GNU backslash whitespace newline extension. FIXME + could be any sequence of non-vertical space. When we + can properly restore any such sequence, we should + mark this note as handled so _cpp_process_line_notes + doesn't warn. */ + accum.append (pfile, UC" ", 1); + + accum.append (pfile, UC"\n", 1); + note++; + break; - case 0: - /* Already handled. */ - break; + case '\n': + /* This can happen for ??/ when trigraphs are not + being interpretted. */ + gcc_checking_assert (!CPP_OPTION (pfile, trigraphs)); + note->type = 0; + note++; + break; - default: - if (_cpp_trigraph_map[note->type]) - { - /* Don't warn about this trigraph in - _cpp_process_line_notes, since trigraphs show up as - trigraphs in raw strings. */ - uchar type = note->type; - note->type = 0; - - if (!CPP_OPTION (pfile, trigraphs)) - /* If we didn't convert the trigraph in the first - place, don't do anything now either. */ - break; + default: + gcc_checking_assert (_cpp_trigraph_map[note->type]); - BUF_APPEND (base, cur - base); - base = cur; - BUF_APPEND ("??", 2); + /* Don't warn about this trigraph in + _cpp_process_line_notes, since trigraphs show up as + trigraphs in raw strings. */ + uchar type = note->type; + note->type = 0; - /* ??/ followed by newline gets two line notes, one for - the trigraph and one for the backslash/newline. */ - if (type == '/' && note[1].pos == cur) - { - if (note[1].type != '\\' - && note[1].type != ' ') - abort (); - BUF_APPEND ("/", 1); - ++note; - goto after_backslash; - } - else - { - /* Skip the replacement character. */ - base = ++cur; - BUF_APPEND (&type, 1); - c = type; - goto check_c; - } - } - else - abort (); - break; - } - } - c = *cur++; - if (__builtin_expect (temp_buffer_len < 17, 0)) - temp_buffer[temp_buffer_len++] = c; + if (CPP_OPTION (pfile, trigraphs)) + { + accum.append (pfile, base, pos - base); + base = pos; + accum.read_begin (pfile); + accum.append (pfile, UC"??", 2); + accum.append (pfile, &type, 1); + + /* ??/ followed by newline gets two line notes, one for + the trigraph and one for the backslash/newline. */ + if (type == '/' && note[1].pos == pos) + { + note++; + gcc_assert (note->type == '\\' || note->type == ' '); + goto after_backslash; + } + /* Skip the replacement character. */ + base = ++pos; + } - check_c: - if (phase == RAW_STR_PREFIX) + note++; + break; + } + + /* Now get a char to process. Either from an expanded note, or + from the line buffer. */ + bool read_note = accum.reading_p (); + char c = read_note ? accum.read_char () : *pos++; + + if (phase == PHASE_PREFIX) { - while (raw_prefix_len < temp_buffer_len) + if (c == '(') { - raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len]; - switch (raw_prefix[raw_prefix_len]) - { - case ' ': case '(': case ')': case '\\': case '\t': - case '\v': case '\f': case '\n': default: - break; - /* Basic source charset except the above chars. */ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case '_': case '{': case '}': case '#': case '[': case ']': - case '<': case '>': case '%': case ':': case ';': case '.': - case '?': case '*': case '+': case '-': case '/': case '^': - case '&': case '|': case '~': case '!': case '=': case ',': - case '"': case '\'': - if (raw_prefix_len < 16) - { - raw_prefix_len++; - continue; - } - break; - } - - if (raw_prefix[raw_prefix_len] != '(') - { - int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1; - if (raw_prefix_len == 16) - cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, - col, "raw string delimiter longer " - "than 16 characters"); - else if (raw_prefix[raw_prefix_len] == '\n') - cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, - col, "invalid new-line in raw " - "string delimiter"); - else - cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, - col, "invalid character '%c' in " - "raw string delimiter", - (int) raw_prefix[raw_prefix_len]); - pfile->buffer->cur = orig_base + raw_prefix_start - 1; - create_literal (pfile, token, orig_base, - raw_prefix_start - 1, CPP_OTHER); - if (first_buff) - _cpp_release_buff (pfile, first_buff); - return; - } - raw_prefix[raw_prefix_len] = '"'; - phase = RAW_STR; - /* Nothing should be appended to temp_buffer during - RAW_STR phase. */ - temp_buffer_len = 17; - break; + /* Done. */ + phase = PHASE_NONE; + prefix[prefix_len++] = '"'; } - continue; - } - else if (phase == RAW_STR_SUFFIX) - { - while (raw_suffix_len <= raw_prefix_len - && raw_suffix_len < temp_buffer_len - && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len]) - raw_suffix_len++; - if (raw_suffix_len > raw_prefix_len) - break; - if (raw_suffix_len == temp_buffer_len) + else if (prefix_len < 16 + /* Prefix chars are any of the basic character set, + [lex.charset] except for ' + ()\\\t\v\f\n'. Optimized for a contiguous + alphabet. */ + /* Unlike a switch, this collapses down to one or + two shift and bitmask operations on an ASCII + system, with an outlier or two. */ + && (('Z' - 'A' == 25 + ? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + : ISIDST (c)) + || (c >= '0' && c <= '9') + || c == '_' || c == '{' || c == '}' + || c == '[' || c == ']' || c == '#' + || c == '<' || c == '>' || c == '%' + || c == ':' || c == ';' || c == '.' || c == '?' + || c == '*' || c == '+' || c == '-' || c == '/' + || c == '^' || c == '&' || c == '|' || c == '~' + || c == '!' || c == '=' || c == ',' + || c == '"' || c == '\'')) + prefix[prefix_len++] = c; + else + { + /* Something is wrong. */ + int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note; + if (prefix_len == 16) + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, + col, "raw string delimiter longer " + "than 16 characters"); + else if (c == '\n') + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, + col, "invalid new-line in raw " + "string delimiter"); + else + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, + col, "invalid character '%c' in " + "raw string delimiter", c); + type = CPP_OTHER; + phase = PHASE_NONE; + /* Continue until we get a close quote, that's probably + the best failure mode. */ + prefix_len = 0; + } + if (c != '\n') continue; - phase = RAW_STR; - /* Nothing should be appended to temp_buffer during - RAW_STR phase. */ - temp_buffer_len = 17; } - if (c == ')') + + if (phase != PHASE_NONE) { - phase = RAW_STR_SUFFIX; - raw_suffix_len = 0; - temp_buffer_len = 0; + if (prefix[phase] != c) + phase = PHASE_NONE; + else if (unsigned (phase + 1) == prefix_len) + break; + else + { + phase = Phase (phase + 1); + continue; + } } - else if (c == '\n') + + if (!prefix_len && c == '"') + /* Failure mode lexing. */ + goto out; + else if (prefix_len && c == ')') + phase = PHASE_SUFFIX; + else if (!read_note && c == '\n') { + pos--; + pfile->buffer->cur = pos; if (pfile->state.in_directive || (pfile->state.parsing_args && pfile->buffer->next_line >= pfile->buffer->rlimit)) { - cur--; - type = CPP_OTHER; cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, "unterminated raw string"); - break; + type = CPP_OTHER; + goto out; } - BUF_APPEND (base, cur - base); + accum.append (pfile, base, pos - base + 1); + _cpp_process_line_notes (pfile, false); - if (pfile->buffer->cur < pfile->buffer->rlimit) + if (pfile->buffer->next_line < pfile->buffer->rlimit) CPP_INCREMENT_LINE (pfile, 0); pfile->buffer->need_line = true; - pfile->buffer->cur = cur-1; - _cpp_process_line_notes (pfile, false); if (!_cpp_get_fresh_line (pfile)) { + /* We ran out of file and failed to get a line. */ location_t src_loc = token->src_loc; token->type = CPP_EOF; /* Tell the compiler the line number of the EOF token. */ token->src_loc = pfile->line_table->highest_line; token->flags = BOL; - if (first_buff != NULL) - _cpp_release_buff (pfile, first_buff); + if (accum.first) + _cpp_release_buff (pfile, accum.first); cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0, "unterminated raw string"); + /* Now pop the buffer that _cpp_get_fresh_line did not. */ + _cpp_pop_buffer (pfile); return; } - cur = base = pfile->buffer->cur; + pos = base = pfile->buffer->cur; note = &pfile->buffer->notes[pfile->buffer->cur_note]; } } @@ -1927,7 +1962,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, /* If a string format macro, say from inttypes.h, is placed touching a string literal it could be parsed as a C++11 user-defined string literal thus breaking the program. */ - if (is_macro_not_literal_suffix (pfile, cur)) + if (is_macro_not_literal_suffix (pfile, pos)) { /* Raise a warning, but do not consume subsequent tokens. */ if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping) @@ -1937,37 +1972,37 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, "a space between literal and string macro"); } /* Grab user defined literal suffix. */ - else if (ISIDST (*cur)) + else if (ISIDST (*pos)) { type = cpp_userdef_string_add_type (type); - ++cur; + ++pos; - while (ISIDNUM (*cur)) - ++cur; + while (ISIDNUM (*pos)) + ++pos; } } - pfile->buffer->cur = cur; - if (first_buff == NULL) - create_literal (pfile, token, base, cur - base, type); + out: + pfile->buffer->cur = pos; + if (!accum.accum) + create_literal (pfile, token, base, pos - base, type); else { - uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1); + size_t extra_len = pos - base; + uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1); token->type = type; - token->val.str.len = total_len + (cur - base); + token->val.str.len = accum.accum + extra_len; token->val.str.text = dest; - last_buff = first_buff; - while (last_buff != NULL) + for (_cpp_buff *buf = accum.first; buf; buf = buf->next) { - memcpy (dest, last_buff->base, - BUFF_FRONT (last_buff) - last_buff->base); - dest += BUFF_FRONT (last_buff) - last_buff->base; - last_buff = last_buff->next; + size_t len = BUFF_FRONT (buf) - buf->base; + memcpy (dest, buf->base, len); + dest += len; } - _cpp_release_buff (pfile, first_buff); - memcpy (dest, base, cur - base); - dest[cur - base] = '\0'; + _cpp_release_buff (pfile, accum.first); + memcpy (dest, base, extra_len); + dest[extra_len] = '\0'; } } @@ -2000,7 +2035,7 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) } if (terminator == 'R') { - lex_raw_string (pfile, token, base, cur); + lex_raw_string (pfile, token, base); return; } if (terminator == '"') @@ -2526,6 +2561,15 @@ cpp_peek_token (cpp_reader *pfile, int index) index--; break; } + else if (peektok->type == CPP_PRAGMA) + { + /* Don't peek past a pragma. */ + if (peektok == &pfile->directive_result) + /* Save the pragma in the buffer. */ + *pfile->cur_token++ = *peektok; + index--; + break; + } } while (index--); @@ -2578,6 +2622,151 @@ _cpp_temp_token (cpp_reader *pfile) return result; } +/* We're at the beginning of a logical line (so not in + directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See + if we should enter deferred_pragma mode to tokenize the rest of the + line as a module control-line. */ + +static void +cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result) +{ + unsigned backup = 0; /* Tokens we peeked. */ + cpp_hashnode *node = result->val.node.node; + cpp_token *peek = result; + cpp_token *keyword = peek; + cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules; + int header_count = 0; + + /* Make sure the incoming state is as we expect it. This way we + can restore it using constants. */ + gcc_checking_assert (!pfile->state.in_deferred_pragma + && !pfile->state.skipping + && !pfile->state.parsing_args + && !pfile->state.angled_headers + && (pfile->state.save_comments + == !CPP_OPTION (pfile, discard_comments))); + + /* Enter directives mode sufficiently for peeking. We don't have + to actually set in_directive. */ + pfile->state.in_deferred_pragma = true; + + /* These two fields are needed to process tokenization in deferred + pragma mode. They are not used outside deferred pragma mode or + directives mode. */ + pfile->state.pragma_allow_expansion = true; + pfile->directive_line = result->src_loc; + + /* Saving comments is incompatible with directives mode. */ + pfile->state.save_comments = 0; + + if (node == n_modules[spec_nodes::M_EXPORT][0]) + { + peek = _cpp_lex_direct (pfile); + keyword = peek; + backup++; + if (keyword->type != CPP_NAME) + goto not_module; + node = keyword->val.node.node; + if (!(node->flags & NODE_MODULE)) + goto not_module; + } + + if (node == n_modules[spec_nodes::M__IMPORT][0]) + /* __import */ + header_count = backup + 2 + 16; + else if (node == n_modules[spec_nodes::M_IMPORT][0]) + /* import */ + header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0); + else if (node == n_modules[spec_nodes::M_MODULE][0]) + ; /* module */ + else + goto not_module; + + /* We've seen [export] {module|import|__import}. Check the next token. */ + if (header_count) + /* After '{,__}import' a header name may appear. */ + pfile->state.angled_headers = true; + peek = _cpp_lex_direct (pfile); + backup++; + + /* ... import followed by identifier, ':', '<' or + header-name preprocessing tokens, or module + followed by cpp-identifier, ':' or ';' preprocessing + tokens. C++ keywords are not yet relevant. */ + if (peek->type == CPP_NAME + || peek->type == CPP_COLON + || (header_count + ? (peek->type == CPP_LESS + || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R') + || peek->type == CPP_HEADER_NAME) + : peek->type == CPP_SEMICOLON)) + { + pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed); + if (!pfile->state.pragma_allow_expansion) + pfile->state.prevent_expansion++; + + if (!header_count && linemap_included_from + (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table))) + cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0, + "module control-line cannot be in included file"); + + /* The first one or two tokens cannot be macro names. */ + for (int ix = backup; ix--;) + { + cpp_token *tok = ix ? keyword : result; + cpp_hashnode *node = tok->val.node.node; + + /* Don't attempt to expand the token. */ + tok->flags |= NO_EXPAND; + if (_cpp_defined_macro_p (node) + && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc) + && !cpp_fun_like_macro_p (node)) + cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0, + "module control-line \"%s\" cannot be" + " an object-like macro", + NODE_NAME (node)); + } + + /* Map to underbar variants. */ + keyword->val.node.node = n_modules[header_count + ? spec_nodes::M_IMPORT + : spec_nodes::M_MODULE][1]; + if (backup != 1) + result->val.node.node = n_modules[spec_nodes::M_EXPORT][1]; + + /* Maybe tell the tokenizer we expect a header-name down the + road. */ + pfile->state.directive_file_token = header_count; + } + else + { + not_module: + /* Drop out of directive mode. */ + /* We aaserted save_comments had this value upon entry. */ + pfile->state.save_comments + = !CPP_OPTION (pfile, discard_comments); + pfile->state.in_deferred_pragma = false; + /* Do not let this remain on. */ + pfile->state.angled_headers = false; + } + + /* In either case we want to backup the peeked tokens. */ + if (backup) + { + /* If we saw EOL, we should drop it, because this isn't a module + control-line after all. */ + bool eol = peek->type == CPP_PRAGMA_EOL; + if (!eol || backup > 1) + { + /* Put put the peeked tokens back */ + _cpp_backup_tokens_direct (pfile, backup); + /* But if the last one was an EOL, forget it. */ + if (eol) + pfile->lookaheads--; + } + } +} + /* Lex a token into RESULT (external interface). Takes care of issues like directive handling, token lookahead, multiple include optimization and skipping. */ @@ -2626,6 +2815,21 @@ _cpp_lex_token (cpp_reader *pfile) } else if (pfile->state.in_deferred_pragma) result = &pfile->directive_result; + else if (result->type == CPP_NAME + && (result->val.node.node->flags & NODE_MODULE) + && !pfile->state.skipping + /* Unlike regular directives, we do not deal with + tokenizing module directives as macro arguments. + That's not permitted. */ + && !pfile->state.parsing_args) + { + /* P1857. Before macro expansion, At start of logical + line ... */ + /* We don't have to consider lookaheads at this point. */ + gcc_checking_assert (!pfile->lookaheads); + + cpp_maybe_module_directive (pfile, result); + } if (pfile->cb.line_change && !pfile->state.skipping) pfile->cb.line_change (pfile, result, pfile->state.parsing_args); @@ -2651,8 +2855,6 @@ _cpp_lex_token (cpp_reader *pfile) bool _cpp_get_fresh_line (cpp_reader *pfile) { - int return_at_eof; - /* We can't get a new line until we leave the current directive. */ if (pfile->state.in_directive) return false; @@ -2683,10 +2885,17 @@ _cpp_get_fresh_line (cpp_reader *pfile) buffer->next_line = buffer->rlimit; } - return_at_eof = buffer->return_at_eof; - _cpp_pop_buffer (pfile); - if (pfile->buffer == NULL || return_at_eof) - return false; + if (buffer->prev && !buffer->return_at_eof) + _cpp_pop_buffer (pfile); + else + { + /* End of translation. Do not pop the buffer yet. Increment + line number so that the EOF token is on a line of its own + (_cpp_lex_direct doesn't increment in that case, because + it's hard for it to distinguish this special case). */ + CPP_INCREMENT_LINE (pfile, 0); + return false; + } } } @@ -2724,22 +2933,20 @@ _cpp_lex_direct (cpp_reader *pfile) buffer = pfile->buffer; if (buffer->need_line) { - if (pfile->state.in_deferred_pragma) - { - result->type = CPP_PRAGMA_EOL; - pfile->state.in_deferred_pragma = false; - if (!pfile->state.pragma_allow_expansion) - pfile->state.prevent_expansion--; - return result; - } + gcc_assert (!pfile->state.in_deferred_pragma); if (!_cpp_get_fresh_line (pfile)) { result->type = CPP_EOF; - if (!pfile->state.in_directive) + /* Not a real EOF in a directive or arg parsing -- we refuse + to advance to the next file now, and will once we're out + of those modes. */ + if (!pfile->state.in_directive && !pfile->state.parsing_args) { /* Tell the compiler the line number of the EOF token. */ result->src_loc = pfile->line_table->highest_line; result->flags = BOL; + /* Now pop the buffer that _cpp_get_fresh_line did not. */ + _cpp_pop_buffer (pfile); } return result; } @@ -2791,6 +2998,19 @@ _cpp_lex_direct (cpp_reader *pfile) && !CPP_OPTION (pfile, traditional))) CPP_INCREMENT_LINE (pfile, 0); buffer->need_line = true; + if (pfile->state.in_deferred_pragma) + { + /* Produce the PRAGMA_EOL on this line. File reading + ensures there is always a \n at end of the buffer, thus + in a deferred pragma we always see CPP_PRAGMA_EOL before + any CPP_EOF. */ + result->type = CPP_PRAGMA_EOL; + result->flags &= ~PREV_WHITE; + pfile->state.in_deferred_pragma = false; + if (!pfile->state.pragma_allow_expansion) + pfile->state.prevent_expansion--; + return result; + } goto fresh_line; case '0': case '1': case '2': case '3': case '4': @@ -2881,7 +3101,7 @@ _cpp_lex_direct (cpp_reader *pfile) else if (c == '/' && ! CPP_OPTION (pfile, traditional)) { /* Don't warn for system headers. */ - if (cpp_in_system_header (pfile)) + if (_cpp_in_system_header (pfile)) ; /* Warn about comments if pedantically GNUC89, and not in system headers. */ @@ -2984,7 +3204,7 @@ _cpp_lex_direct (cpp_reader *pfile) buffer->cur++, result->type = CPP_LESS_EQ; if (*buffer->cur == '>' && CPP_OPTION (pfile, cplusplus) - && CPP_OPTION (pfile, lang) >= CLK_GNUCXX2A) + && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20) buffer->cur++, result->type = CPP_SPACESHIP; } else if (*buffer->cur == '<') @@ -3408,7 +3628,11 @@ cpp_output_token (const cpp_token *token, FILE *fp) break; case SPELL_LITERAL: + if (token->type == CPP_HEADER_NAME) + fputc ('"', fp); fwrite (token->val.str.text, 1, token->val.str.len, fp); + if (token->type == CPP_HEADER_NAME) + fputc ('"', fp); break; case SPELL_NONE: @@ -3894,6 +4118,188 @@ do_peek_prev (const unsigned char *peek, const unsigned char *bound) return peek; } +/* If PEEK[-1] is identifier MATCH, scan past it and trailing white + space. Otherwise return NULL. */ + +static const unsigned char * +do_peek_ident (const char *match, const unsigned char *peek, + const unsigned char *limit) +{ + for (; *++match; peek++) + if (*peek != *match) + { + peek = do_peek_next (peek, limit); + if (*peek != *match) + return NULL; + } + + /* Must now not be looking at an identifier char. */ + peek = do_peek_next (peek, limit); + if (ISIDNUM (*peek)) + return NULL; + + /* Skip control-line whitespace. */ + ws: + while (*peek == ' ' || *peek == '\t') + peek++; + if (__builtin_expect (*peek == '\\', false)) + { + peek = do_peek_backslash (peek, limit); + if (*peek != '\\') + goto ws; + } + + return peek; +} + +/* Are we looking at a module control line starting as PEEK - 1? */ + +static bool +do_peek_module (cpp_reader *pfile, unsigned char c, + const unsigned char *peek, const unsigned char *limit) +{ + bool import = false; + + if (__builtin_expect (c == 'e', false)) + { + if (!((peek[0] == 'x' || peek[0] == '\\') + && (peek = do_peek_ident ("export", peek, limit)))) + return false; + + /* export, peek for import or module. No need to peek __import + here. */ + if (peek[0] == 'i') + { + if (!((peek[1] == 'm' || peek[1] == '\\') + && (peek = do_peek_ident ("import", peek + 1, limit)))) + return false; + import = true; + } + else if (peek[0] == 'm') + { + if (!((peek[1] == 'o' || peek[1] == '\\') + && (peek = do_peek_ident ("module", peek + 1, limit)))) + return false; + } + else + return false; + } + else if (__builtin_expect (c == 'i', false)) + { + if (!((peek[0] == 'm' || peek[0] == '\\') + && (peek = do_peek_ident ("import", peek, limit)))) + return false; + import = true; + } + else if (__builtin_expect (c == '_', false)) + { + /* Needed for translated includes. */ + if (!((peek[0] == '_' || peek[0] == '\\') + && (peek = do_peek_ident ("__import", peek, limit)))) + return false; + import = true; + } + else if (__builtin_expect (c == 'm', false)) + { + if (!((peek[0] == 'o' || peek[0] == '\\') + && (peek = do_peek_ident ("module", peek, limit)))) + return false; + } + else + return false; + + /* Peek the next character to see if it's good enough. We'll be at + the first non-whitespace char, including skipping an escaped + newline. */ + /* ... import followed by identifier, ':', '<' or header-name + preprocessing tokens, or module followed by identifier, ':' or + ';' preprocessing tokens. */ + unsigned char p = *peek++; + + /* A character literal is ... single quotes, ... optionally preceded + by u8, u, U, or L */ + /* A string-literal is a ... double quotes, optionally prefixed by + R, u8, u8R, u, uR, U, UR, L, or LR */ + if (p == 'u') + { + peek = do_peek_next (peek, limit); + if (*peek == '8') + { + peek++; + goto peek_u8; + } + goto peek_u; + } + else if (p == 'U' || p == 'L') + { + peek_u8: + peek = do_peek_next (peek, limit); + peek_u: + if (*peek == '\"' || *peek == '\'') + return false; + + if (*peek == 'R') + goto peek_R; + /* Identifier. Ok. */ + } + else if (p == 'R') + { + peek_R: + if (CPP_OPTION (pfile, rliterals)) + { + peek = do_peek_next (peek, limit); + if (*peek == '\"') + return false; + } + /* Identifier. Ok. */ + } + else if ('Z' - 'A' == 25 + ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_') + : ISIDST (p)) + { + /* Identifier. Ok. */ + } + else if (p == '<') + { + /* Maybe angle header, ok for import. Reject + '<=', '<<' digraph:'<:'. */ + if (!import) + return false; + peek = do_peek_next (peek, limit); + if (*peek == '=' || *peek == '<' + || (*peek == ':' && CPP_OPTION (pfile, digraphs))) + return false; + } + else if (p == ';') + { + /* SEMICOLON, ok for module. */ + if (import) + return false; + } + else if (p == '"') + { + /* STRING, ok for import. */ + if (!import) + return false; + } + else if (p == ':') + { + /* Maybe COLON, ok. Reject '::', digraph:':>'. */ + peek = do_peek_next (peek, limit); + if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs))) + return false; + } + else + /* FIXME: Detect a unicode character, excluding those not + permitted as the initial character. [lex.name]/1. I presume + we need to check the \[uU] spellings, and directly using + Unicode in say UTF8 form? Or perhaps we do the phase-1 + conversion of UTF8 to universal-character-names? */ + return false; + + return true; +} + /* Directives-only scanning. Somewhat more relaxed than correct parsing -- some ill-formed programs will not be rejected. */ @@ -3902,6 +4308,8 @@ cpp_directive_only_process (cpp_reader *pfile, void *data, void (*cb) (cpp_reader *, CPP_DO_task, void *, ...)) { + bool module_p = CPP_OPTION (pfile, module_directives); + do { restart: @@ -3975,7 +4383,8 @@ cpp_directive_only_process (cpp_reader *pfile, /* Prep things for directive handling. */ buffer->next_line = pos; buffer->need_line = true; - _cpp_get_fresh_line (pfile); + bool ok = _cpp_get_fresh_line (pfile); + gcc_checking_assert (ok); /* Ensure proper column numbering for generated error messages. */ @@ -4293,6 +4702,51 @@ cpp_directive_only_process (cpp_reader *pfile, } goto dflt; + case '_': + case 'e': + case 'i': + case 'm': + if (bol && module_p && !pfile->state.skipping + && do_peek_module (pfile, c, pos, limit)) + { + /* We've seen the start of a module control line. + Start up the tokenizer. */ + pos--; /* Backup over the first character. */ + + /* Backup over whitespace to start of line. */ + while (pos > line_start + && (pos[-1] == ' ' || pos[-1] == '\t')) + pos--; + + if (pos > base) + cb (pfile, CPP_DO_print, data, line_count, base, pos - base); + + /* Prep things for directive handling. */ + buffer->next_line = pos; + buffer->need_line = true; + + /* Now get tokens until the PRAGMA_EOL. */ + do + { + location_t spelling; + const cpp_token *tok + = cpp_get_token_with_location (pfile, &spelling); + + gcc_assert (pfile->state.in_deferred_pragma + || tok->type == CPP_PRAGMA_EOL); + cb (pfile, CPP_DO_token, data, tok, spelling); + } + while (pfile->state.in_deferred_pragma); + + if (pfile->buffer->next_line < pfile->buffer->rlimit) + cb (pfile, CPP_DO_location, data, + pfile->line_table->highest_line); + + pfile->mi_valid = false; + goto restart; + } + goto dflt; + default: dflt: bol = false;