yylloc->last_column = yycolumn + 1; \
parser->has_new_line_number = 0; \
parser->has_new_source_number = 0; \
- } while(0);
+ } while(0);
#define YY_USER_INIT \
do { \
* of RETURN_TOKEN that performs a string copy of yytext before the
* return.
*/
-#define RETURN_TOKEN_NEVER_SKIP(token) \
- do { \
- if (token == NEWLINE) \
- parser->last_token_was_newline = 1; \
- else \
- parser->last_token_was_newline = 0; \
- return (token); \
+/* Return a token unconditionally, after first updating the per-token
+ * lexer state. The token is only returned when
+ * glcpp_lex_update_state_per_token() reports non-zero; otherwise the
+ * action falls through without returning anything. The argument is
+ * parenthesized wherever it is expanded so that an expression passed
+ * as the token cannot be misparsed. */
+#define RETURN_TOKEN_NEVER_SKIP(token)					\
+	do {								\
+		if (glcpp_lex_update_state_per_token (parser, (token)))	\
+			return (token);					\
} while (0)
#define RETURN_TOKEN(token) \
} \
} while(0)
+
+/* Per-token bookkeeping for the lexer.
+ *
+ * Intended to be run for each token being returned, so that three
+ * flags on the parser stay current:
+ *
+ *   first_non_space_token_this_line
+ *	Set by a NEWLINE and cleared by any token other than SPACE,
+ *	so that a '#' is only given special significance when it is
+ *	the first non-whitespace, non-comment token in a line.
+ *
+ *   last_token_was_newline
+ *	Tracked just to know whether a newline needs to be inserted
+ *	if end-of-file comes early.
+ *
+ *   last_token_was_space
+ *	Tracked to avoid emitting multiple SPACE tokens in a row.
+ *
+ * Returns non-zero if the token should be returned. Returns zero if
+ * no token should be returned at all, which happens only for a SPACE
+ * that directly follows another SPACE (this is how duplicate SPACE
+ * tokens are suppressed).
+ */
+static int
+glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
+{
+	const int is_newline = (token == NEWLINE);
+	const int is_space = (token == SPACE);
+	int emit;
+
+	/* A SPACE token leaves the first-token flag untouched. */
+	if (is_newline)
+		parser->first_non_space_token_this_line = 1;
+	else if (! is_space)
+		parser->first_non_space_token_this_line = 0;
+
+	parser->last_token_was_newline = is_newline;
+
+	/* Decide before overwriting the flag: only a SPACE that
+	 * follows another SPACE is dropped. */
+	emit = ! (is_space && parser->last_token_was_space);
+	parser->last_token_was_space = is_space;
+
+	return emit;
+}
+
+
%}
%option bison-bridge bison-locations reentrant noyywrap
%option stack
%option never-interactive
-%x DONE COMMENT UNREACHABLE DEFINE NEWLINE_CATCHUP
+%x DONE COMMENT HASH UNREACHABLE DEFINE NEWLINE_CATCHUP
SPACE [[:space:]]
NONSPACE [^[:space:]]
NEWLINE [\n]
HSPACE [ \t]
-HASH ^{HSPACE}*#{HSPACE}*
+HASH #
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
PUNCTUATION [][(){}.&*~!/%<>^|;,=+-]
parser->commented_newlines--;
if (parser->commented_newlines == 0)
BEGIN INITIAL;
- RETURN_TOKEN (NEWLINE);
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
/* Set up the parser->skipping bit here before doing any lexing.
}
/* Multi-line comments */
-<DEFINE,INITIAL>"/*" { yy_push_state(COMMENT, yyscanner); }
+<DEFINE,HASH,INITIAL>"/*" { yy_push_state(COMMENT, yyscanner); }
<COMMENT>[^*\n]*
<COMMENT>[^*\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+[^*/\n]*
<COMMENT>"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+"/" {
yy_pop_state(yyscanner);
- if (yyextra->space_tokens)
+ /* In the <HASH> start condition, we don't want any SPACE token. */
+ if (yyextra->space_tokens && YY_START != HASH)
RETURN_TOKEN (SPACE);
}
-{HASH}version{HSPACE}+ {
+{HASH} {
+
+ /* If the '#' is the first non-whitespace, non-comment token on this
+ * line, then it introduces a directive, switch to the <HASH> start
+ * condition.
+ *
+ * Otherwise, this is just punctuation, so return the HASH_TOKEN
+ * token. */
+ if (parser->first_non_space_token_this_line) {
+ BEGIN HASH;
+ }
+
+ RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
+}
+
+<HASH>version{HSPACE}+ {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
RETURN_STRING_TOKEN (HASH_VERSION);
}
/* glcpp doesn't handle #extension, #version, or #pragma directives.
* Simply pass them through to the main compiler's lexer/parser. */
-{HASH}(extension|pragma)[^\n]* {
+<HASH>(extension|pragma)[^\n]* {
+ BEGIN INITIAL;
yylineno++;
yycolumn = 0;
- RETURN_STRING_TOKEN (OTHER);
+ RETURN_STRING_TOKEN (HASH_PRAGMA);
}
-{HASH}line{HSPACE}+ {
+<HASH>line{HSPACE}+ {
+ BEGIN INITIAL;
RETURN_TOKEN (HASH_LINE);
}
+<HASH>\n {
+ BEGIN INITIAL;
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
+}
+
/* For the pre-processor directives, we return these tokens
* even when we are otherwise skipping. */
-{HASH}ifdef {
+<HASH>ifdef {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
RETURN_TOKEN_NEVER_SKIP (HASH_IFDEF);
}
-{HASH}ifndef {
+<HASH>ifndef {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
RETURN_TOKEN_NEVER_SKIP (HASH_IFNDEF);
}
-{HASH}if/[^_a-zA-Z0-9] {
+<HASH>if/[^_a-zA-Z0-9] {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
RETURN_TOKEN_NEVER_SKIP (HASH_IF);
}
-{HASH}elif/[^_a-zA-Z0-9] {
+<HASH>elif/[^_a-zA-Z0-9] {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
RETURN_TOKEN_NEVER_SKIP (HASH_ELIF);
}
-{HASH}else {
+<HASH>else {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
RETURN_TOKEN_NEVER_SKIP (HASH_ELSE);
}
-{HASH}endif {
+<HASH>endif {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
RETURN_TOKEN_NEVER_SKIP (HASH_ENDIF);
}
-{HASH}error.* {
- if (! parser->skipping) {
- char *p;
- for (p = yytext; !isalpha(p[0]); p++); /* skip " # " */
- p += 5; /* skip "error" */
- glcpp_error(yylloc, yyextra, "#error%s", p);
- }
+<HASH>error.* {
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (HASH_ERROR);
}
/* After we see a "#define" we enter the <DEFINE> start state
* * Anything else, (not an identifier, not a comment,
* and not whitespace). This will generate an error.
*/
-{HASH}define{HSPACE}+ {
+<HASH>define{HSPACE}+ {
if (! parser->skipping) {
BEGIN DEFINE;
yyextra->space_tokens = 0;
}
}
+<HASH>undef {
+ BEGIN INITIAL;
+ yyextra->space_tokens = 0;
+ RETURN_TOKEN (HASH_UNDEF);
+}
+
+<HASH>{HSPACE}+ {
+ /* Nothing to do here: the horizontal space is consumed and no
+ * token is returned. Importantly, don't leave the <HASH>
+ * start condition, since it's legal to have space between the
+ * '#' and the directive. */
+}
+
+ /* This will catch any non-directive garbage after a HASH */
+<HASH>{NONSPACE} {
+ BEGIN INITIAL;
+ RETURN_TOKEN (HASH_GARBAGE);
+}
+
/* An identifier immediately followed by '(' */
<DEFINE>{IDENTIFIER}/"(" {
BEGIN INITIAL;
RETURN_STRING_TOKEN (INTEGER_STRING);
}
-{HASH}undef {
- yyextra->space_tokens = 0;
- RETURN_TOKEN (HASH_UNDEF);
-}
-
-{HASH} {
- yyextra->space_tokens = 0;
- RETURN_TOKEN (HASH_TOKEN);
-}
-
{DECIMAL_INTEGER} {
RETURN_STRING_TOKEN (INTEGER_STRING);
}
RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
-<INITIAL,COMMENT,DEFINE><<EOF>> {
+<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
if (YY_START == COMMENT)
glcpp_error(yylloc, yyextra, "Unterminated comment");
if (YY_START == DEFINE)
/* We use HASH_TOKEN, not HASH to avoid a conflict with the <HASH>
* start condition in the lexer. */
-%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH_TOKEN HASH_DEFINE FUNC_IDENTIFIER OBJ_IDENTIFIER HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_LINE HASH_UNDEF HASH_VERSION IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE
+%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH_TOKEN HASH_DEFINE FUNC_IDENTIFIER OBJ_IDENTIFIER HASH_ELIF HASH_ELSE HASH_ENDIF HASH_ERROR HASH_IF HASH_IFDEF HASH_IFNDEF HASH_LINE HASH_PRAGMA HASH_UNDEF HASH_VERSION HASH_GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE
%token PASTE
%type <ival> INTEGER operator SPACE integer_constant
%type <expression_value> expression
-%type <str> IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER
+%type <str> IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER HASH_ERROR HASH_PRAGMA
%type <string_list> identifier_list
%type <token> preprocessing_token conditional_token
%type <token_list> pp_tokens replacement_list text_line conditional_tokens
;
line:
- control_line {
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
- }
-| HASH_LINE {
- glcpp_parser_resolve_implicit_version(parser);
- } pp_tokens NEWLINE {
-
- if (parser->skip_stack == NULL ||
- parser->skip_stack->type == SKIP_NO_SKIP)
- {
- _glcpp_parser_expand_and_lex_from (parser,
- LINE_EXPANDED, $3);
- }
- }
+ control_line
+| SPACE control_line
| text_line {
_glcpp_parser_print_expanded_token_list (parser, $1);
ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
ralloc_free ($1);
}
| expanded_line
-| HASH_TOKEN non_directive
;
expanded_line:
;
control_line:
- HASH_DEFINE {
+ control_line_success {
+ ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+ }
+| control_line_error
+| HASH_TOKEN HASH_LINE {
+ glcpp_parser_resolve_implicit_version(parser);
+ } pp_tokens NEWLINE {
+
+ if (parser->skip_stack == NULL ||
+ parser->skip_stack->type == SKIP_NO_SKIP)
+ {
+ _glcpp_parser_expand_and_lex_from (parser,
+ LINE_EXPANDED, $4);
+ }
+ }
+;
+
+control_line_success:
+ HASH_TOKEN HASH_DEFINE {
glcpp_parser_resolve_implicit_version(parser);
} define
-| HASH_UNDEF {
+| HASH_TOKEN HASH_UNDEF {
glcpp_parser_resolve_implicit_version(parser);
} IDENTIFIER NEWLINE {
macro_t *macro;
- if (strcmp("__LINE__", $3) == 0
- || strcmp("__FILE__", $3) == 0
- || strcmp("__VERSION__", $3) == 0)
+ if (strcmp("__LINE__", $4) == 0
+ || strcmp("__FILE__", $4) == 0
+ || strcmp("__VERSION__", $4) == 0)
glcpp_error(& @1, parser, "Built-in (pre-defined)"
" macro names can not be undefined.");
- macro = hash_table_find (parser->defines, $3);
+ macro = hash_table_find (parser->defines, $4);
if (macro) {
- hash_table_remove (parser->defines, $3);
+ hash_table_remove (parser->defines, $4);
ralloc_free (macro);
}
- ralloc_free ($3);
+ ralloc_free ($4);
}
-| HASH_IF {
+| HASH_TOKEN HASH_IF {
glcpp_parser_resolve_implicit_version(parser);
} conditional_tokens NEWLINE {
/* Be careful to only evaluate the 'if' expression if
parser->skip_stack->type == SKIP_NO_SKIP)
{
_glcpp_parser_expand_and_lex_from (parser,
- IF_EXPANDED, $3);
+ IF_EXPANDED, $4);
}
else
{
parser->skip_stack->type = SKIP_TO_ENDIF;
}
}
-| HASH_IF NEWLINE {
+| HASH_TOKEN HASH_IF NEWLINE {
/* #if without an expression is only an error if we
* are not skipping */
if (parser->skip_stack == NULL ||
}
_glcpp_parser_skip_stack_push_if (parser, & @1, 0);
}
-| HASH_IFDEF {
+| HASH_TOKEN HASH_IFDEF {
glcpp_parser_resolve_implicit_version(parser);
} IDENTIFIER junk NEWLINE {
- macro_t *macro = hash_table_find (parser->defines, $3);
- ralloc_free ($3);
+ macro_t *macro = hash_table_find (parser->defines, $4);
+ ralloc_free ($4);
_glcpp_parser_skip_stack_push_if (parser, & @1, macro != NULL);
}
-| HASH_IFNDEF {
+| HASH_TOKEN HASH_IFNDEF {
glcpp_parser_resolve_implicit_version(parser);
} IDENTIFIER junk NEWLINE {
- macro_t *macro = hash_table_find (parser->defines, $3);
- ralloc_free ($3);
- _glcpp_parser_skip_stack_push_if (parser, & @2, macro == NULL);
+ macro_t *macro = hash_table_find (parser->defines, $4);
+ ralloc_free ($4);
+ _glcpp_parser_skip_stack_push_if (parser, & @3, macro == NULL);
}
-| HASH_ELIF conditional_tokens NEWLINE {
+| HASH_TOKEN HASH_ELIF conditional_tokens NEWLINE {
/* Be careful to only evaluate the 'elif' expression
* if we are not skipping. When we are skipping, we
* simply change to a 0-valued 'elif' on the skip
parser->skip_stack->type == SKIP_TO_ELSE)
{
_glcpp_parser_expand_and_lex_from (parser,
- ELIF_EXPANDED, $2);
+ ELIF_EXPANDED, $3);
}
else if (parser->skip_stack &&
parser->skip_stack->has_else)
"elif", 0);
}
}
-| HASH_ELIF NEWLINE {
+| HASH_TOKEN HASH_ELIF NEWLINE {
/* #elif without an expression is an error unless we
* are skipping. */
if (parser->skip_stack &&
glcpp_warning(& @1, parser, "ignoring illegal #elif without expression");
}
}
-| HASH_ELSE { parser->lexing_directive = 1; } NEWLINE {
+| HASH_TOKEN HASH_ELSE { parser->lexing_directive = 1; } NEWLINE {
if (parser->skip_stack &&
parser->skip_stack->has_else)
{
parser->skip_stack->has_else = true;
}
}
-| HASH_ENDIF {
+| HASH_TOKEN HASH_ENDIF {
_glcpp_parser_skip_stack_pop (parser, & @1);
} NEWLINE
-| HASH_VERSION integer_constant NEWLINE {
+| HASH_TOKEN HASH_VERSION integer_constant NEWLINE {
if (parser->version_resolved) {
glcpp_error(& @1, parser, "#version must appear on the first line");
}
- _glcpp_parser_handle_version_declaration(parser, $2, NULL, true);
+ _glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
}
-| HASH_VERSION integer_constant IDENTIFIER NEWLINE {
+| HASH_TOKEN HASH_VERSION integer_constant IDENTIFIER NEWLINE {
if (parser->version_resolved) {
glcpp_error(& @1, parser, "#version must appear on the first line");
}
- _glcpp_parser_handle_version_declaration(parser, $2, $3, true);
+ _glcpp_parser_handle_version_declaration(parser, $3, $4, true);
}
| HASH_TOKEN NEWLINE {
glcpp_parser_resolve_implicit_version(parser);
}
+| HASH_TOKEN HASH_PRAGMA NEWLINE {
+ ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", $2);
+ }
+;
+
+control_line_error:
+ HASH_TOKEN HASH_ERROR NEWLINE {
+ glcpp_error(& @1, parser, "#%s", $2);
+ }
+| HASH_TOKEN HASH_GARBAGE pp_tokens NEWLINE {
+ glcpp_error (& @1, parser, "Illegal non-directive after #");
+ }
;
integer_constant:
| pp_tokens NEWLINE
;
-non_directive:
- pp_tokens NEWLINE {
- yyerror (& @1, parser, "Invalid tokens after #");
- }
-;
-
replacement_list:
/* empty */ { $$ = NULL; }
| pp_tokens
parser->active = NULL;
parser->lexing_directive = 0;
parser->space_tokens = 1;
- parser->last_token_was_newline = 0;
+ parser->last_token_was_newline = 0;
+ parser->last_token_was_space = 0;
+ parser->first_non_space_token_this_line = 1;
parser->newline_as_space = 0;
parser->in_control_line = 0;
parser->paren_count = 0;
int lexing_directive;
int space_tokens;
int last_token_was_newline;
+ int last_token_was_space;
+ int first_non_space_token_this_line;
int newline_as_space;
int in_control_line;
int paren_count;
- this is four tokens with spaces
+ this is four tokens with spaces
0:1(1): preprocessor error: #error human error
-
1:0(1): preprocessor error: #error source 1, line 0 error
2:30(1): preprocessor error: #error source 2, line 30 error
#line 0
-
#line 25
-
#line 0 1
-
#line 30 2
-
#line 45 2
switch (1) {
- case 1 + 2:
- break;
+ case 1 + 2:
+ break;
}
-0:1(2): preprocessor error: Invalid tokens after #
+0:1(1): preprocessor error: Illegal non-directive after #
-0:1(2): preprocessor error: Invalid tokens after #
+0:1(1): preprocessor error: Illegal non-directive after #
-0:2(2): preprocessor error: Invalid tokens after #
+0:2(1): preprocessor error: Illegal non-directive after #
-
+
#version 300
- #pragma Testing spaces before hash
+#pragma Testing spaces before hash
#line 3
-0:1(1): preprocessor error: #define without macro name
-0:1(1): preprocessor error: syntax error, unexpected NEWLINE, expecting FUNC_IDENTIFIER or OBJ_IDENTIFIER
+0:1(2): preprocessor error: #define without macro name
+0:1(2): preprocessor error: syntax error, unexpected NEWLINE, expecting FUNC_IDENTIFIER or OBJ_IDENTIFIER
--- /dev/null
+/*...*/ # /*...*/ version 300
+ /*...*/#/*...*/ extension whatever
+ /*..*/ # /*..*/ pragma ignored
+/**/ # /**/ line 4
+ /*...*/# /*...*/ ifdef NOT_DEFINED
+ /*...*/# /*...*/ else
+ /*..*/ #/*..*/ endif
+ /*...*/# /*...*/ ifndef ALSO_NOT_DEFINED
+ /*...*/# /*...*/ else
+ /*..*/ #/*..*/ endif
+/*...*/ # /*...*/ if 0
+ /*...*/#/*...*/ elif 1
+ /*..*/ # /*..*/ else
+ /**/ # /**/ endif
+ /*...*/# /*...*/ define FOO bar
+ /*..*/ #/*..*/ define FUNC() baz
+ /*..*/ # /*..*/ define FUNC2(a,b) b a
+FOO
+FUNC()
+FUNC2(x,y)
+
+
--- /dev/null
+#version 300
+#extension whatever
+#pragma ignored
+#line 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+bar
+baz
+y x
+
+