yylloc->last_column = yycolumn + 1; \
parser->has_new_line_number = 0; \
parser->has_new_source_number = 0; \
- } while(0);
+ } while(0);
#define YY_USER_INIT \
do { \
* of RETURN_TOKEN that performs a string copy of yytext before the
* return.
*/
-#define RETURN_TOKEN_NEVER_SKIP(token) \
- do { \
- if (token == NEWLINE) \
- parser->last_token_was_newline = 1; \
- else \
- parser->last_token_was_newline = 0; \
- return (token); \
+#define RETURN_TOKEN_NEVER_SKIP(token) \
+ do { \
+ if (glcpp_lex_update_state_per_token (parser, token)) \
+ return token; \
} while (0)
#define RETURN_TOKEN(token) \
} \
} while(0)
+
+/* Per-token bookkeeping, run for every token the lexer is about to
+ * return.
+ *
+ * Three pieces of parser state are maintained here:
+ *
+ *   first_non_space_token_this_line: set by each NEWLINE and cleared
+ *   by any token other than SPACE, (a '#' only introduces a directive
+ *   while this is still set).
+ *
+ *   last_token_was_newline: lets the end-of-file handling decide
+ *   whether a final NEWLINE still needs to be emitted.
+ *
+ *   last_token_was_space: used to collapse a run of SPACE tokens
+ *   into a single one.
+ *
+ * Returns non-zero if the token should be returned to the caller, and
+ * zero if no token should be returned at all, (which happens only for
+ * a SPACE token that immediately follows another SPACE token).
+ */
+static int
+glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
+{
+	const int is_newline = (token == NEWLINE);
+	const int is_space = (token == SPACE);
+	int emit_token = 1;
+
+	/* A NEWLINE re-arms directive detection for the next line, while
+	 * any token other than SPACE disarms it. A SPACE token leaves
+	 * the flag untouched. */
+	if (is_newline)
+		parser->first_non_space_token_this_line = 1;
+	else if (! is_space)
+		parser->first_non_space_token_this_line = 0;
+
+	/* Remember whether the most recent token was a NEWLINE. */
+	parser->last_token_was_newline = is_newline;
+
+	/* Only the first SPACE of a run is emitted; every following
+	 * SPACE is suppressed until some other token intervenes. */
+	if (is_space && parser->last_token_was_space)
+		emit_token = 0;
+	parser->last_token_was_space = is_space;
+
+	return emit_token;
+}
+
+
%}
%option bison-bridge bison-locations reentrant noyywrap
%option prefix="glcpp_"
%option stack
%option never-interactive
+%option warn nodefault
-%x DONE COMMENT UNREACHABLE DEFINE NEWLINE_CATCHUP
+ /* Note: When adding any start conditions to this list, you must also
+ * update the "Internal compiler error" catch-all rule near the end of
+ * this file. */
+
+%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE
SPACE [[:space:]]
NONSPACE [^[:space:]]
-NEWLINE [\n]
HSPACE [ \t]
-HASH ^{HSPACE}*#{HSPACE}*
+HASH #
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
PUNCTUATION [][(){}.&*~!/%<>^|;,=+-]
parser->commented_newlines--;
if (parser->commented_newlines == 0)
BEGIN INITIAL;
- RETURN_TOKEN (NEWLINE);
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
/* Set up the parser->skipping bit here before doing any lexing.
}
/* Single-line comments */
-"//"[^\n]* {
+<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
}
/* Multi-line comments */
-<DEFINE,INITIAL>"/*" { yy_push_state(COMMENT, yyscanner); }
-<COMMENT>[^*\n]*
-<COMMENT>[^*\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
-<COMMENT>"*"+[^*/\n]*
-<COMMENT>"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<INITIAL,DEFINE,HASH>"/*" { yy_push_state(COMMENT, yyscanner); }
+<COMMENT>[^*\r\n]*
+<COMMENT>[^*\r\n]*[\r\n] { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<COMMENT>"*"+[^*/\r\n]*
+<COMMENT>"*"+[^*/\r\n]*[\r\n] { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+"/" {
yy_pop_state(yyscanner);
- if (yyextra->space_tokens)
+ /* In the <HASH> start condition, we don't want any SPACE token. */
+ if (yyextra->space_tokens && YY_START != HASH)
RETURN_TOKEN (SPACE);
}
-{HASH}version{HSPACE}+ {
+{HASH} {
+
+ /* If the '#' is the first non-whitespace, non-comment token on this
+ * line, then it introduces a directive, so switch to the <HASH> start
+ * condition.
+ *
+ * Otherwise, this is just punctuation. In both cases the HASH_TOKEN
+ * token is returned. */
+ if (parser->first_non_space_token_this_line) {
+ BEGIN HASH;
+ }
+
+ RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
+}
+
+<HASH>version{HSPACE}+ {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- RETURN_STRING_TOKEN (HASH_VERSION);
+ RETURN_STRING_TOKEN (VERSION_TOKEN);
+}
+
+ /* Swallow empty #pragma directives, (to avoid confusing the
+ * downstream compiler). */
+<HASH>pragma{HSPACE}*/[\r\n] {
+ BEGIN INITIAL;
}
/* glcpp doesn't handle #extension, #version, or #pragma directives.
* Simply pass them through to the main compiler's lexer/parser. */
-{HASH}(extension|pragma)[^\n]* {
- yylineno++;
- yycolumn = 0;
- RETURN_STRING_TOKEN (OTHER);
+<HASH>(extension|pragma)[^\r\n]* {
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (PRAGMA);
}
-{HASH}line{HSPACE}+ {
- RETURN_TOKEN (HASH_LINE);
+<HASH>line{HSPACE}+ {
+ BEGIN INITIAL;
+ RETURN_TOKEN (LINE);
+}
+
+<HASH>\n {
+ BEGIN INITIAL;
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
/* For the pre-processor directives, we return these tokens
* even when we are otherwise skipping. */
-{HASH}ifdef {
+<HASH>ifdef {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- RETURN_TOKEN_NEVER_SKIP (HASH_IFDEF);
+ RETURN_TOKEN_NEVER_SKIP (IFDEF);
}
-{HASH}ifndef {
+<HASH>ifndef {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- RETURN_TOKEN_NEVER_SKIP (HASH_IFNDEF);
+ RETURN_TOKEN_NEVER_SKIP (IFNDEF);
}
-{HASH}if/[^_a-zA-Z0-9] {
+<HASH>if/[^_a-zA-Z0-9] {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- RETURN_TOKEN_NEVER_SKIP (HASH_IF);
+ RETURN_TOKEN_NEVER_SKIP (IF);
}
-{HASH}elif/[^_a-zA-Z0-9] {
+<HASH>elif/[^_a-zA-Z0-9] {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- RETURN_TOKEN_NEVER_SKIP (HASH_ELIF);
+ RETURN_TOKEN_NEVER_SKIP (ELIF);
}
-{HASH}else {
+<HASH>else {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- RETURN_TOKEN_NEVER_SKIP (HASH_ELSE);
+ RETURN_TOKEN_NEVER_SKIP (ELSE);
}
-{HASH}endif {
+<HASH>endif {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- RETURN_TOKEN_NEVER_SKIP (HASH_ENDIF);
+ RETURN_TOKEN_NEVER_SKIP (ENDIF);
}
-{HASH}error.* {
- if (! parser->skipping) {
- char *p;
- for (p = yytext; !isalpha(p[0]); p++); /* skip " # " */
- p += 5; /* skip "error" */
- glcpp_error(yylloc, yyextra, "#error%s", p);
- }
+<HASH>error.* {
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (ERROR_TOKEN);
}
/* After we see a "#define" we enter the <DEFINE> start state
* * Anything else, (not an identifier, not a comment,
* and not whitespace). This will generate an error.
*/
-{HASH}define{HSPACE}+ {
+<HASH>define{HSPACE}* {
if (! parser->skipping) {
BEGIN DEFINE;
yyextra->space_tokens = 0;
- RETURN_TOKEN (HASH_DEFINE);
+ RETURN_TOKEN (DEFINE_TOKEN);
}
}
+<HASH>undef {
+ BEGIN INITIAL;
+ yyextra->space_tokens = 0;
+ RETURN_TOKEN (UNDEF);
+}
+
+<HASH>{HSPACE}+ {
+ /* Nothing to do here. Importantly, don't leave the <HASH>
+ * start condition, since it's legal to have space between the
+ * '#' and the directive. */
+}
+
+ /* This will catch any non-directive garbage after a HASH */
+<HASH>{NONSPACE} {
+ BEGIN INITIAL;
+ RETURN_TOKEN (GARBAGE);
+}
+
/* An identifier immediately followed by '(' */
<DEFINE>{IDENTIFIER}/"(" {
BEGIN INITIAL;
RETURN_STRING_TOKEN (INTEGER_STRING);
}
-{HASH}undef {
- yyextra->space_tokens = 0;
- RETURN_TOKEN (HASH_UNDEF);
-}
-
-{HASH} {
- yyextra->space_tokens = 0;
- RETURN_TOKEN (HASH);
-}
-
{DECIMAL_INTEGER} {
RETURN_STRING_TOKEN (INTEGER_STRING);
}
RETURN_TOKEN (OR);
}
+"++" {
+ RETURN_TOKEN (PLUS_PLUS);
+}
+
+"--" {
+ RETURN_TOKEN (MINUS_MINUS);
+}
+
"##" {
if (! parser->skipping) {
if (parser->is_gles)
/* We preserve all newlines, even between #if 0..#endif, so no
skipping.. */
-\n {
+<*>[\r\n] {
if (parser->commented_newlines) {
BEGIN NEWLINE_CATCHUP;
+ } else {
+ BEGIN INITIAL;
}
yyextra->space_tokens = 1;
yyextra->lexing_directive = 0;
RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
-<INITIAL,COMMENT,DEFINE><<EOF>> {
+<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
if (YY_START == COMMENT)
glcpp_error(yylloc, yyextra, "Unterminated comment");
- if (YY_START == DEFINE)
- glcpp_error(yylloc, yyextra, "#define without macro name");
BEGIN DONE; /* Don't keep matching this rule forever. */
yyextra->lexing_directive = 0;
if (! parser->last_token_was_newline)
RETURN_TOKEN (NEWLINE);
}
+ /* This is a catch-all to avoid the annoying default flex action which
+ * matches any character and prints it. If any input ever matches this
+ * rule, then we have made a mistake above and need to fix one or more
+ * of the preceding patterns to match that input. */
+
+<*>. {
+ glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);
+
/* We don't actually use the UNREACHABLE start condition. We
- only have this action here so that we can pretend to call some
+ only have this block here so that we can pretend to call some
generated functions, (to avoid "defined but not used"
warnings. */
-<UNREACHABLE>. {
- unput('.');
- yy_top_state(yyextra);
+ if (YY_START == UNREACHABLE) {
+ unput('.');
+ yy_top_state(yyextra);
+ }
}
%%