X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fglsl%2Fglcpp%2Fglcpp-lex.l;h=fa9aa50691280df8e8324b58abb44ea12c19aaa7;hb=a4ba476c30ebcb99694c6167ac9b8af9414cb656;hp=5f0bb324990cdaa2eefcfeec3ec8dde9da6f6d24;hpb=21dda50549c5f220eff7ec04a72fb02e5eb09e76;p=mesa.git diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l index 5f0bb324990..fa9aa506912 100644 --- a/src/glsl/glcpp/glcpp-lex.l +++ b/src/glsl/glcpp/glcpp-lex.l @@ -52,14 +52,107 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); yylloc->last_column = yycolumn + 1; \ parser->has_new_line_number = 0; \ parser->has_new_source_number = 0; \ - } while(0); + } while(0); #define YY_USER_INIT \ do { \ yylineno = 1; \ - yycolumn = 1; \ + yycolumn = 0; \ yylloc->source = 0; \ } while(0) + +/* It's ugly to have macros that have return statements inside of + * them, but flex-based lexer generation is all built around the + * return statement. + * + * To mitigate the ugliness, we defer as much of the logic as possible + * to an actual function, not a macro (see + * glcpp_lex_update_state_per_token) and we make the word RETURN + * prominent in all of the macros which may return. + * + * The most-commonly-used macro is RETURN_TOKEN which will perform all + * necessary state updates based on the provided token, then + * conditionally return the token. It will not return a token if the + * parser is currently skipping tokens, (such as within #if + * 0...#else). + * + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that + * makes the token returning unconditional. This is needed for things + * like #if and the tokens of its condition, (since these must be + * evaluated by the parser even when otherwise skipping). + * + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top + * of RETURN_TOKEN that performs a string copy of yytext before the + * return. 
+ */ +#define RETURN_TOKEN_NEVER_SKIP(token) \ + do { \ + if (glcpp_lex_update_state_per_token (parser, token)) \ + return token; \ + } while (0) + +#define RETURN_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + RETURN_TOKEN_NEVER_SKIP(token); \ + } \ + } while(0) + +#define RETURN_STRING_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + yylval->str = ralloc_strdup (yyextra, yytext); \ + RETURN_TOKEN_NEVER_SKIP (token); \ + } \ + } while(0) + + +/* Update all state necessary for each token being returned. + * + * Here we'll be tracking newlines and spaces so that the lexer can + * alter its behavior as necessary, (for example, '#' has special + * significance if it is the first non-whitespace, non-comment token + * in a line, but does not otherwise). + * + * NOTE: If this function returns FALSE, then no token should be + * returned at all. This is used to suppress duplicate SPACE tokens. + */ +static int +glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token) +{ + /* After the first non-space token in a line, we won't + * allow any '#' to introduce a directive. */ + if (token == NEWLINE) { + parser->first_non_space_token_this_line = 1; + } else if (token != SPACE) { + parser->first_non_space_token_this_line = 0; + } + + /* Track newlines just to know whether a newline needs + * to be inserted if end-of-file comes early. */ + if (token == NEWLINE) { + parser->last_token_was_newline = 1; + } else { + parser->last_token_was_newline = 0; + } + + /* Track spaces to avoid emitting multiple SPACE + * tokens in a row. */ + if (token == SPACE) { + if (! 
parser->last_token_was_space) { + parser->last_token_was_space = 1; + return 1; + } else { + parser->last_token_was_space = 1; + return 0; + } + } else { + parser->last_token_was_space = 0; + return 1; + } +} + + %} %option bison-bridge bison-locations reentrant noyywrap @@ -67,14 +160,19 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); %option prefix="glcpp_" %option stack %option never-interactive +%option warn nodefault -%x DONE COMMENT UNREACHABLE SKIP DEFINE NEWLINE_CATCHUP + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ + +%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE SPACE [[:space:]] NONSPACE [^[:space:]] -NEWLINE [\n] HSPACE [ \t] -HASH ^{HSPACE}*#{HSPACE}* +HASH # +NEWLINE (\r\n|\n\r|\r|\n) IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])* PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] @@ -111,130 +209,159 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? parser->commented_newlines--; if (parser->commented_newlines == 0) BEGIN INITIAL; - return NEWLINE; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } - /* The handling of the SKIP vs INITIAL start states requires - * some special handling. Typically, a lexer would change - * start states with statements like "BEGIN SKIP" within the - * lexer rules. We can't get away with that here, since we - * need the parser to actually evaluate expressions for - * directives like "#if". + /* Set up the parser->skipping bit here before doing any lexing. + * + * This bit controls whether tokens are skipped, (as implemented by + * RETURN_TOKEN), such as between "#if 0" and "#endif". * - * So, here, in code that will be executed on every call to - * the lexer,and before any rules, we examine the skip_stack - * as set by the parser to know whether to change from INITIAL - * to SKIP or from SKIP back to INITIAL. 
+ * The parser maintains a skip_stack indicating whether we should be + * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif will + * push and pop items from the stack). * - * Three cases cause us to switch out of the SKIP state and - * back to the INITIAL state: + * Here are the rules for determining whether we are skipping: * - * 1. The top of the skip_stack is of type SKIP_NO_SKIP - * This means we're still evaluating some #if - * hierarchy, but we're on a branch of it where - * content should not be skipped (such as "#if 1" or - * "#else" or so). + * 1. If the skip stack is NULL, we are outside of all #if blocks + * and we are not skipping. * - * 2. The skip_stack is NULL meaning that we've reached - * the last #endif. + * 2. If the skip stack is non-NULL, the type of the top node in + * the stack determines whether to skip. A type of + * SKIP_NO_SKIP is used for blocks where we are emitting + * tokens, (such as between #if 1 and #endif, or after the + * #else of an #if 0, etc.). * - * 3. The lexing_directive bit is set. This indicates that we are - * lexing a pre-processor directive, (such as #if, #elif, or - * #else). For the #if and #elif directives we always need to - * parse the conditions, (even if otherwise within an #if - * 0). And for #else, we want to be able to generate an error - * if any garbage follows #else. + * 3. The lexing_directive bit overrides the skip stack. This bit + * is set when we are actively lexing the expression for a + * pre-processor condition, (such as #if, #elif, or #else). In + * this case, even if otherwise skipping, we need to emit the + * tokens for this condition so that the parser can evaluate + * the expression. (For #else, there's no expression, but we + * emit tokens so the parser can generate a nice error message + * if there are any tokens here). 
*/ - if (YY_START == INITIAL || YY_START == SKIP) { - if (parser->lexing_directive || - parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - BEGIN INITIAL; - } else { - BEGIN SKIP; - } + if (parser->skip_stack && + parser->skip_stack->type != SKIP_NO_SKIP && + ! parser->lexing_directive) + { + parser->skipping = 1; + } else { + parser->skipping = 0; } /* Single-line comments */ -"//"[^\n]* { +"//"[^\r\n]* { } /* Multi-line comments */ -"/*" { yy_push_state(COMMENT, yyscanner); } -[^*\n]* -[^*\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; } -"*"+[^*/\n]* -"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; } +"/*" { yy_push_state(COMMENT, yyscanner); } +[^*\r\n]* +[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +"*"+[^*/\r\n]* +"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } "*"+"/" { yy_pop_state(yyscanner); - if (yyextra->space_tokens) - return SPACE; + /* In the <HASH> start condition, we don't want any SPACE token. */ + if (yyextra->space_tokens && YY_START != HASH) + RETURN_TOKEN (SPACE); } -{HASH}version{HSPACE}+ { - yylval->str = ralloc_strdup (yyextra, yytext); +{HASH} { + + /* If the '#' is the first non-whitespace, non-comment token on this + * line, then it introduces a directive, switch to the <HASH> start + * condition. + * + * Otherwise, this is just punctuation, so return the HASH_TOKEN + * token. */ + if (parser->first_non_space_token_this_line) { + BEGIN HASH; + } + + RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); +} + +version{HSPACE}+ { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_VERSION; + RETURN_STRING_TOKEN (VERSION_TOKEN); +} + + /* Swallow empty #pragma directives, (to avoid confusing the + * downstream compiler). + * + * Note: We use a simple regular expression for the lookahead + * here. 
Specifically, we cannot use the complete {NEWLINE} expression + * since it uses alternation and we've found that there's a flex bug + * where using alternation in the lookahead portion of a pattern + * triggers a buffer overrun. */ +pragma{HSPACE}*/[\r\n] { + BEGIN INITIAL; } /* glcpp doesn't handle #extension, #version, or #pragma directives. * Simply pass them through to the main compiler's lexer/parser. */ -{HASH}(extension|pragma)[^\n]* { - yylval->str = ralloc_strdup (yyextra, yytext); - yylineno++; - yycolumn = 0; - return OTHER; +(extension|pragma)[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (PRAGMA); } -{HASH}line{HSPACE}+ { - return HASH_LINE; +line{HSPACE}+ { + BEGIN INITIAL; + RETURN_TOKEN (LINE); } -{ -{HASH}ifdef { - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - return HASH_IFDEF; +{NEWLINE} { + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } -{HASH}ifndef { + /* For the pre-processor directives, we return these tokens + * even when we are otherwise skipping. 
*/ +ifdef { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IFNDEF; + RETURN_TOKEN_NEVER_SKIP (IFDEF); } -{HASH}if/[^_a-zA-Z0-9] { +ifndef { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IF; + RETURN_TOKEN_NEVER_SKIP (IFNDEF); } -{HASH}elif/[^_a-zA-Z0-9] { +if/[^_a-zA-Z0-9] { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_ELIF; + RETURN_TOKEN_NEVER_SKIP (IF); } -{HASH}else { +elif/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_ELSE; + RETURN_TOKEN_NEVER_SKIP (ELIF); } -{HASH}endif { +else { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_ENDIF; -} + RETURN_TOKEN_NEVER_SKIP (ELSE); } -[^\n] { +endif { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ENDIF); } -{HASH}error.* { - char *p; - for (p = yytext; !isalpha(p[0]); p++); /* skip " # " */ - p += 5; /* skip "error" */ - glcpp_error(yylloc, yyextra, "#error%s", p); +error[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (ERROR_TOKEN); } /* After we see a "#define" we enter the start state @@ -255,24 +382,42 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? * * Anything else, (not an identifier, not a comment, * and not whitespace). This will generate an error. */ -{HASH}define{HSPACE}+ { +define{HSPACE}* { + if (! parser->skipping) { + BEGIN DEFINE; + yyextra->space_tokens = 0; + RETURN_TOKEN (DEFINE_TOKEN); + } +} + +undef { + BEGIN INITIAL; yyextra->space_tokens = 0; - yy_push_state(DEFINE, yyscanner); - return HASH_DEFINE; + RETURN_TOKEN (UNDEF); +} + +{HSPACE}+ { + /* Nothing to do here. Importantly, don't leave the + * start condition, since it's legal to have space between the + * '#' and the directive.. 
*/ +} + + /* This will catch any non-directive garbage after a HASH */ +{NONSPACE} { + BEGIN INITIAL; + RETURN_TOKEN (GARBAGE); } /* An identifier immediately followed by '(' */ {IDENTIFIER}/"(" { - yy_pop_state(yyscanner); - yylval->str = ralloc_strdup (yyextra, yytext); - return FUNC_IDENTIFIER; + BEGIN INITIAL; + RETURN_STRING_TOKEN (FUNC_IDENTIFIER); } /* An identifier not immediately followed by '(' */ {IDENTIFIER} { - yy_pop_state(yyscanner); - yylval->str = ralloc_strdup (yyextra, yytext); - return OBJ_IDENTIFIER; + BEGIN INITIAL; + RETURN_STRING_TOKEN (OBJ_IDENTIFIER); } /* Whitespace */ @@ -284,7 +429,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? [/][^*]{NONSPACE}* { BEGIN INITIAL; glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } /* A character that can't start an identifier, comment, or @@ -292,126 +437,135 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? [^_a-zA-Z/[:space:]]{NONSPACE}* { BEGIN INITIAL; glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); - return INTEGER_STRING; -} - -{HASH}undef { - yyextra->space_tokens = 0; - return HASH_UNDEF; -} - -{HASH} { - yyextra->space_tokens = 0; - return HASH; + RETURN_STRING_TOKEN (INTEGER_STRING); } {DECIMAL_INTEGER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } {OCTAL_INTEGER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } {HEXADECIMAL_INTEGER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } "<<" { - return LEFT_SHIFT; + RETURN_TOKEN (LEFT_SHIFT); } ">>" { - return RIGHT_SHIFT; + RETURN_TOKEN (RIGHT_SHIFT); } "<=" { - return LESS_OR_EQUAL; + RETURN_TOKEN (LESS_OR_EQUAL); } ">=" { - return GREATER_OR_EQUAL; + RETURN_TOKEN (GREATER_OR_EQUAL); } "==" { - return EQUAL; + 
RETURN_TOKEN (EQUAL); } "!=" { - return NOT_EQUAL; + RETURN_TOKEN (NOT_EQUAL); } "&&" { - return AND; + RETURN_TOKEN (AND); } "||" { - return OR; + RETURN_TOKEN (OR); +} + +"++" { + RETURN_TOKEN (PLUS_PLUS); +} + +"--" { + RETURN_TOKEN (MINUS_MINUS); } "##" { - if (parser->is_gles) - glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); - return PASTE; + if (! parser->skipping) { + if (parser->is_gles) + glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); + RETURN_TOKEN (PASTE); + } } "defined" { - return DEFINED; + RETURN_TOKEN (DEFINED); } {IDENTIFIER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return IDENTIFIER; + RETURN_STRING_TOKEN (IDENTIFIER); } {PP_NUMBER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return OTHER; + RETURN_STRING_TOKEN (OTHER); } {PUNCTUATION} { - return yytext[0]; + RETURN_TOKEN (yytext[0]); } {OTHER}+ { - yylval->str = ralloc_strdup (yyextra, yytext); - return OTHER; + RETURN_STRING_TOKEN (OTHER); } {HSPACE} { if (yyextra->space_tokens) { - return SPACE; + RETURN_TOKEN (SPACE); } } -\n { + /* We preserve all newlines, even between #if 0..#endif, so no + skipping.. */ +<*>{NEWLINE} { if (parser->commented_newlines) { BEGIN NEWLINE_CATCHUP; + } else { + BEGIN INITIAL; } yyextra->space_tokens = 1; yyextra->lexing_directive = 0; yylineno++; yycolumn = 0; - return NEWLINE; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } - /* Handle missing newline at EOF. */ -<> { +<> { + if (YY_START == COMMENT) + glcpp_error(yylloc, yyextra, "Unterminated comment"); BEGIN DONE; /* Don't keep matching this rule forever. */ yyextra->lexing_directive = 0; - return NEWLINE; + if (! parser->last_token_was_newline) + RETURN_TOKEN (NEWLINE); } + /* This is a catch-all to avoid the annoying default flex action which + * matches any character and prints it. If any input ever matches this + * rule, then we have made a mistake above and need to fix one or more + * of the preceding patterns to match that input. */ + +<*>. 
{ + glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext); + /* We don't actually use the UNREACHABLE start condition. We - only have this action here so that we can pretend to call some + only have this block here so that we can pretend to call some generated functions, (to avoid "defined but not used" warnings. */ -. { - unput('.'); - yy_top_state(yyextra); + if (YY_START == UNREACHABLE) { + unput('.'); + yy_top_state(yyextra); + } } %%