X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fglsl%2Fglcpp%2Fglcpp-lex.l;h=fa9aa50691280df8e8324b58abb44ea12c19aaa7;hb=a4ba476c30ebcb99694c6167ac9b8af9414cb656;hp=8eb84ed138a879a8d7df9c9ed6c392a426ac2f2e;hpb=1218430e1200a08cd64b6555d3fd1fd0274ad9e5;p=mesa.git diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l index 8eb84ed138a..fa9aa506912 100644 --- a/src/glsl/glcpp/glcpp-lex.l +++ b/src/glsl/glcpp/glcpp-lex.l @@ -34,21 +34,125 @@ int glcpp_get_column (yyscan_t yyscanner); void glcpp_set_column (int column_no , yyscan_t yyscanner); +#ifdef _MSC_VER +#define YY_NO_UNISTD_H +#endif + #define YY_NO_INPUT -#define YY_USER_ACTION \ - do { \ - yylloc->first_column = yycolumn + 1; \ - yylloc->first_line = yylineno; \ - yycolumn += yyleng; \ - } while(0); +#define YY_USER_ACTION \ + do { \ + if (parser->has_new_line_number) \ + yylineno = parser->new_line_number; \ + if (parser->has_new_source_number) \ + yylloc->source = parser->new_source_number; \ + yylloc->first_column = yycolumn + 1; \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yycolumn += yyleng; \ + yylloc->last_column = yycolumn + 1; \ + parser->has_new_line_number = 0; \ + parser->has_new_source_number = 0; \ + } while(0); #define YY_USER_INIT \ do { \ yylineno = 1; \ - yycolumn = 1; \ + yycolumn = 0; \ yylloc->source = 0; \ } while(0) + +/* It's ugly to have macros that have return statements inside of + * them, but flex-based lexer generation is all built around the + * return statement. + * + * To mitigate the ugliness, we defer as much of the logic as possible + * to an actual function, not a macro (see + * glcpplex_update_state_per_token) and we make the word RETURN + * prominent in all of the macros which may return. + * + * The most-commonly-used macro is RETURN_TOKEN which will perform all + * necessary state updates based on the provided token,, then + * conditionally return the token. It will not return a token if the + * parser is currently skipping tokens, (such as within #if + * 0...#else). + * + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that + * makes the token returning unconditional. This is needed for things + * like #if and the tokens of its condition, (since these must be + * evaluated by the parser even when otherwise skipping). + * + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top + * of RETURN_TOKEN that performs a string copy of yytext before the + * return. + */ +#define RETURN_TOKEN_NEVER_SKIP(token) \ + do { \ + if (glcpp_lex_update_state_per_token (parser, token)) \ + return token; \ + } while (0) + +#define RETURN_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + RETURN_TOKEN_NEVER_SKIP(token); \ + } \ + } while(0) + +#define RETURN_STRING_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + yylval->str = ralloc_strdup (yyextra, yytext); \ + RETURN_TOKEN_NEVER_SKIP (token); \ + } \ + } while(0) + + +/* Update all state necessary for each token being returned. + * + * Here we'll be tracking newlines and spaces so that the lexer can + * alter its behavior as necessary, (for example, '#' has special + * significance if it is the first non-whitespace, non-comment token + * in a line, but does not otherwise). + * + * NOTE: If this function returns FALSE, then no token should be + * returned at all. This is used to suprress duplicate SPACE tokens. + */ +static int +glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token) +{ + /* After the first non-space token in a line, we won't + * allow any '#' to introduce a directive. */ + if (token == NEWLINE) { + parser->first_non_space_token_this_line = 1; + } else if (token != SPACE) { + parser->first_non_space_token_this_line = 0; + } + + /* Track newlines just to know whether a newline needs + * to be inserted if end-of-file comes early. */ + if (token == NEWLINE) { + parser->last_token_was_newline = 1; + } else { + parser->last_token_was_newline = 0; + } + + /* Track spaces to avoid emitting multiple SPACE + * tokens in a row. */ + if (token == SPACE) { + if (! parser->last_token_was_space) { + parser->last_token_was_space = 1; + return 1; + } else { + parser->last_token_was_space = 1; + return 0; + } + } else { + parser->last_token_was_space = 0; + return 1; + } +} + + %} %option bison-bridge bison-locations reentrant noyywrap @@ -56,17 +160,31 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); %option prefix="glcpp_" %option stack %option never-interactive +%option warn nodefault + + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ -%x DONE COMMENT UNREACHABLE +%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE SPACE [[:space:]] NONSPACE [^[:space:]] -NEWLINE [\n] HSPACE [ \t] -HASH ^{HSPACE}*#{HSPACE}* +HASH # +NEWLINE (\r\n|\n\r|\r|\n) IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* +PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])* PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] -OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ + +/* The OTHER class is simply a catch-all for things that the CPP +parser just doesn't care about. Since flex regular expressions that +match longer strings take priority over those matching shorter +strings, we have to be careful to avoid OTHER matching and hiding +something that CPP does care about. So we simply exclude all +characters that appear in any other expressions. */ + +OTHER [^][_#[:space:]#a-zA-Z0-9(){}.&*~!/%<>^|;,=+-] DIGITS [0-9][0-9]* DECIMAL_INTEGER [1-9][0-9]*[uU]? @@ -75,245 +193,379 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% + glcpp_parser_t *parser = yyextra; + + /* When we lex a multi-line comment, we replace it (as + * specified) with a single space. But if the comment spanned + * multiple lines, then subsequent parsing stages will not + * count correct line numbers. To avoid this problem we keep + * track of all newlines that were commented out by a + * multi-line comment, and we emit a NEWLINE token for each at + * the next legal opportunity, (which is when the lexer would + * be emitting a NEWLINE token anyway). + */ + if (YY_START == NEWLINE_CATCHUP) { + if (parser->commented_newlines) + parser->commented_newlines--; + if (parser->commented_newlines == 0) + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); + } + + /* Set up the parser->skipping bit here before doing any lexing. + * + * This bit controls whether tokens are skipped, (as implemented by + * RETURN_TOKEN), such as between "#if 0" and "#endif". + * + * The parser maintains a skip_stack indicating whether we should be + * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will + * push and pop items from the stack. + * + * Here are the rules for determining whether we are skipping: + * + * 1. If the skip stack is NULL, we are outside of all #if blocks + * and we are not skipping. + * + * 2. If the skip stack is non-NULL, the type of the top node in + * the stack determines whether to skip. A type of + * SKIP_NO_SKIP is used for blocks wheere we are emitting + * tokens, (such as between #if 1 and #endif, or after the + * #else of an #if 0, etc.). + * + * 3. The lexing_directive bit overrides the skip stack. This bit + * is set when we are actively lexing the expression for a + * pre-processor condition, (such as #if, #elif, or #else). In + * this case, even if otherwise skipping, we need to emit the + * tokens for this condition so that the parser can evaluate + * the expression. (For, #else, there's no expression, but we + * emit tokens so the parser can generate a nice error message + * if there are any tokens here). + */ + if (parser->skip_stack && + parser->skip_stack->type != SKIP_NO_SKIP && + ! parser->lexing_directive) + { + parser->skipping = 1; + } else { + parser->skipping = 0; + } + /* Single-line comments */ -"//"[^\n]* { +"//"[^\r\n]* { } /* Multi-line comments */ -"/*" { yy_push_state(COMMENT, yyscanner); } -[^*\n]* -[^*\n]*\n { yylineno++; yycolumn = 0; } -"*"+[^*/\n]* -"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; } +"/*" { yy_push_state(COMMENT, yyscanner); } +[^*\r\n]* +[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +"*"+[^*/\r\n]* +"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } "*"+"/" { yy_pop_state(yyscanner); - if (yyextra->space_tokens) - return SPACE; + /* In the start condition, we don't want any SPACE token. */ + if (yyextra->space_tokens && YY_START != HASH) + RETURN_TOKEN (SPACE); +} + +{HASH} { + + /* If the '#' is the first non-whitespace, non-comment token on this + * line, then it introduces a directive, switch to the start + * condition. + * + * Otherwise, this is just punctuation, so return the HASH_TOKEN + * token. */ + if (parser->first_non_space_token_this_line) { + BEGIN HASH; + } + + RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); } -{HASH}version { - yylval->str = talloc_strdup (yyextra, yytext); +version{HSPACE}+ { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_VERSION; + RETURN_STRING_TOKEN (VERSION_TOKEN); +} + + /* Swallow empty #pragma directives, (to avoid confusing the + * downstream compiler). + * + * Note: We use a simple regular expression for the lookahead + * here. Specifically, we cannot use the complete {NEWLINE} expression + * since it uses alternation and we've found that there's a flex bug + * where using alternation in the lookahead portion of a pattern + * triggers a buffer overrun. */ +pragma{HSPACE}*/[\r\n] { + BEGIN INITIAL; } /* glcpp doesn't handle #extension, #version, or #pragma directives. * Simply pass them through to the main compiler's lexer/parser. */ -{HASH}(extension|pragma)[^\n]+ { - yylval->str = talloc_strdup (yyextra, yytext); - yylineno++; - yycolumn = 0; - return OTHER; +(extension|pragma)[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (PRAGMA); } -{HASH}line{HSPACE}+{DIGITS}{HSPACE}+{DIGITS}{HSPACE}*$ { - /* Eat characters until the first digit is - * encountered - */ - char *ptr = yytext; - while (!isdigit(*ptr)) - ptr++; - - /* Subtract one from the line number because - * yylineno is zero-based instead of - * one-based. - */ - yylineno = strtol(ptr, &ptr, 0) - 1; - yylloc->source = strtol(ptr, NULL, 0); +line{HSPACE}+ { + BEGIN INITIAL; + RETURN_TOKEN (LINE); } -{HASH}line{HSPACE}+{DIGITS}{HSPACE}*$ { - /* Eat characters until the first digit is - * encountered - */ - char *ptr = yytext; - while (!isdigit(*ptr)) - ptr++; - - /* Subtract one from the line number because - * yylineno is zero-based instead of - * one-based. - */ - yylineno = strtol(ptr, &ptr, 0) - 1; +{NEWLINE} { + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } -{HASH}ifdef/.*\n { - yyextra->lexing_if = 1; + /* For the pre-processor directives, we return these tokens + * even when we are otherwise skipping. */ +ifdef { + BEGIN INITIAL; + yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IFDEF; + RETURN_TOKEN_NEVER_SKIP (IFDEF); } -{HASH}ifndef/.*\n { - yyextra->lexing_if = 1; +ifndef { + BEGIN INITIAL; + yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IFNDEF; + RETURN_TOKEN_NEVER_SKIP (IFNDEF); } -{HASH}if/[^_a-zA-Z0-9].*\n { - yyextra->lexing_if = 1; +if/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IF; + RETURN_TOKEN_NEVER_SKIP (IF); } -{HASH}elif/.*\n { - yyextra->lexing_if = 1; +elif/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_ELIF; + RETURN_TOKEN_NEVER_SKIP (ELIF); } -{HASH}else/.*\n { +else { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_ELSE; + RETURN_TOKEN_NEVER_SKIP (ELSE); } -{HASH}endif/.*\n { +endif { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_ENDIF; + RETURN_TOKEN_NEVER_SKIP (ENDIF); +} + +error[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (ERROR_TOKEN); } - /* When skipping (due to an #if 0 or similar) consume anything - * up to a newline. We do this with less priority than any - * #if-related directive (#if, #elif, #else, #endif), but with - * more priority than any other directive or token to avoid - * any side-effects from skipped content. + /* After we see a "#define" we enter the start state + * for the lexer. Within we are looking for the first + * identifier and specifically checking whether the identifier + * is followed by a '(' or not, (to lex either a + * FUNC_IDENTIFIER or an OBJ_IDENITIFIER token). * - * We use the lexing_if flag to avoid skipping any part of an - * if conditional expression. */ -[^\n]+/\n { - /* Since this rule always matches, YY_USER_ACTION gets called for it, - * wrongly incrementing yycolumn. We undo that effect here. */ - yycolumn -= yyleng; - if (yyextra->lexing_if || - yyextra->skip_stack == NULL || - yyextra->skip_stack->type == SKIP_NO_SKIP) - { - REJECT; + * While in the state we also need to explicitly + * handle a few other things that may appear before the + * identifier: + * + * * Comments, (handled above with the main support for + * comments). + * + * * Whitespace (simply ignored) + * + * * Anything else, (not an identifier, not a comment, + * and not whitespace). This will generate an error. + */ +define{HSPACE}* { + if (! parser->skipping) { + BEGIN DEFINE; + yyextra->space_tokens = 0; + RETURN_TOKEN (DEFINE_TOKEN); } } -{HASH}error.* { - char *p; - for (p = yytext; !isalpha(p[0]); p++); /* skip " # " */ - p += 5; /* skip "error" */ - glcpp_error(yylloc, yyextra, "#error%s", p); +undef { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN (UNDEF); +} + +{HSPACE}+ { + /* Nothing to do here. Importantly, don't leave the + * start condition, since it's legal to have space between the + * '#' and the directive.. */ } -{HASH}define{HSPACE}+/{IDENTIFIER}"(" { - yyextra->space_tokens = 0; - return HASH_DEFINE_FUNC; + /* This will catch any non-directive garbage after a HASH */ +{NONSPACE} { + BEGIN INITIAL; + RETURN_TOKEN (GARBAGE); } -{HASH}define { - yyextra->space_tokens = 0; - return HASH_DEFINE_OBJ; + /* An identifier immediately followed by '(' */ +{IDENTIFIER}/"(" { + BEGIN INITIAL; + RETURN_STRING_TOKEN (FUNC_IDENTIFIER); } -{HASH}undef { - yyextra->space_tokens = 0; - return HASH_UNDEF; + /* An identifier not immediately followed by '(' */ +{IDENTIFIER} { + BEGIN INITIAL; + RETURN_STRING_TOKEN (OBJ_IDENTIFIER); } -{HASH} { - yyextra->space_tokens = 0; - return HASH; + /* Whitespace */ +{HSPACE}+ { + /* Just ignore it. Nothing to do here. */ +} + + /* '/' not followed by '*', so not a comment. This is an error. */ +[/][^*]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); +} + + /* A character that can't start an identifier, comment, or + * space. This is an error. */ +[^_a-zA-Z/[:space:]]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); } {DECIMAL_INTEGER} { - yylval->str = talloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } {OCTAL_INTEGER} { - yylval->str = talloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } {HEXADECIMAL_INTEGER} { - yylval->str = talloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } "<<" { - return LEFT_SHIFT; + RETURN_TOKEN (LEFT_SHIFT); } ">>" { - return RIGHT_SHIFT; + RETURN_TOKEN (RIGHT_SHIFT); } "<=" { - return LESS_OR_EQUAL; + RETURN_TOKEN (LESS_OR_EQUAL); } ">=" { - return GREATER_OR_EQUAL; + RETURN_TOKEN (GREATER_OR_EQUAL); } "==" { - return EQUAL; + RETURN_TOKEN (EQUAL); } "!=" { - return NOT_EQUAL; + RETURN_TOKEN (NOT_EQUAL); } "&&" { - return AND; + RETURN_TOKEN (AND); } "||" { - return OR; + RETURN_TOKEN (OR); +} + +"++" { + RETURN_TOKEN (PLUS_PLUS); +} + +"--" { + RETURN_TOKEN (MINUS_MINUS); } "##" { - return PASTE; + if (! parser->skipping) { + if (parser->is_gles) + glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); + RETURN_TOKEN (PASTE); + } } "defined" { - return DEFINED; + RETURN_TOKEN (DEFINED); } {IDENTIFIER} { - yylval->str = talloc_strdup (yyextra, yytext); - return IDENTIFIER; + RETURN_STRING_TOKEN (IDENTIFIER); +} + +{PP_NUMBER} { + RETURN_STRING_TOKEN (OTHER); } {PUNCTUATION} { - return yytext[0]; + RETURN_TOKEN (yytext[0]); } {OTHER}+ { - yylval->str = talloc_strdup (yyextra, yytext); - return OTHER; + RETURN_STRING_TOKEN (OTHER); } -{HSPACE}+ { +{HSPACE} { if (yyextra->space_tokens) { - return SPACE; + RETURN_TOKEN (SPACE); } } -\n { - yyextra->lexing_if = 0; + /* We preserve all newlines, even between #if 0..#endif, so no + skipping.. */ +<*>{NEWLINE} { + if (parser->commented_newlines) { + BEGIN NEWLINE_CATCHUP; + } else { + BEGIN INITIAL; + } + yyextra->space_tokens = 1; + yyextra->lexing_directive = 0; yylineno++; yycolumn = 0; - return NEWLINE; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } - /* Handle missing newline at EOF. */ -<> { +<> { + if (YY_START == COMMENT) + glcpp_error(yylloc, yyextra, "Unterminated comment"); BEGIN DONE; /* Don't keep matching this rule forever. */ - yyextra->lexing_if = 0; - return NEWLINE; + yyextra->lexing_directive = 0; + if (! parser->last_token_was_newline) + RETURN_TOKEN (NEWLINE); } + /* This is a catch-all to avoid the annoying default flex action which + * matches any character and prints it. If any input ever matches this + * rule, then we have made a mistake above and need to fix one or more + * of the preceding patterns to match that input. */ + +<*>. { + glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext); + /* We don't actually use the UNREACHABLE start condition. We - only have this action here so that we can pretend to call some + only have this block here so that we can pretend to call some generated functions, (to avoid "defined but not used" warnings. */ -. { - unput('.'); - yy_top_state(yyextra); + if (YY_START == UNREACHABLE) { + unput('.'); + yy_top_state(yyextra); + } } %%