glsl/glcpp: Swallow empty #pragma directives.

[mesa.git] / src / glsl / glcpp / glcpp-lex.l
diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l

index f13b3dacb16166dd43a6de4338470d9f97bb1d9a..aeaf8abdf8a8851a96f9c2410cec0e090de54d1f 100644 (file)
--- a/src/glsl/glcpp/glcpp-lex.l
+++ b/src/glsl/glcpp/glcpp-lex.l
@@ -52,7 +52,7 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
                 yylloc->last_column = yycolumn + 1;                     \
                 parser->has_new_line_number = 0;                        \
                 parser->has_new_source_number = 0;                      \
- } while(0);
+       } while(0);
  
  #define YY_USER_INIT                   \
         do {                            \
@@ -85,13 +85,10 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
   * of RETURN_TOKEN that performs a string copy of yytext before the
   * return.
   */
-#define RETURN_TOKEN_NEVER_SKIP(token)                         \
-       do {                                                    \
-               if (token == NEWLINE)                           \
-                       parser->last_token_was_newline = 1;     \
-               else                                            \
-                       parser->last_token_was_newline = 0;     \
-               return (token);                                 \
+#define RETURN_TOKEN_NEVER_SKIP(token)                                 \
+       do {                                                            \
+               if (glcpp_lex_update_state_per_token (parser, token))   \
+                       return token;                                   \
         } while (0)
  
  #define RETURN_TOKEN(token)                                            \
@@ -109,6 +106,53 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
                 }                                                       \
         } while(0)
  
+
+/* Update all state necessary for each token being returned.
+ *
+ * Here we'll be tracking newlines and spaces so that the lexer can
+ * alter its behavior as necessary, (for example, '#' has special
+ * significance if it is the first non-whitespace, non-comment token
+ * in a line, but does not otherwise).
+ *
+ * Returns 1 if the token should be returned to the caller, or 0 if no
+ * token should be returned at all (used to suppress duplicate SPACE tokens).
+ */
+static int
+glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
+{
+       /* A '#' only introduces a directive while this flag is set: a
+        * NEWLINE sets it, and any token other than SPACE clears it. */
+       if (token == NEWLINE) {
+               parser->first_non_space_token_this_line = 1;
+       } else if (token != SPACE) {
+               parser->first_non_space_token_this_line = 0;
+       }
+
+       /* Track newlines just to know whether a newline needs
+        * to be inserted if end-of-file comes early. */
+       if (token == NEWLINE) {
+               parser->last_token_was_newline = 1;
+       } else {
+               parser->last_token_was_newline = 0;
+       }
+
+       /* Track spaces to avoid emitting multiple SPACE
+        * tokens in a row. */
+       if (token == SPACE) {
+               if (! parser->last_token_was_space) {
+                       parser->last_token_was_space = 1;
+                       return 1;
+               } else {
+                       parser->last_token_was_space = 1;
+                       return 0;
+               }
+       } else {
+               parser->last_token_was_space = 0;
+               return 1;
+       }
+}
+
+
  %}
  
  %option bison-bridge bison-locations reentrant noyywrap
@@ -116,14 +160,18 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
  %option prefix="glcpp_"
  %option stack
  %option never-interactive
+%option warn nodefault
  
-%x DONE COMMENT UNREACHABLE DEFINE NEWLINE_CATCHUP
+       /* Note: When adding any start conditions to this list, you must also
+        * update the "Internal compiler error" catch-all rule near the end of
+        * this file. */
+
+%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE
  
  SPACE          [[:space:]]
  NONSPACE       [^[:space:]]
-NEWLINE                [\n]
  HSPACE         [ \t]
-HASH           ^{HSPACE}*#{HSPACE}*
+HASH           #
  IDENTIFIER     [_a-zA-Z][_a-zA-Z0-9]*
  PP_NUMBER      [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
  PUNCTUATION    [][(){}.&*~!/%<>^|;,=+-]
@@ -160,7 +208,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
                         parser->commented_newlines--;
                 if (parser->commented_newlines == 0)
                         BEGIN INITIAL;
-               RETURN_TOKEN (NEWLINE);
+               RETURN_TOKEN_NEVER_SKIP (NEWLINE);
         }
  
         /* Set up the parser->skipping bit here before doing any lexing.
@@ -202,81 +250,111 @@ HEXADECIMAL_INTEGER      0[xX][0-9a-fA-F]+[uU]?
         }
  
         /* Single-line comments */
-"//"[^\n]* {
+<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
  }
  
         /* Multi-line comments */
-<DEFINE,INITIAL>"/*"                    { yy_push_state(COMMENT, yyscanner); }
-<COMMENT>[^*\n]*
-<COMMENT>[^*\n]*\n      { yylineno++; yycolumn = 0; parser->commented_newlines++; }
-<COMMENT>"*"+[^*/\n]*
-<COMMENT>"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<INITIAL,DEFINE,HASH>"/*"   { yy_push_state(COMMENT, yyscanner); }
+<COMMENT>[^*\r\n]*
+<COMMENT>[^*\r\n]*[\r\n]    { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<COMMENT>"*"+[^*/\r\n]*
+<COMMENT>"*"+[^*/\r\n]*[\r\n] { yylineno++; yycolumn = 0; parser->commented_newlines++; }
  <COMMENT>"*"+"/"        {
         yy_pop_state(yyscanner);
-       if (yyextra->space_tokens)
+       /* In the <HASH> start condition, we don't want any SPACE token. */
+       if (yyextra->space_tokens && YY_START != HASH)
                 RETURN_TOKEN (SPACE);
  }
  
-{HASH}version{HSPACE}+ {
+{HASH} {
+
+       /* If the '#' is the first non-whitespace, non-comment token on this
+        * line, then it introduces a directive, so switch to the <HASH> start
+        * condition.
+        *
+        * Otherwise, this is just punctuation. Either way, return the
+        * HASH_TOKEN token. */
+       if (parser->first_non_space_token_this_line) {
+               BEGIN HASH;
+       }
+
+       RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
+}
+
+<HASH>version{HSPACE}+ {
+       BEGIN INITIAL;
         yyextra->space_tokens = 0;
-       RETURN_STRING_TOKEN (HASH_VERSION);
+       RETURN_STRING_TOKEN (VERSION_TOKEN);
+}
+
+       /* Swallow empty #pragma directives, (to avoid confusing the
+        * downstream compiler). */
+<HASH>pragma{HSPACE}*/[\r\n] {
+       BEGIN INITIAL;
  }
  
         /* glcpp doesn't handle #extension, #version, or #pragma directives.
          * Simply pass them through to the main compiler's lexer/parser. */
-{HASH}(extension|pragma)[^\n]* {
-       yylineno++;
-       yycolumn = 0;
-       RETURN_STRING_TOKEN (OTHER);
+<HASH>(extension|pragma)[^\r\n]* {
+       BEGIN INITIAL;
+       RETURN_STRING_TOKEN (PRAGMA);
  }
  
-{HASH}line{HSPACE}+ {
-       RETURN_TOKEN (HASH_LINE);
+<HASH>line{HSPACE}+ {
+       BEGIN INITIAL;
+       RETURN_TOKEN (LINE);
+}
+
+<HASH>\n {
+       BEGIN INITIAL;
+       RETURN_TOKEN_NEVER_SKIP (NEWLINE);
  }
  
         /* For the pre-processor directives, we return these tokens
          * even when we are otherwise skipping. */
-{HASH}ifdef {
+<HASH>ifdef {
+       BEGIN INITIAL;
         yyextra->lexing_directive = 1;
         yyextra->space_tokens = 0;
-       RETURN_TOKEN_NEVER_SKIP (HASH_IFDEF);
+       RETURN_TOKEN_NEVER_SKIP (IFDEF);
  }
  
-{HASH}ifndef {
+<HASH>ifndef {
+       BEGIN INITIAL;
         yyextra->lexing_directive = 1;
         yyextra->space_tokens = 0;
-       RETURN_TOKEN_NEVER_SKIP (HASH_IFNDEF);
+       RETURN_TOKEN_NEVER_SKIP (IFNDEF);
  }
  
-{HASH}if/[^_a-zA-Z0-9] {
+<HASH>if/[^_a-zA-Z0-9] {
+       BEGIN INITIAL;
         yyextra->lexing_directive = 1;
         yyextra->space_tokens = 0;
-       RETURN_TOKEN_NEVER_SKIP (HASH_IF);
+       RETURN_TOKEN_NEVER_SKIP (IF);
  }
  
-{HASH}elif/[^_a-zA-Z0-9] {
+<HASH>elif/[^_a-zA-Z0-9] {
+       BEGIN INITIAL;
         yyextra->lexing_directive = 1;
         yyextra->space_tokens = 0;
-       RETURN_TOKEN_NEVER_SKIP (HASH_ELIF);
+       RETURN_TOKEN_NEVER_SKIP (ELIF);
  }
  
-{HASH}else {
+<HASH>else {
+       BEGIN INITIAL;
         yyextra->space_tokens = 0;
-       RETURN_TOKEN_NEVER_SKIP (HASH_ELSE);
+       RETURN_TOKEN_NEVER_SKIP (ELSE);
  }
  
-{HASH}endif {
+<HASH>endif {
+       BEGIN INITIAL;
         yyextra->space_tokens = 0;
-       RETURN_TOKEN_NEVER_SKIP (HASH_ENDIF);
+       RETURN_TOKEN_NEVER_SKIP (ENDIF);
  }
  
-{HASH}error.* {
-       if (! parser->skipping) {
-               char *p;
-               for (p = yytext; !isalpha(p[0]); p++); /* skip "  #   " */
-               p += 5; /* skip "error" */
-               glcpp_error(yylloc, yyextra, "#error%s", p);
-       }
+<HASH>error.* {
+       BEGIN INITIAL;
+       RETURN_STRING_TOKEN (ERROR_TOKEN);
  }
  
         /* After we see a "#define" we enter the <DEFINE> start state
@@ -297,14 +375,32 @@ HEXADECIMAL_INTEGER       0[xX][0-9a-fA-F]+[uU]?
          *      * Anything else, (not an identifier, not a comment,
          *        and not whitespace). This will generate an error.
          */
-{HASH}define{HSPACE}+ {
+<HASH>define{HSPACE}* {
         if (! parser->skipping) {
                 BEGIN DEFINE;
                 yyextra->space_tokens = 0;
-               RETURN_TOKEN (HASH_DEFINE);
+               RETURN_TOKEN (DEFINE_TOKEN);
         }
  }
  
+<HASH>undef {
+       BEGIN INITIAL;
+       yyextra->space_tokens = 0;
+       RETURN_TOKEN (UNDEF);
+}
+
+<HASH>{HSPACE}+ {
+       /* Nothing to do here. Importantly, don't leave the <HASH>
+        * start condition, since it's legal to have space between the
+        * '#' and the directive. */
+}
+
+       /* This will catch any non-directive garbage after a HASH */
+<HASH>{NONSPACE} {
+       BEGIN INITIAL;
+       RETURN_TOKEN (GARBAGE);
+}
+
         /* An identifier immediately followed by '(' */
  <DEFINE>{IDENTIFIER}/"(" {
         BEGIN INITIAL;
@@ -337,16 +433,6 @@ HEXADECIMAL_INTEGER        0[xX][0-9a-fA-F]+[uU]?
         RETURN_STRING_TOKEN (INTEGER_STRING);
  }
  
-{HASH}undef {
-       yyextra->space_tokens = 0;
-       RETURN_TOKEN (HASH_UNDEF);
-}
-
-{HASH} {
-       yyextra->space_tokens = 0;
-       RETURN_TOKEN (HASH);
-}
-
  {DECIMAL_INTEGER} {
         RETURN_STRING_TOKEN (INTEGER_STRING);
  }
@@ -391,6 +477,14 @@ HEXADECIMAL_INTEGER        0[xX][0-9a-fA-F]+[uU]?
         RETURN_TOKEN (OR);
  }
  
+"++" {
+       RETURN_TOKEN (PLUS_PLUS);
+}
+
+"--" {
+       RETURN_TOKEN (MINUS_MINUS);
+}
+
  "##" {
         if (! parser->skipping) {
                 if (parser->is_gles)
@@ -427,9 +521,11 @@ HEXADECIMAL_INTEGER        0[xX][0-9a-fA-F]+[uU]?
  
         /* We preserve all newlines, even between #if 0..#endif, so no
         skipping.. */
-\n {
+<*>[\r\n] {
         if (parser->commented_newlines) {
                 BEGIN NEWLINE_CATCHUP;
+       } else {
+               BEGIN INITIAL;
         }
         yyextra->space_tokens = 1;
         yyextra->lexing_directive = 0;
@@ -438,24 +534,31 @@ HEXADECIMAL_INTEGER       0[xX][0-9a-fA-F]+[uU]?
         RETURN_TOKEN_NEVER_SKIP (NEWLINE);
  }
  
-<INITIAL,COMMENT,DEFINE><<EOF>> {
+<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
         if (YY_START == COMMENT)
                 glcpp_error(yylloc, yyextra, "Unterminated comment");
-       if (YY_START == DEFINE)
-               glcpp_error(yylloc, yyextra, "#define without macro name");
         BEGIN DONE; /* Don't keep matching this rule forever. */
         yyextra->lexing_directive = 0;
         if (! parser->last_token_was_newline)
                 RETURN_TOKEN (NEWLINE);
  }
  
+       /* This is a catch-all to avoid the annoying default flex action which
+        * matches any character and prints it. If any input ever matches this
+        * rule, then we have made a mistake above and need to fix one or more
+        * of the preceding patterns to match that input. */
+
+<*>. {
+       glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);
+
         /* We don't actually use the UNREACHABLE start condition. We
-       only have this action here so that we can pretend to call some
+       only have this block here so that we can pretend to call some
         generated functions, (to avoid "defined but not used"
         warnings. */
-<UNREACHABLE>. {
-       unput('.');
-       yy_top_state(yyextra);
+        if (YY_START == UNREACHABLE) {
+               unput('.');
+               yy_top_state(yyextra);
+       }
  }
  
  %%