src/mesa/shader/grammar.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.2
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file grammar.c
  27  * syntax parsing engine
  28  * \author Michal Krol
  29  */
  30
  31 #ifndef GRAMMAR_PORT_BUILD
  32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
  33 #endif
  34
  35 /*
  36     $Id: grammar.c,v 1.10 2004/12/08 14:00:46 alanh Exp $
  37 */
  38
  39 /*
  40     INTRODUCTION
  41     ------------
  42
  43     The task is to check the syntax of an input string. Input string is a stream of ASCII
  44     characters terminated with a null-character ('\0'). Checking it using C language is
  45     difficult and hard to implement without bugs. It is hard to maintain and make changes when
  46     the syntax changes.
  47
  48     This is because of a high redundancy of the C code. Large blocks of code are duplicated with
  49     only small changes. Even use of macros does not solve the problem because macros cannot
  50     erase the complexity of the problem.
  51
  52     The resolution is to create a new language that will be highly oriented to our task. Once
  53     we describe a particular syntax, we are done. We can then focus on the code that implements
  54     the language. Its size and complexity are relatively small compared to the code that
  55     directly checks the syntax.
  56
  57     First, we must implement our new language. Here, the language is implemented in C, but it
  58     could also be implemented in any other language. The code is listed below. We must take
  59     a good care that it is bug free. This is simple because the code is simple and clean.
  60
  61     Next, we must describe the syntax of our new language in itself. Once created and checked
  62     manually that it is correct, we can use it to check other scripts.
  63
  64     Note that our new language loading code does not have to check the syntax. It is because we
  65     assume that the script describing itself is correct, and other scripts can be syntactically
  66     checked by the former script. The loading code must only do semantic checking which leads us to
  67     simple resolving references.
  68
  69     THE LANGUAGE
  70     ------------
  71
  72     Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
  73     sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
  74     which is an identifier, and its definition. A definition is in turn a sequence of specifiers
  75     connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
  76     definition. Specifier can be a symbol, string, character, character range or a special
  77     keyword ".true" or ".false".
  78
  79     On the very beginning of the script there is a declaration of a root symbol and is in the form:
  80         .syntax <root_symbol>;
  81     The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
  82     the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
  83     the symbol evaluates to true. Definition evaluation depends on the operator used to connect
  84     specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
  85     only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
  86     true if any of the specifiers evaluates to true. If definition contains only one specifier,
  87     it is evaluated as if it was connected with ".true" keyword by ".and" operator.
  88
  89     If specifier is a ".true" keyword, it always evaluates to true.
  90
  91     If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
  92     when it does not evaluate to true.
  93
  94     Character range specifier is in the form:
  95         '<first_character>' - '<second_character>'
  96     If specifier is a character range, it evaluates to true if character in the stream is greater
  97     or equal to <first_character> and less or equal to <second_character>. In that situation
  98     the stream pointer is advanced to point to next character in the stream. All C-style escape
  99     sequences are supported although trigraph sequences are not. The comparisons are performed
 100     on 8-bit unsigned integers.
 101
 102     Character specifier is in the form:
 103         '<single_character>'
 104     It evaluates to true if the following character range specifier evaluates to true:
 105         '<single_character>' - '<single_character>'
 106
 107     String specifier is in the form:
 108         "<string>"
 109     Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
 110     <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
 111     the following character specifier evaluates to true:
 112         '<string>[i]'
 113     If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
 114
 115     Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
 116         .loop <symbol>                  (1)
 117     where <symbol> is defined as follows:
 118         <symbol> <definition>;          (2)
 119     Construction (1) is replaced by the following code:
 120         <symbol$1>
 121     and declaration (2) is replaced by the following:
 122         <symbol$1> <symbol$2> .or .true;
 123         <symbol$2> <symbol> .and <symbol$1>;
 124         <symbol> <definition>;
 125
 126     Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
 127     registers that can be accessed in the syn body. Each reg has its name and a default value.
 128     The register is one byte wide. The C code can change the default value by calling
 129     grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
 130     a sequence of specifiers joined with .and or .or operator. And now each specifier can be
 131     prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
 132     where <operator> can be == or !=. If the condition evaluates to false, the specifier
 133     evaluates to .false. Otherwise it evaluates to the specifier.
 134
 135     ESCAPE SEQUENCES
 136     ----------------
 137
 138     Synek supports all escape sequences in character specifiers. The mapping table is listed below.
 139     All occurrences of the characters in the first column are replaced with the corresponding
 140     character in the second column.
 141
 142         Escape sequence         Represents
 143     ------------------------------------------------------------------------------------------------
 144         \a                      Bell (alert)
 145         \b                      Backspace
 146         \f                      Formfeed
 147         \n                      New line
 148         \r                      Carriage return
 149         \t                      Horizontal tab
 150         \v                      Vertical tab
 151         \'                      Single quotation mark
 152         \"                      Double quotation mark
 153         \\                      Backslash
 154         \?                      Literal question mark
 155         \ooo                    ASCII character in octal notation
 156         \xhhh                   ASCII character in hexadecimal notation
 157     ------------------------------------------------------------------------------------------------
 158
 159     RAISING ERRORS
 160     --------------
 161
 162     Any specifier can be followed by a special construction that is executed when the specifier
 163     evaluates to false. The construction is in the form:
 164         .error <ERROR_TEXT>
 165     <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
 166     in the form:
 167         .errtext <ERROR_TEXT> "<error_desc>"
 168     When specifier evaluates to false and this construction is present, parsing is stopped
 169     immediately and <error_desc> is returned as a result of parsing. The error position is also
 170     returned and it is meant as an offset from the beginning of the stream to the character that
 171     was valid so far. Example:
 172
 173         (**** syntax script ****)
 174
 175         .syntax program;
 176         .errtext MISSING_SEMICOLON      "missing ';'"
 177         program         declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
 178                         .loop space .and '\0';
 179         declaration     "declare" .and .loop space .and identifier;
 180         space           ' ';
 181
 182         (**** sample code ****)
 183
 184         declare foo ,
 185
 186     In the example above checking the sample code will result in error message "missing ';'" and
 187     error position 12. The sample code is not correct. Note the presence of '\0' specifier to
 188     assure that there is no code after semicolon - only spaces.
 189     <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
 190     the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
 191     the identifier name. The starting position is the error position. The length of the resulting
 192     string is the position after invoking the symbol.
 193
 194     PRODUCTION
 195     ----------
 196
 197     Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
 198     that evaluate to true. That is, every specifier and optional error construction can be followed
 199     by a number of emit constructions that are in the form:
 200         .emit <parameter>
 201     <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
 202     0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
 203     in the form:
 204         .emtcode <identifier> <hex_number>
 205
 206     When given specifier evaluates to true, all emits associated with the specifier are output
 207     in order they were declared. A star means that last-read character should be output instead
 208     of constant value. Example:
 209
 210         (**** syntax script ****)
 211
 212         .syntax foobar;
 213         .emtcode WORD_FOO       0x01
 214         .emtcode WORD_BAR       0x02
 215         foobar      FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
 216         FOO         "foo" .and SPACE;
 217         BAR         "bar" .and SPACE;
 218         SPACE       ' ' .or '\0';
 219
 220         (**** sample text 1 ****)
 221
 222         foo
 223
 224         (**** sample text 2 ****)
 225
 226         foobar
 227
 228     For both samples the result will be one-element array. For first sample text it will be
 229     value 1, for second - 0. Note that every text will be accepted because of presence of
 230     .true as an alternative.
 231
 232     Another example:
 233
 234         (**** syntax script ****)
 235
 236         .syntax declaration;
 237         .emtcode VARIABLE       0x01
 238         declaration     "declare" .and .loop space .and
 239                         identifier .emit VARIABLE .and          (1)
 240                         .true .emit 0x00 .and                   (2)
 241                         .loop space .and ';';
 242         space           ' ' .or '\t';
 243         identifier      .loop id_char .emit *;                  (3)
 244         id_char         'a'-'z' .or 'A'-'Z' .or '_';
 245
 246         (**** sample code ****)
 247
 248         declare    fubar;
 249
 250     In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
 251     true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
 252     to terminate the string with null to signal when the string ends. Specifier (3) outputs
 253     all characters that make declared identifier. The result of sample code will be the
 254     following array:
 255         { 1, 'f', 'u', 'b', 'a', 'r', 0 }
 256
 257     If .emit is followed by dollar $, it means that current position should be output. Current
 258     position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
 259     first character consumed by the specifier associated with the .emit instruction. Current
 260     position is stored in the output buffer in Little-Endian convention (the lowest byte comes
 261     first).
 262 */
 263
 264 static void mem_free (void **);
 265
 266 /*
 267     internal error messages
 268 */
 269 static const byte *OUT_OF_MEMORY =          (byte *) "internal error 1001: out of physical memory";
 270 static const byte *UNRESOLVED_REFERENCE =   (byte *) "internal error 1002: unresolved reference '$'";
 271 static const byte *INVALID_GRAMMAR_ID =     (byte *) "internal error 1003: invalid grammar object";
 272 static const byte *INVALID_REGISTER_NAME =  (byte *) "internal error 1004: invalid register name: '$'";
 273 static const byte *DUPLICATE_IDENTIFIER =   (byte *) "internal error 1005: identifier '$' already defined";
 274 static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
 275
 276 static const byte *error_message = NULL;    /* points to one of the error messages above */
 277 static byte *error_param = NULL;        /* this is inserted into error_message in place of $ */
 278 static int error_position = -1;
 279
 280 static byte *unknown = (byte *) "???";
 281
 282 static void clear_last_error (void)
 283 {
 284     /* reset error message */
 285     error_message = NULL;
 286
 287     /* free error parameter - if error_param is a "???" don't free it - it's static */
 288     if (error_param != unknown)
 289         mem_free ((void **) (void *) &error_param);
 290     else
 291         error_param = NULL;
 292
 293     /* reset error position */
 294     error_position = -1;
 295 }
 296
 297 static void set_last_error (const byte *msg, byte *param, int pos)
 298 {
 299     /* error message can be set only once */
 300     if (error_message != NULL)
 301     {
 302         mem_free ((void **) (void *) &param);
 303         return;
 304     }
 305
 306     error_message = msg;
 307
 308     /* if param is NULL, set error_param to unknown ("???") */
 309     /* note: do not try to strdup the "???" - it may be that we are here because of */
 310     /* out of memory error so strdup can fail */
 311     if (param != NULL)
 312         error_param = param;
 313     else
 314         error_param = unknown;
 315
 316     error_position = pos;
 317 }
 318
 319 /*
 320     memory management routines
 321 */
 322 static void *mem_alloc (size_t size)
 323 {
 324     void *ptr = grammar_alloc_malloc (size);
 325     if (ptr == NULL)
 326         set_last_error (OUT_OF_MEMORY, NULL, -1);
 327     return ptr;
 328 }
 329
 330 static void *mem_copy (void *dst, const void *src, size_t size)
 331 {
 332     return grammar_memory_copy (dst, src, size);
 333 }
 334
 335 static void mem_free (void **ptr)
 336 {
 337     grammar_alloc_free (*ptr);
 338     *ptr = NULL;
 339 }
 340
 341 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
 342 {
 343     void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
 344     if (ptr2 == NULL)
 345         set_last_error (OUT_OF_MEMORY, NULL, -1);
 346     return ptr2;
 347 }
 348
 349 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
 350 {
 351     return grammar_string_copy_n (dst, src, max_len);
 352 }
 353
 354 static byte *str_duplicate (const byte *str)
 355 {
 356     byte *new_str = grammar_string_duplicate (str);
 357     if (new_str == NULL)
 358         set_last_error (OUT_OF_MEMORY, NULL, -1);
 359     return new_str;
 360 }
 361
 362 static int str_equal (const byte *str1, const byte *str2)
 363 {
 364     return grammar_string_compare (str1, str2) == 0;
 365 }
 366
 367 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
 368 {
 369     return grammar_string_compare_n (str1, str2, n) == 0;
 370 }
 371
 372 static unsigned int str_length (const byte *str)
 373 {
 374     return grammar_string_length (str);
 375 }
 376
 377 /*
 378     useful macros
 379 */
 380 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
 381     static void _Ty##_append (_Ty **x, _Ty *nx) {\
 382         while (*x) x = &(**x).next;\
 383         *x = nx;\
 384     }
 385
 386 /*
 387     string to byte map typedef
 388 */
 389 typedef struct map_byte_
 390 {
 391     byte *key;
 392     byte data;
 393     struct map_byte_ *next;
 394 } map_byte;
 395
 396 static void map_byte_create (map_byte **ma)
 397 {
 398     *ma = (map_byte *) mem_alloc (sizeof (map_byte));
 399     if (*ma)
 400     {
 401         (**ma).key = NULL;
 402         (**ma).data = '\0';
 403         (**ma).next = NULL;
 404     }
 405 }
 406
 407 static void map_byte_destroy (map_byte **ma)
 408 {
 409     if (*ma)
 410     {
 411         map_byte_destroy (&(**ma).next);
 412         mem_free ((void **) &(**ma).key);
 413         mem_free ((void **) ma);
 414     }
 415 }
 416
/* defines map_byte_append (map_byte **x, map_byte *nx): links nx at the tail of list *x */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
 418
 419 /*
 420     searches the map for the specified key,
 421     returns pointer to the element with the specified key if it exists
 422     returns NULL otherwise
 423 */
 424 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
 425 {
 426     while (*ma)
 427     {
 428         if (str_equal ((**ma).key, key))
 429             return *ma;
 430
 431         ma = &(**ma).next;
 432     }
 433
 434     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
 435     return NULL;
 436 }
 437
 438 /*
 439     searches the map for specified key,
 440     if the key is matched, *data is filled with data associated with the key,
 441     returns 0 if the key is matched,
 442     returns 1 otherwise
 443 */
 444 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
 445 {
 446     map_byte *found = map_byte_locate (ma, key);
 447     if (found != NULL)
 448     {
 449         *data = found->data;
 450
 451         return 0;
 452     }
 453
 454     return 1;
 455 }
 456
 457 /*
 458     regbyte context typedef
 459
 460     Each regbyte consists of its name and a default value. These are static and created at
 461     grammar script compile-time, for example the following line:
 462         .regbyte vertex_blend      0x00
 463     adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
 464     When the script is executed, this regbyte can be accessed by name for read and write. When a
 465     particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
 466     stack. The new entry contains information abot which regbyte it references and its new value.
 467     When a given regbyte is accessed for read, the stack is searched top-down to find an
 468     entry that references the regbyte. The first matching entry is used to return the current
 469     value it holds. If no entry is found, the default value is returned.
 470 */
 471 typedef struct regbyte_ctx_
 472 {
 473     map_byte *m_regbyte;
 474     byte m_current_value;
 475     struct regbyte_ctx_ *m_prev;
 476 } regbyte_ctx;
 477
 478 static void regbyte_ctx_create (regbyte_ctx **re)
 479 {
 480     *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
 481     if (*re)
 482     {
 483         (**re).m_regbyte = NULL;
 484         (**re).m_prev = NULL;
 485     }
 486 }
 487
 488 static void regbyte_ctx_destroy (regbyte_ctx **re)
 489 {
 490     if (*re)
 491     {
 492         mem_free ((void **) re);
 493     }
 494 }
 495
 496 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
 497 {
 498     /* first lookup in the register stack */
 499     while (*re != NULL)
 500     {
 501         if ((**re).m_regbyte == reg)
 502             return (**re).m_current_value;
 503
 504         re = &(**re).m_prev;
 505     }
 506
 507     /* if not found - return the default value */
 508     return reg->data;
 509 }
 510
 511 /*
 512     emit type typedef
 513 */
 514 typedef enum emit_type_
 515 {
 516     et_byte,            /* explicit number */
 517     et_stream,          /* eaten character */
 518     et_position         /* current position */
 519 } emit_type;
 520
 521 /*
 522     emit destination typedef
 523 */
 524 typedef enum emit_dest_
 525 {
 526     ed_output,          /* write to the output buffer */
 527     ed_regbyte          /* write a particular regbyte */
 528 } emit_dest;
 529
 530 /*
 531     emit typedef
 532 */
 533 typedef struct emit_
 534 {
 535     emit_dest m_emit_dest;
 536     emit_type m_emit_type;      /* ed_output */
 537     byte m_byte;                /* et_byte */
 538     map_byte *m_regbyte;        /* ed_regbyte */
 539     byte *m_regname;            /* ed_regbyte - temporary */
 540     struct emit_ *m_next;
 541 } emit;
 542
 543 static void emit_create (emit **em)
 544 {
 545     *em = (emit *) mem_alloc (sizeof (emit));
 546     if (*em)
 547     {
 548         (**em).m_emit_dest = ed_output;
 549         (**em).m_emit_type = et_byte;
 550         (**em).m_byte = '\0';
 551         (**em).m_regbyte = NULL;
 552         (**em).m_regname = NULL;
 553         (**em).m_next = NULL;
 554     }
 555 }
 556
 557 static void emit_destroy (emit **em)
 558 {
 559     if (*em)
 560     {
 561         emit_destroy (&(**em).m_next);
 562         mem_free ((void **) &(**em).m_regname);
 563         mem_free ((void **) em);
 564     }
 565 }
 566
 567 static unsigned int emit_size (emit *_E)
 568 {
 569     unsigned int _N = 0;
 570
 571     while (_E != NULL)
 572     {
 573         if (_E->m_emit_dest == ed_output)
 574         {
 575             if (_E->m_emit_type == et_position)
 576                 _N += 4;     /* position is a 32-bit unsigned integer */
 577             else
 578                 _N++;
 579         }
 580         _E = _E->m_next;
 581     }
 582
 583     return _N;
 584 }
 585
 586 static int emit_push (emit *_E, byte *_P, byte _C, unsigned int _Pos, regbyte_ctx **_Ctx)
 587 {
 588     while (_E != NULL)
 589     {
 590         if (_E->m_emit_dest == ed_output)
 591         {
 592             if (_E->m_emit_type == et_byte)
 593                 *_P++ = _E->m_byte;
 594             else if (_E->m_emit_type == et_stream)
 595                 *_P++ = _C;
 596             else /* _Em->type == et_position */
 597             {
 598                 *_P++ = (byte) (_Pos);
 599                 *_P++ = (byte) (_Pos >> 8);
 600                 *_P++ = (byte) (_Pos >> 16);
 601                 *_P++ = (byte) (_Pos >> 24);
 602             }
 603         }
 604         else
 605         {
 606             regbyte_ctx *new_rbc;
 607             regbyte_ctx_create (&new_rbc);
 608             if (new_rbc == NULL)
 609                 return 1;
 610
 611             new_rbc->m_prev = *_Ctx;
 612             new_rbc->m_regbyte = _E->m_regbyte;
 613             *_Ctx = new_rbc;
 614
 615             if (_E->m_emit_type == et_byte)
 616                 new_rbc->m_current_value = _E->m_byte;
 617             else if (_E->m_emit_type == et_stream)
 618                 new_rbc->m_current_value = _C;
 619         }
 620
 621         _E = _E->m_next;
 622     }
 623
 624     return 0;
 625 }
 626
 627 /*
 628     error typedef
 629 */
 630 typedef struct error_
 631 {
 632     byte *m_text;
 633     byte *m_token_name;
 634     struct rule_ *m_token;
 635 } error;
 636
 637 static void error_create (error **er)
 638 {
 639     *er = (error *) mem_alloc (sizeof (error));
 640     if (*er)
 641     {
 642         (**er).m_text = NULL;
 643         (**er).m_token_name = NULL;
 644         (**er).m_token = NULL;
 645     }
 646 }
 647
 648 static void error_destroy (error **er)
 649 {
 650     if (*er)
 651     {
 652         mem_free ((void **) &(**er).m_text);
 653         mem_free ((void **) &(**er).m_token_name);
 654         mem_free ((void **) er);
 655     }
 656 }
 657
 658 struct dict_;
 659 static byte *error_get_token (error *, struct dict_ *, const byte *, unsigned int);
 660
 661 /*
 662     condition operand type typedef
 663 */
 664 typedef enum cond_oper_type_
 665 {
 666     cot_byte,               /* constant 8-bit unsigned integer */
 667     cot_regbyte             /* pointer to byte register containing the current value */
 668 } cond_oper_type;
 669
 670 /*
 671     condition operand typedef
 672 */
 673 typedef struct cond_oper_
 674 {
 675     cond_oper_type m_type;
 676     byte m_byte;            /* cot_byte */
 677     map_byte *m_regbyte;    /* cot_regbyte */
 678     byte *m_regname;        /* cot_regbyte - temporary */
 679 } cond_oper;
 680
 681 /*
 682     condition type typedef
 683 */
 684 typedef enum cond_type_
 685 {
 686     ct_equal,
 687     ct_not_equal
 688 } cond_type;
 689
 690 /*
 691     condition typedef
 692 */
 693 typedef struct cond_
 694 {
 695     cond_type m_type;
 696     cond_oper m_operands[2];
 697 } cond;
 698
 699 static void cond_create (cond **co)
 700 {
 701     *co = (cond *) mem_alloc (sizeof (cond));
 702     if (*co)
 703     {
 704         (**co).m_operands[0].m_regname = NULL;
 705         (**co).m_operands[1].m_regname = NULL;
 706     }
 707 }
 708
 709 static void cond_destroy (cond **co)
 710 {
 711     if (*co)
 712     {
 713         mem_free ((void **) &(**co).m_operands[0].m_regname);
 714         mem_free ((void **) &(**co).m_operands[1].m_regname);
 715         mem_free ((void **) co);
 716     }
 717 }
 718
 719 /*
 720     specifier type typedef
 721 */
 722 typedef enum spec_type_
 723 {
 724     st_false,
 725     st_true,
 726     st_byte,
 727     st_byte_range,
 728     st_string,
 729     st_identifier,
 730     st_identifier_loop,
 731     st_debug
 732 } spec_type;
 733
 734 /*
 735     specifier typedef
 736 */
 737 typedef struct spec_
 738 {
 739     spec_type m_spec_type;
 740     byte m_byte[2];                 /* st_byte, st_byte_range */
 741     byte *m_string;                 /* st_string */
 742     struct rule_ *m_rule;           /* st_identifier, st_identifier_loop */
 743     emit *m_emits;
 744     error *m_errtext;
 745     cond *m_cond;
 746     struct spec_ *next;
 747 } spec;
 748
 749 static void spec_create (spec **sp)
 750 {
 751     *sp = (spec *) mem_alloc (sizeof (spec));
 752     if (*sp)
 753     {
 754         (**sp).m_spec_type = st_false;
 755         (**sp).m_byte[0] = '\0';
 756         (**sp).m_byte[1] = '\0';
 757         (**sp).m_string = NULL;
 758         (**sp).m_rule = NULL;
 759         (**sp).m_emits = NULL;
 760         (**sp).m_errtext = NULL;
 761         (**sp).m_cond = NULL;
 762         (**sp).next = NULL;
 763     }
 764 }
 765
 766 static void spec_destroy (spec **sp)
 767 {
 768     if (*sp)
 769     {
 770         spec_destroy (&(**sp).next);
 771         emit_destroy (&(**sp).m_emits);
 772         error_destroy (&(**sp).m_errtext);
 773         mem_free ((void **) &(**sp).m_string);
 774         cond_destroy (&(**sp).m_cond);
 775         mem_free ((void **) sp);
 776     }
 777 }
 778
/* defines spec_append (spec **x, spec *nx): links nx at the tail of list *x */
GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
 780
 781 /*
 782     operator typedef
 783 */
 784 typedef enum oper_
 785 {
 786     op_none,
 787     op_and,
 788     op_or
 789 } oper;
 790
 791 /*
 792     rule typedef
 793 */
 794 typedef struct rule_
 795 {
 796     oper m_oper;
 797     spec *m_specs;
 798     struct rule_ *next;
 799     int m_referenced;
 800 } rule;
 801
 802 static void rule_create (rule **ru)
 803 {
 804     *ru = (rule *) mem_alloc (sizeof (rule));
 805     if (*ru)
 806     {
 807         (**ru).m_oper = op_none;
 808         (**ru).m_specs = NULL;
 809         (**ru).next = NULL;
 810         (**ru).m_referenced = 0;
 811     }
 812 }
 813
 814 static void rule_destroy (rule **ru)
 815 {
 816     if (*ru)
 817     {
 818         rule_destroy (&(**ru).next);
 819         spec_destroy (&(**ru).m_specs);
 820         mem_free ((void **) ru);
 821     }
 822 }
 823
/* defines rule_append (rule **x, rule *nx): links nx at the tail of list *x */
GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
 825
 826 /*
 827     returns unique grammar id
 828 */
 829 static grammar next_valid_grammar_id (void)
 830 {
 831     static grammar id = 0;
 832
 833     return ++id;
 834 }
 835
 836 /*
 837     dictionary typedef
 838 */
 839 typedef struct dict_
 840 {
 841     rule *m_rulez;
 842     rule *m_syntax;
 843     rule *m_string;
 844     map_byte *m_regbytes;
 845     grammar m_id;
 846     struct dict_ *next;
 847 } dict;
 848
 849 static void dict_create (dict **di)
 850 {
 851     *di = (dict *) mem_alloc (sizeof (dict));
 852     if (*di)
 853     {
 854         (**di).m_rulez = NULL;
 855         (**di).m_syntax = NULL;
 856         (**di).m_string = NULL;
 857         (**di).m_regbytes = NULL;
 858         (**di).m_id = next_valid_grammar_id ();
 859         (**di).next = NULL;
 860     }
 861 }
 862
 863 static void dict_destroy (dict **di)
 864 {
 865     if (*di)
 866     {
 867         rule_destroy (&(**di).m_rulez);
 868         map_byte_destroy (&(**di).m_regbytes);
 869         mem_free ((void **) di);
 870     }
 871 }
 872
/* defines dict_append (dict **x, dict *nx): links nx at the tail of list *x */
GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
 874
 875 static void dict_find (dict **di, grammar key, dict **data)
 876 {
 877     while (*di)
 878     {
 879         if ((**di).m_id == key)
 880         {
 881             *data = *di;
 882             return;
 883         }
 884
 885         di = &(**di).next;
 886     }
 887
 888     *data = NULL;
 889 }
 890
/* NOTE(review): presumably the head of the list of all existing grammar objects - */
/* confirm against the code that uses it (not part of this section) */
static dict *g_dicts = NULL;
 892
 893 /*
 894     byte array typedef
 895 */
 896 typedef struct barray_
 897 {
 898     byte *data;
 899     unsigned int len;
 900 } barray;
 901
 902 static void barray_create (barray **ba)
 903 {
 904     *ba = (barray *) mem_alloc (sizeof (barray));
 905     if (*ba)
 906     {
 907         (**ba).data = NULL;
 908         (**ba).len = 0;
 909     }
 910 }
 911
 912 static void barray_destroy (barray **ba)
 913 {
 914     if (*ba)
 915     {
 916         mem_free ((void **) &(**ba).data);
 917         mem_free ((void **) ba);
 918     }
 919 }
 920
 921 /*
 922     reallocates byte array to requested size,
 923     returns 0 on success,
 924     returns 1 otherwise
 925 */
 926 static int barray_resize (barray **ba, unsigned int nlen)
 927 {
 928     byte *new_pointer;
 929
 930     if (nlen == 0)
 931     {
 932         mem_free ((void **) &(**ba).data);
 933         (**ba).data = NULL;
 934         (**ba).len = 0;
 935
 936         return 0;
 937     }
 938     else
 939     {
 940         new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
 941             nlen * sizeof (byte));
 942         if (new_pointer)
 943         {
 944             (**ba).data = new_pointer;
 945             (**ba).len = nlen;
 946
 947             return 0;
 948         }
 949     }
 950
 951     return 1;
 952 }
 953
 954 /*
 955     adds byte array pointed by *nb to the end of array pointed by *ba,
 956     returns 0 on success,
 957     returns 1 otherwise
 958 */
 959 static int barray_append (barray **ba, barray **nb)
 960 {
 961     const unsigned int len = (**ba).len;
 962
 963     if (barray_resize (ba, (**ba).len + (**nb).len))
 964         return 1;
 965
 966     mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
 967
 968     return 0;
 969 }
 970
 971 /*
 972     adds emit chain pointed by em to the end of array pointed by *ba,
 973     returns 0 on success,
 974     returns 1 otherwise
 975 */
 976 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
 977 {
 978     unsigned int count = emit_size (em);
 979
 980     if (barray_resize (ba, (**ba).len + count))
 981         return 1;
 982
 983     return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
 984 }
 985
/*
    byte pool typedef,
    a buffer of bytes that only ever grows (see bytepool_reserve)
*/
typedef struct bytepool_
{
    /* the pool storage */
    byte *_F;
    /* size of the storage pointed by _F, in bytes */
    unsigned int _Siz;
} bytepool;
 994
 995 static void bytepool_destroy (bytepool **by)
 996 {
 997     if (*by != NULL)
 998     {
 999         mem_free ((void **) &(**by)._F);
1000         mem_free ((void **) by);
1001     }
1002 }
1003
1004 static void bytepool_create (bytepool **by, int len)
1005 {
1006     *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1007     if (*by != NULL)
1008     {
1009         (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1010         (**by)._Siz = len;
1011
1012         if ((**by)._F == NULL)
1013             bytepool_destroy (by);
1014     }
1015 }
1016
1017 static int bytepool_reserve (bytepool *by, unsigned int _N)
1018 {
1019     byte *_P;
1020
1021     if (_N <= by->_Siz)
1022         return 0;
1023
1024     /* byte pool can only grow and at least by doubling its size */
1025     _N = _N >= by->_Siz * 2 ? _N : by->_Siz * 2;
1026
1027     /* reallocate the memory and adjust pointers to the new memory location */
1028     _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * _N));
1029     if (_P != NULL)
1030     {
1031         by->_F = _P;
1032         by->_Siz = _N;
1033         return 0;
1034     }
1035
1036     return 1;
1037 }
1038
/*
    string to string map typedef,
    a singly linked list of key/data string pairs
*/
typedef struct map_str_
{
    /* key string, released by map_str_destroy */
    byte *key;
    /* string associated with the key, released by map_str_destroy */
    byte *data;
    /* next entry of the map, NULL at the end of the list */
    struct map_str_ *next;
} map_str;
1048
1049 static void map_str_create (map_str **ma)
1050 {
1051     *ma = (map_str *) mem_alloc (sizeof (map_str));
1052     if (*ma)
1053     {
1054         (**ma).key = NULL;
1055         (**ma).data = NULL;
1056         (**ma).next = NULL;
1057     }
1058 }
1059
1060 static void map_str_destroy (map_str **ma)
1061 {
1062     if (*ma)
1063     {
1064         map_str_destroy (&(**ma).next);
1065         mem_free ((void **) &(**ma).key);
1066         mem_free ((void **) &(**ma).data);
1067         mem_free ((void **) ma);
1068     }
1069 }
1070
/* instantiates the list-append helper for the map_str type;
   NOTE(review): presumably expands to map_str_append() - confirm against the macro definition */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1072
1073 /*
1074     searches the map for specified key,
1075     if the key is matched, *data is filled with data associated with the key,
1076     returns 0 if the key is matched,
1077     returns 1 otherwise
1078 */
1079 static int map_str_find (map_str **ma, const byte *key, byte **data)
1080 {
1081     while (*ma)
1082     {
1083         if (str_equal ((**ma).key, key))
1084         {
1085             *data = str_duplicate ((**ma).data);
1086             if (*data == NULL)
1087                 return 1;
1088
1089             return 0;
1090         }
1091
1092         ma = &(**ma).next;
1093     }
1094
1095     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1096     return 1;
1097 }
1098
/*
    string to rule map typedef,
    a singly linked list that associates rule names with rule objects
*/
typedef struct map_rule_
{
    /* key string, released by map_rule_destroy */
    byte *key;
    /* rule associated with the key, NOT released by map_rule_destroy */
    rule *data;
    /* next entry of the map, NULL at the end of the list */
    struct map_rule_ *next;
} map_rule;
1108
1109 static void map_rule_create (map_rule **ma)
1110 {
1111     *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1112     if (*ma)
1113     {
1114         (**ma).key = NULL;
1115         (**ma).data = NULL;
1116         (**ma).next = NULL;
1117     }
1118 }
1119
1120 static void map_rule_destroy (map_rule **ma)
1121 {
1122     if (*ma)
1123     {
1124         map_rule_destroy (&(**ma).next);
1125         mem_free ((void **) &(**ma).key);
1126         mem_free ((void **) ma);
1127     }
1128 }
1129
/* instantiates the list-append helper for the map_rule type;
   NOTE(review): presumably expands to map_rule_append() - confirm against the macro definition */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1131
1132 /*
1133     searches the map for specified key,
1134     if the key is matched, *data is filled with data associated with the key,
1135     returns 0 if the is matched,
1136     returns 1 otherwise
1137 */
1138 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1139 {
1140     while (*ma)
1141     {
1142         if (str_equal ((**ma).key, key))
1143         {
1144             *data = (**ma).data;
1145
1146             return 0;
1147         }
1148
1149         ma = &(**ma).next;
1150     }
1151
1152     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1153     return 1;
1154 }
1155
1156 /*
1157     returns 1 if given character is a white space,
1158     returns 0 otherwise
1159 */
1160 static int is_space (byte c)
1161 {
1162     return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1163 }
1164
1165 /*
1166     advances text pointer by 1 if character pointed by *text is a space,
1167     returns 1 if a space has been eaten,
1168     returns 0 otherwise
1169 */
1170 static int eat_space (const byte **text)
1171 {
1172     if (is_space (**text))
1173     {
1174         (*text)++;
1175
1176         return 1;
1177     }
1178
1179     return 0;
1180 }
1181
1182 /*
1183     returns 1 if text points to C-style comment start string,
1184     returns 0 otherwise
1185 */
1186 static int is_comment_start (const byte *text)
1187 {
1188     return text[0] == '/' && text[1] == '*';
1189 }
1190
1191 /*
1192     advances text pointer to first character after C-style comment block - if any,
1193     returns 1 if C-style comment block has been encountered and eaten,
1194     returns 0 otherwise
1195 */
1196 static int eat_comment (const byte **text)
1197 {
1198     if (is_comment_start (*text))
1199     {
1200         /* *text points to comment block - skip two characters to enter comment body */
1201         *text += 2;
1202         /* skip any character except consecutive '*' and '/' */
1203         while (!((*text)[0] == '*' && (*text)[1] == '/'))
1204             (*text)++;
1205         /* skip those two terminating characters */
1206         *text += 2;
1207
1208         return 1;
1209     }
1210
1211     return 0;
1212 }
1213
1214 /*
1215     advances text pointer to first character that is neither space nor C-style comment block
1216 */
1217 static void eat_spaces (const byte **text)
1218 {
1219     while (eat_space (text) || eat_comment (text))
1220         ;
1221 }
1222
1223 /*
1224     resizes string pointed by *ptr to successfully add character c to the end of the string,
1225     returns 0 on success,
1226     returns 1 otherwise
1227 */
1228 static int string_grow (byte **ptr, unsigned int *len, byte c)
1229 {
1230     /* reallocate the string in 16-byte increments */
1231     if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1232     {
1233         byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1234             ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1235         if (tmp == NULL)
1236             return 1;
1237
1238         *ptr = tmp;
1239     }
1240
1241     if (c)
1242     {
1243         /* append given character */
1244         (*ptr)[*len] = c;
1245         (*len)++;
1246     }
1247     (*ptr)[*len] = '\0';
1248
1249     return 0;
1250 }
1251
1252 /*
1253     returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1254     returns 0 otherwise
1255 */
1256 static int is_identifier (byte c)
1257 {
1258     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1259 }
1260
1261 /*
1262     copies characters from *text to *id until non-identifier character is encountered,
1263     assumes that *id points to NULL object - caller is responsible for later freeing the string,
1264     text pointer is advanced to point past the copied identifier,
1265     returns 0 if identifier was successfully copied,
1266     returns 1 otherwise
1267 */
1268 static int get_identifier (const byte **text, byte **id)
1269 {
1270     const byte *t = *text;
1271     byte *p = NULL;
1272     unsigned int len = 0;
1273
1274     if (string_grow (&p, &len, '\0'))
1275         return 1;
1276
1277     /* loop while next character in buffer is valid for identifiers */
1278     while (is_identifier (*t))
1279     {
1280         if (string_grow (&p, &len, *t++))
1281         {
1282             mem_free ((void **) (void *) &p);
1283             return 1;
1284         }
1285     }
1286
1287     *text = t;
1288     *id = p;
1289
1290     return 0;
1291 }
1292
1293 /*
1294     converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1295     advances text pointer past the converted sequence,
1296     returns the converted value
1297 */
1298 static unsigned int dec_convert (const byte **text)
1299 {
1300     unsigned int value = 0;
1301
1302     while (**text >= '0' && **text <= '9')
1303     {
1304         value = value * 10 + **text - '0';
1305         (*text)++;
1306     }
1307
1308     return value;
1309 }
1310
1311 /*
1312     returns 1 if given character is HEX digit 0-9, A-F or a-f,
1313     returns 0 otherwise
1314 */
1315 static int is_hex (byte c)
1316 {
1317     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1318 }
1319
1320 /*
1321     returns value of passed character as if it was HEX digit
1322 */
1323 static unsigned int hex2dec (byte c)
1324 {
1325     if (c >= '0' && c <= '9')
1326         return c - '0';
1327     if (c >= 'A' && c <= 'F')
1328         return c - 'A' + 10;
1329     return c - 'a' + 10;
1330 }
1331
1332 /*
1333     converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1334     advances text pointer past the converted sequence,
1335     returns the converted value
1336 */
1337 static unsigned int hex_convert (const byte **text)
1338 {
1339     unsigned int value = 0;
1340
1341     while (is_hex (**text))
1342     {
1343         value = value * 0x10 + hex2dec (**text);
1344         (*text)++;
1345     }
1346
1347     return value;
1348 }
1349
1350 /*
1351     returns 1 if given character is OCT digit 0-7,
1352     returns 0 otherwise
1353 */
1354 static int is_oct (byte c)
1355 {
1356     return c >= '0' && c <= '7';
1357 }
1358
1359 /*
1360     returns value of passed character as if it was OCT digit
1361 */
1362 static int oct2dec (byte c)
1363 {
1364     return c - '0';
1365 }
1366
1367 static byte get_escape_sequence (const byte **text)
1368 {
1369     int value = 0;
1370
1371     /* skip '\' character */
1372     (*text)++;
1373
1374     switch (*(*text)++)
1375     {
1376     case '\'':
1377         return '\'';
1378     case '"':
1379         return '\"';
1380     case '?':
1381         return '\?';
1382     case '\\':
1383         return '\\';
1384     case 'a':
1385         return '\a';
1386     case 'b':
1387         return '\b';
1388     case 'f':
1389         return '\f';
1390     case 'n':
1391         return '\n';
1392     case 'r':
1393         return '\r';
1394     case 't':
1395         return '\t';
1396     case 'v':
1397         return '\v';
1398     case 'x':
1399         return (byte) hex_convert (text);
1400     }
1401
1402     (*text)--;
1403     if (is_oct (**text))
1404     {
1405         value = oct2dec (*(*text)++);
1406         if (is_oct (**text))
1407         {
1408             value = value * 010 + oct2dec (*(*text)++);
1409             if (is_oct (**text))
1410                 value = value * 010 + oct2dec (*(*text)++);
1411         }
1412     }
1413
1414     return (byte) value;
1415 }
1416
1417 /*
1418     copies characters from *text to *str until " or ' character is encountered,
1419     assumes that *str points to NULL object - caller is responsible for later freeing the string,
1420     assumes that *text points to " or ' character that starts the string,
1421     text pointer is advanced to point past the " or ' character,
1422     returns 0 if string was successfully copied,
1423     returns 1 otherwise
1424 */
1425 static int get_string (const byte **text, byte **str)
1426 {
1427     const byte *t = *text;
1428     byte *p = NULL;
1429     unsigned int len = 0;
1430     byte term_char;
1431
1432     if (string_grow (&p, &len, '\0'))
1433         return 1;
1434
1435     /* read " or ' character that starts the string */
1436     term_char = *t++;
1437     /* while next character is not the terminating character */
1438     while (*t && *t != term_char)
1439     {
1440         byte c;
1441
1442         if (*t == '\\')
1443             c = get_escape_sequence (&t);
1444         else
1445             c = *t++;
1446
1447         if (string_grow (&p, &len, c))
1448         {
1449             mem_free ((void **) (void *) &p);
1450             return 1;
1451         }
1452     }
1453     /* skip " or ' character that ends the string */
1454     t++;
1455
1456     *text = t;
1457     *str = p;
1458     return 0;
1459 }
1460
1461 /*
1462     gets emit code, the syntax is:
1463     ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1464     assumes that *text already points to <symbol>,
1465     returns 0 if emit code is successfully read,
1466     returns 1 otherwise
1467 */
1468 static int get_emtcode (const byte **text, map_byte **ma)
1469 {
1470     const byte *t = *text;
1471     map_byte *m = NULL;
1472
1473     map_byte_create (&m);
1474     if (m == NULL)
1475         return 1;
1476
1477     if (get_identifier (&t, &m->key))
1478     {
1479         map_byte_destroy (&m);
1480         return 1;
1481     }
1482     eat_spaces (&t);
1483
1484     if (*t == '\'')
1485     {
1486         byte *c;
1487
1488         if (get_string (&t, &c))
1489         {
1490             map_byte_destroy (&m);
1491             return 1;
1492         }
1493
1494         m->data = (byte) c[0];
1495         mem_free ((void **) (void *) &c);
1496     }
1497     else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1498     {
1499         /* skip HEX "0x" or "0X" prefix */
1500         t += 2;
1501         m->data = (byte) hex_convert (&t);
1502     }
1503     else
1504     {
1505         m->data = (byte) dec_convert (&t);
1506     }
1507
1508     eat_spaces (&t);
1509
1510     *text = t;
1511     *ma = m;
1512     return 0;
1513 }
1514
/*
    gets regbyte declaration, the syntax is:
    ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
    assumes that *text already points to <symbol>,
    the work is delegated to get_emtcode(), so *text and *ma are updated exactly as it does,
    returns 0 if regbyte is successfully read,
    returns 1 otherwise
*/
static int get_regbyte (const byte **text, map_byte **ma)
{
    /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
    return get_emtcode (text, ma);
}
1527
1528 /*
1529     returns 0 on success,
1530     returns 1 otherwise
1531 */
1532 static int get_errtext (const byte **text, map_str **ma)
1533 {
1534     const byte *t = *text;
1535     map_str *m = NULL;
1536
1537     map_str_create (&m);
1538     if (m == NULL)
1539         return 1;
1540
1541     if (get_identifier (&t, &m->key))
1542     {
1543         map_str_destroy (&m);
1544         return 1;
1545     }
1546     eat_spaces (&t);
1547
1548     if (get_string (&t, &m->data))
1549     {
1550         map_str_destroy (&m);
1551         return 1;
1552     }
1553     eat_spaces (&t);
1554
1555     *text = t;
1556     *ma = m;
1557     return 0;
1558 }
1559
1560 /*
1561     returns 0 on success,
1562     returns 1 otherwise,
1563 */
1564 static int get_error (const byte **text, error **er, map_str *maps)
1565 {
1566     const byte *t = *text;
1567     byte *temp = NULL;
1568
1569     if (*t != '.')
1570         return 0;
1571
1572     t++;
1573     if (get_identifier (&t, &temp))
1574         return 1;
1575     eat_spaces (&t);
1576
1577     if (!str_equal ((byte *) "error", temp))
1578     {
1579         mem_free ((void **) (void *) &temp);
1580         return 0;
1581     }
1582
1583     mem_free ((void **) (void *) &temp);
1584
1585     error_create (er);
1586     if (*er == NULL)
1587         return 1;
1588
1589     if (*t == '\"')
1590     {
1591         if (get_string (&t, &(**er).m_text))
1592         {
1593             error_destroy (er);
1594             return 1;
1595         }
1596         eat_spaces (&t);
1597     }
1598     else
1599     {
1600         if (get_identifier (&t, &temp))
1601         {
1602             error_destroy (er);
1603             return 1;
1604         }
1605         eat_spaces (&t);
1606
1607         if (map_str_find (&maps, temp, &(**er).m_text))
1608         {
1609             mem_free ((void **) (void *) &temp);
1610             error_destroy (er);
1611             return 1;
1612         }
1613
1614         mem_free ((void **) (void *) &temp);
1615     }
1616
1617     /* try to extract "token" from "...$token$..." */
1618     {
1619         byte *processed = NULL;
1620         unsigned int len = 0, i = 0;
1621
1622         if (string_grow (&processed, &len, '\0'))
1623         {
1624             error_destroy (er);
1625             return 1;
1626         }
1627
1628         while (i < str_length ((**er).m_text))
1629         {
1630             /* check if the dollar sign is repeated - if so skip it */
1631             if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1632             {
1633                 if (string_grow (&processed, &len, '$'))
1634                 {
1635                     mem_free ((void **) (void *) &processed);
1636                     error_destroy (er);
1637                     return 1;
1638                 }
1639
1640                 i += 2;
1641             }
1642             else if ((**er).m_text[i] != '$')
1643             {
1644                 if (string_grow (&processed, &len, (**er).m_text[i]))
1645                 {
1646                     mem_free ((void **) (void *) &processed);
1647                     error_destroy (er);
1648                     return 1;
1649                 }
1650
1651                 i++;
1652             }
1653             else
1654             {
1655                 if (string_grow (&processed, &len, '$'))
1656                 {
1657                     mem_free ((void **) (void *) &processed);
1658                     error_destroy (er);
1659                     return 1;
1660                 }
1661
1662                 {
1663                     /* length of token being extracted */
1664                     unsigned int tlen = 0;
1665
1666                     if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1667                     {
1668                         mem_free ((void **) (void *) &processed);
1669                         error_destroy (er);
1670                         return 1;
1671                     }
1672
1673                     /* skip the dollar sign */
1674                     i++;
1675
1676                     while ((**er).m_text[i] != '$')
1677                     {
1678                         if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1679                         {
1680                             mem_free ((void **) (void *) &processed);
1681                             error_destroy (er);
1682                             return 1;
1683                         }
1684
1685                         i++;
1686                     }
1687
1688                     /* skip the dollar sign */
1689                     i++;
1690                 }
1691             }
1692         }
1693
1694         mem_free ((void **) &(**er).m_text);
1695         (**er).m_text = processed;
1696     }
1697
1698     *text = t;
1699     return 0;
1700 }
1701
1702 /*
1703     returns 0 on success,
1704     returns 1 otherwise,
1705 */
1706 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1707 {
1708     const byte *t = *text;
1709     byte *temp = NULL;
1710     emit *e = NULL;
1711     emit_dest dest;
1712
1713     if (*t != '.')
1714         return 0;
1715
1716     t++;
1717     if (get_identifier (&t, &temp))
1718         return 1;
1719     eat_spaces (&t);
1720
1721     /* .emit */
1722     if (str_equal ((byte *) "emit", temp))
1723         dest = ed_output;
1724     /* .load */
1725     else if (str_equal ((byte *) "load", temp))
1726         dest = ed_regbyte;
1727     else
1728     {
1729         mem_free ((void **) (void *) &temp);
1730         return 0;
1731     }
1732
1733     mem_free ((void **) (void *) &temp);
1734
1735     emit_create (&e);
1736     if (e == NULL)
1737         return 1;
1738
1739     e->m_emit_dest = dest;
1740
1741     if (dest == ed_regbyte)
1742     {
1743         if (get_identifier (&t, &e->m_regname))
1744         {
1745             emit_destroy (&e);
1746             return 1;
1747         }
1748         eat_spaces (&t);
1749     }
1750
1751     /* 0xNN */
1752     if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1753     {
1754         t += 2;
1755         e->m_byte = (byte) hex_convert (&t);
1756
1757         e->m_emit_type = et_byte;
1758     }
1759     /* NNN */
1760     else if (*t >= '0' && *t <= '9')
1761     {
1762         e->m_byte = (byte) dec_convert (&t);
1763
1764         e->m_emit_type = et_byte;
1765     }
1766     /* * */
1767     else if (*t == '*')
1768     {
1769         t++;
1770
1771         e->m_emit_type = et_stream;
1772     }
1773     /* $ */
1774     else if (*t == '$')
1775     {
1776         t++;
1777
1778         e->m_emit_type = et_position;
1779     }
1780     /* 'c' */
1781     else if (*t == '\'')
1782     {
1783         if (get_string (&t, &temp))
1784         {
1785             emit_destroy (&e);
1786             return 1;
1787         }
1788         e->m_byte = (byte) temp[0];
1789
1790         mem_free ((void **) (void *) &temp);
1791
1792         e->m_emit_type = et_byte;
1793     }
1794     else
1795     {
1796         if (get_identifier (&t, &temp))
1797         {
1798             emit_destroy (&e);
1799             return 1;
1800         }
1801
1802         if (map_byte_find (&mapb, temp, &e->m_byte))
1803         {
1804             mem_free ((void **) (void *) &temp);
1805             emit_destroy (&e);
1806             return 1;
1807         }
1808
1809         mem_free ((void **) (void *) &temp);
1810
1811         e->m_emit_type = et_byte;
1812     }
1813
1814     eat_spaces (&t);
1815
1816     if (get_emits (&t, &e->m_next, mapb))
1817     {
1818         emit_destroy (&e);
1819         return 1;
1820     }
1821
1822     *text = t;
1823     *em = e;
1824     return 0;
1825 }
1826
1827 /*
1828     returns 0 on success,
1829     returns 1 otherwise,
1830 */
1831 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1832 {
1833     const byte *t = *text;
1834     spec *s = NULL;
1835
1836     spec_create (&s);
1837     if (s == NULL)
1838         return 1;
1839
1840     /* first - read optional .if statement */
1841     if (*t == '.')
1842     {
1843         const byte *u = t;
1844         byte *keyword = NULL;
1845
1846         /* skip the dot */
1847         u++;
1848
1849         if (get_identifier (&u, &keyword))
1850         {
1851             spec_destroy (&s);
1852             return 1;
1853         }
1854
1855         /* .if */
1856         if (str_equal ((byte *) "if", keyword))
1857         {
1858             cond_create (&s->m_cond);
1859             if (s->m_cond == NULL)
1860             {
1861                 spec_destroy (&s);
1862                 return 1;
1863             }
1864
1865             /* skip the left paren */
1866             eat_spaces (&u);
1867             u++;
1868
1869             /* get the left operand */
1870             eat_spaces (&u);
1871             if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1872             {
1873                 spec_destroy (&s);
1874                 return 1;
1875             }
1876             s->m_cond->m_operands[0].m_type = cot_regbyte;
1877
1878             /* get the operator (!= or ==) */
1879             eat_spaces (&u);
1880             if (*u == '!')
1881                 s->m_cond->m_type = ct_not_equal;
1882             else
1883                 s->m_cond->m_type = ct_equal;
1884             u += 2;
1885             eat_spaces (&u);
1886
1887             if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1888             {
1889                 /* skip the 0x prefix */
1890                 u += 2;
1891
1892                 /* get the right operand */
1893                 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1894                 s->m_cond->m_operands[1].m_type = cot_byte;
1895             }
1896             else /*if (*u >= '0' && *u <= '9')*/
1897             {
1898                 /* get the right operand */
1899                 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1900                 s->m_cond->m_operands[1].m_type = cot_byte;
1901             }
1902
1903             /* skip the right paren */
1904             eat_spaces (&u);
1905             u++;
1906
1907             eat_spaces (&u);
1908
1909             t = u;
1910         }
1911
1912         mem_free ((void **) (void *) &keyword);
1913     }
1914
1915     if (*t == '\'')
1916     {
1917         byte *temp = NULL;
1918
1919         if (get_string (&t, &temp))
1920         {
1921             spec_destroy (&s);
1922             return 1;
1923         }
1924         eat_spaces (&t);
1925
1926         if (*t == '-')
1927         {
1928             byte *temp2 = NULL;
1929
1930             /* skip the '-' character */
1931             t++;
1932             eat_spaces (&t);
1933
1934             if (get_string (&t, &temp2))
1935             {
1936                 mem_free ((void **) (void *) &temp);
1937                 spec_destroy (&s);
1938                 return 1;
1939             }
1940             eat_spaces (&t);
1941
1942             s->m_spec_type = st_byte_range;
1943             s->m_byte[0] = *temp;
1944             s->m_byte[1] = *temp2;
1945
1946             mem_free ((void **) (void *) &temp2);
1947         }
1948         else
1949         {
1950             s->m_spec_type = st_byte;
1951             *s->m_byte = *temp;
1952         }
1953
1954         mem_free ((void **) (void *) &temp);
1955     }
1956     else if (*t == '"')
1957     {
1958         if (get_string (&t, &s->m_string))
1959         {
1960             spec_destroy (&s);
1961             return 1;
1962         }
1963         eat_spaces (&t);
1964
1965         s->m_spec_type = st_string;
1966     }
1967     else if (*t == '.')
1968     {
1969         byte *keyword = NULL;
1970
1971         /* skip the dot */
1972         t++;
1973
1974         if (get_identifier (&t, &keyword))
1975         {
1976             spec_destroy (&s);
1977             return 1;
1978         }
1979         eat_spaces (&t);
1980
1981         /* .true */
1982         if (str_equal ((byte *) "true", keyword))
1983         {
1984             s->m_spec_type = st_true;
1985         }
1986         /* .false */
1987         else if (str_equal ((byte *) "false", keyword))
1988         {
1989             s->m_spec_type = st_false;
1990         }
1991         /* .debug */
1992         else if (str_equal ((byte *) "debug", keyword))
1993         {
1994             s->m_spec_type = st_debug;
1995         }
1996         /* .loop */
1997         else if (str_equal ((byte *) "loop", keyword))
1998         {
1999             if (get_identifier (&t, &s->m_string))
2000             {
2001                 mem_free ((void **) (void *) &keyword);
2002                 spec_destroy (&s);
2003                 return 1;
2004             }
2005             eat_spaces (&t);
2006
2007             s->m_spec_type = st_identifier_loop;
2008         }
2009         mem_free ((void **) (void *) &keyword);
2010     }
2011     else
2012     {
2013         if (get_identifier (&t, &s->m_string))
2014         {
2015             spec_destroy (&s);
2016             return 1;
2017         }
2018         eat_spaces (&t);
2019
2020         s->m_spec_type = st_identifier;
2021     }
2022
2023     if (get_error (&t, &s->m_errtext, maps))
2024     {
2025         spec_destroy (&s);
2026         return 1;
2027     }
2028
2029     if (get_emits (&t, &s->m_emits, mapb))
2030     {
2031         spec_destroy (&s);
2032         return 1;
2033     }
2034
2035     *text = t;
2036     *sp = s;
2037     return 0;
2038 }
2039
2040 /*
2041     returns 0 on success,
2042     returns 1 otherwise,
2043 */
2044 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2045 {
2046     const byte *t = *text;
2047     rule *r = NULL;
2048
2049     rule_create (&r);
2050     if (r == NULL)
2051         return 1;
2052
2053     if (get_spec (&t, &r->m_specs, maps, mapb))
2054     {
2055         rule_destroy (&r);
2056         return 1;
2057     }
2058
2059     while (*t != ';')
2060     {
2061         byte *op = NULL;
2062         spec *sp = NULL;
2063
2064         /* skip the dot that precedes "and" or "or" */
2065         t++;
2066
2067         /* read "and" or "or" keyword */
2068         if (get_identifier (&t, &op))
2069         {
2070             rule_destroy (&r);
2071             return 1;
2072         }
2073         eat_spaces (&t);
2074
2075         if (r->m_oper == op_none)
2076         {
2077             /* .and */
2078             if (str_equal ((byte *) "and", op))
2079                 r->m_oper = op_and;
2080             /* .or */
2081             else
2082                 r->m_oper = op_or;
2083         }
2084
2085         mem_free ((void **) (void *) &op);
2086
2087         if (get_spec (&t, &sp, maps, mapb))
2088         {
2089             rule_destroy (&r);
2090             return 1;
2091         }
2092
2093         spec_append (&r->m_specs, sp);
2094     }
2095
2096     /* skip the semicolon */
2097     t++;
2098     eat_spaces (&t);
2099
2100     *text = t;
2101     *ru = r;
2102     return 0;
2103 }
2104
2105 /*
2106     returns 0 on success,
2107     returns 1 otherwise,
2108 */
2109 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2110 {
2111     if (map_rule_find (&mapr, symbol, ru))
2112         return 1;
2113
2114     (**ru).m_referenced = 1;
2115
2116     return 0;
2117 }
2118
2119 /*
2120     returns 0 on success,
2121     returns 1 otherwise,
2122 */
2123 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2124     byte **string_symbol, map_byte *regbytes)
2125 {
2126     rule *rulez = di->m_rulez;
2127
2128     /* update dependecies for the root and lexer symbols */
2129     if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2130         (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2131         return 1;
2132
2133     mem_free ((void **) syntax_symbol);
2134     mem_free ((void **) string_symbol);
2135
2136     /* update dependecies for the rest of the rules */
2137     while (rulez)
2138     {
2139         spec *sp = rulez->m_specs;
2140
2141         /* iterate through all the specifiers */
2142         while (sp)
2143         {
2144             /* update dependency for identifier */
2145             if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2146             {
2147                 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2148                     return 1;
2149
2150                 mem_free ((void **) &sp->m_string);
2151             }
2152
2153             /* some errtexts reference to a rule */
2154             if (sp->m_errtext && sp->m_errtext->m_token_name)
2155             {
2156                 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2157                     return 1;
2158
2159                 mem_free ((void **) &sp->m_errtext->m_token_name);
2160             }
2161
2162             /* update dependency for condition */
2163             if (sp->m_cond)
2164             {
2165                 int i;
2166                 for (i = 0; i < 2; i++)
2167                     if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2168                     {
2169                         sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2170                             sp->m_cond->m_operands[i].m_regname);
2171
2172                         if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2173                             return 1;
2174
2175                         mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2176                     }
2177             }
2178
2179             /* update dependency for all .load instructions */
2180             if (sp->m_emits)
2181             {
2182                 emit *em = sp->m_emits;
2183                 while (em != NULL)
2184                 {
2185                     if (em->m_emit_dest == ed_regbyte)
2186                     {
2187                         em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2188
2189                         if (em->m_regbyte == NULL)
2190                             return 1;
2191
2192                         mem_free ((void **) &em->m_regname);
2193                     }
2194
2195                     em = em->m_next;
2196                 }
2197             }
2198
2199             sp = sp->next;
2200         }
2201
2202         rulez = rulez->next;
2203     }
2204
2205     /* check for unreferenced symbols */
2206     rulez = di->m_rulez;
2207     while (rulez != NULL)
2208     {
2209         if (!rulez->m_referenced)
2210         {
2211             map_rule *ma = mapr;
2212             while (ma)
2213             {
2214                 if (ma->data == rulez)
2215                 {
2216                     set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2217                     return 1;
2218                 }
2219                 ma = ma->next;
2220             }
2221         }
2222         rulez = rulez->next;
2223     }
2224
2225     return 0;
2226 }
2227
2228 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2229 {
2230     byte values[2];
2231     int i;
2232
2233     if (co == NULL)
2234         return 1;
2235
2236     for (i = 0; i < 2; i++)
2237         switch (co->m_operands[i].m_type)
2238         {
2239         case cot_byte:
2240             values[i] = co->m_operands[i].m_byte;
2241             break;
2242         case cot_regbyte:
2243             values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2244             break;
2245         }
2246
2247     switch (co->m_type)
2248     {
2249     case ct_equal:
2250         return values[0] == values[1];
2251     case ct_not_equal:
2252         return values[0] != values[1];
2253     }
2254
2255     return 0;
2256 }
2257
2258 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2259 {
2260     while (top != limit)
2261     {
2262         regbyte_ctx *rbc = top->m_prev;
2263         regbyte_ctx_destroy (&top);
2264         top = rbc;
2265     }
2266 }
2267
/*
    Outcome of matching a rule or a single specifier against the input.
*/
typedef enum match_result_
{
    /* the examined string does not match */
    mr_not_matched = 0,
    /* the examined string matches */
    mr_matched = 1,
    /* mr_not_matched + error has been raised */
    mr_error_raised = 2,
    /* used by identifier loops only */
    mr_dont_emit = 3,
    /* an internal error has occurred such as out of memory */
    mr_internal_error = 4
} match_result;
2276
2277 /*
2278     This function does the main job. It parses the text and generates output data.
2279 */
2280 static match_result match (dict *di, const byte *text, unsigned int *index, rule *ru, barray **ba,
2281     int filtering_string, regbyte_ctx **rbc)
2282 {
2283     unsigned int ind = *index;
2284     match_result status = mr_not_matched;
2285     spec *sp = ru->m_specs;
2286     regbyte_ctx *ctx = *rbc;
2287
2288     /* for every specifier in the rule */
2289     while (sp)
2290     {
2291         unsigned int i, len, save_ind = ind;
2292         barray *array = NULL;
2293
2294         if (satisfies_condition (sp->m_cond, ctx))
2295         {
2296             switch (sp->m_spec_type)
2297             {
2298             case st_identifier:
2299                 barray_create (&array);
2300                 if (array == NULL)
2301                 {
2302                     free_regbyte_ctx_stack (ctx, *rbc);
2303                     return mr_internal_error;
2304                 }
2305
2306                 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2307
2308                 if (status == mr_internal_error)
2309                 {
2310                     free_regbyte_ctx_stack (ctx, *rbc);
2311                     barray_destroy (&array);
2312                     return mr_internal_error;
2313                 }
2314                 break;
2315             case st_string:
2316                 len = str_length (sp->m_string);
2317
2318                 /* prefilter the stream */
2319                 if (!filtering_string && di->m_string)
2320                 {
2321                     barray *ba;
2322                     unsigned int filter_index = 0;
2323                     match_result result;
2324                     regbyte_ctx *null_ctx = NULL;
2325
2326                     barray_create (&ba);
2327                     if (ba == NULL)
2328                     {
2329                         free_regbyte_ctx_stack (ctx, *rbc);
2330                         return mr_internal_error;
2331                     }
2332
2333                     result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2334
2335                     if (result == mr_internal_error)
2336                     {
2337                         free_regbyte_ctx_stack (ctx, *rbc);
2338                         barray_destroy (&ba);
2339                         return mr_internal_error;
2340                     }
2341
2342                     if (result != mr_matched)
2343                     {
2344                         barray_destroy (&ba);
2345                         status = mr_not_matched;
2346                         break;
2347                     }
2348
2349                     barray_destroy (&ba);
2350
2351                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2352                     {
2353                         status = mr_not_matched;
2354                         break;
2355                     }
2356
2357                     status = mr_matched;
2358                     ind += len;
2359                 }
2360                 else
2361                 {
2362                     status = mr_matched;
2363                     for (i = 0; status == mr_matched && i < len; i++)
2364                         if (text[ind + i] != sp->m_string[i])
2365                             status = mr_not_matched;
2366
2367                     if (status == mr_matched)
2368                         ind += len;
2369                 }
2370                 break;
2371             case st_byte:
2372                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2373                 if (status == mr_matched)
2374                     ind++;
2375                 break;
2376             case st_byte_range:
2377                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2378                     mr_matched : mr_not_matched;
2379                 if (status == mr_matched)
2380                     ind++;
2381                 break;
2382             case st_true:
2383                 status = mr_matched;
2384                 break;
2385             case st_false:
2386                 status = mr_not_matched;
2387                 break;
2388             case st_debug:
2389                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2390                 break;
2391             case st_identifier_loop:
2392                 barray_create (&array);
2393                 if (array == NULL)
2394                 {
2395                     free_regbyte_ctx_stack (ctx, *rbc);
2396                     return mr_internal_error;
2397                 }
2398
2399                 status = mr_dont_emit;
2400                 for (;;)
2401                 {
2402                     match_result result;
2403
2404                     save_ind = ind;
2405                     result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2406
2407                     if (result == mr_error_raised)
2408                     {
2409                         status = result;
2410                         break;
2411                     }
2412                     else if (result == mr_matched)
2413                     {
2414                         if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2415                             barray_append (ba, &array))
2416                         {
2417                             free_regbyte_ctx_stack (ctx, *rbc);
2418                             barray_destroy (&array);
2419                             return mr_internal_error;
2420                         }
2421                         barray_destroy (&array);
2422                         barray_create (&array);
2423                         if (array == NULL)
2424                         {
2425                             free_regbyte_ctx_stack (ctx, *rbc);
2426                             return mr_internal_error;
2427                         }
2428                     }
2429                     else if (result == mr_internal_error)
2430                     {
2431                         free_regbyte_ctx_stack (ctx, *rbc);
2432                         barray_destroy (&array);
2433                         return mr_internal_error;
2434                     }
2435                     else
2436                         break;
2437                 }
2438                 break;
2439             }
2440         }
2441         else
2442         {
2443             status = mr_not_matched;
2444         }
2445
2446         if (status == mr_error_raised)
2447         {
2448             free_regbyte_ctx_stack (ctx, *rbc);
2449             barray_destroy (&array);
2450
2451             return mr_error_raised;
2452         }
2453
2454         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2455         {
2456             free_regbyte_ctx_stack (ctx, *rbc);
2457             barray_destroy (&array);
2458
2459             if (sp->m_errtext)
2460             {
2461                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2462                     ind), ind);
2463
2464                 return mr_error_raised;
2465             }
2466
2467             return mr_not_matched;
2468         }
2469
2470         if (status == mr_matched)
2471         {
2472             if (sp->m_emits)
2473                 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2474                 {
2475                     free_regbyte_ctx_stack (ctx, *rbc);
2476                     barray_destroy (&array);
2477                     return mr_internal_error;
2478                 }
2479
2480             if (array)
2481                 if (barray_append (ba, &array))
2482                 {
2483                     free_regbyte_ctx_stack (ctx, *rbc);
2484                     barray_destroy (&array);
2485                     return mr_internal_error;
2486                 }
2487         }
2488
2489         barray_destroy (&array);
2490
2491         /* if the rule operator is a logical or, we pick up the first matching specifier */
2492         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2493         {
2494             *index = ind;
2495             *rbc = ctx;
2496             return mr_matched;
2497         }
2498
2499         sp = sp->next;
2500     }
2501
2502     /* everything went fine - all specifiers match up */
2503     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2504     {
2505         *index = ind;
2506         *rbc = ctx;
2507         return mr_matched;
2508     }
2509
2510     free_regbyte_ctx_stack (ctx, *rbc);
2511     return mr_not_matched;
2512 }
2513
2514 static match_result fast_match (dict *di, const byte *text, unsigned int *index, rule *ru, int *_PP, bytepool *_BP,
2515     int filtering_string, regbyte_ctx **rbc)
2516 {
2517     unsigned int ind = *index;
2518     int _P = filtering_string ? 0 : *_PP;
2519     int _P2;
2520     match_result status = mr_not_matched;
2521     spec *sp = ru->m_specs;
2522     regbyte_ctx *ctx = *rbc;
2523
2524     /* for every specifier in the rule */
2525     while (sp)
2526     {
2527         unsigned int i, len, save_ind = ind;
2528
2529         _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2530         if (bytepool_reserve (_BP, _P2))
2531         {
2532             free_regbyte_ctx_stack (ctx, *rbc);
2533             return mr_internal_error;
2534         }
2535
2536         if (satisfies_condition (sp->m_cond, ctx))
2537         {
2538             switch (sp->m_spec_type)
2539             {
2540             case st_identifier:
2541                 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2542
2543                 if (status == mr_internal_error)
2544                 {
2545                     free_regbyte_ctx_stack (ctx, *rbc);
2546                     return mr_internal_error;
2547                 }
2548                 break;
2549             case st_string:
2550                 len = str_length (sp->m_string);
2551
2552                 /* prefilter the stream */
2553                 if (!filtering_string && di->m_string)
2554                 {
2555                     unsigned int filter_index = 0;
2556                     match_result result;
2557                     regbyte_ctx *null_ctx = NULL;
2558
2559                     result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2560
2561                     if (result == mr_internal_error)
2562                     {
2563                         free_regbyte_ctx_stack (ctx, *rbc);
2564                         return mr_internal_error;
2565                     }
2566
2567                     if (result != mr_matched)
2568                     {
2569                         status = mr_not_matched;
2570                         break;
2571                     }
2572
2573                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2574                     {
2575                         status = mr_not_matched;
2576                         break;
2577                     }
2578
2579                     status = mr_matched;
2580                     ind += len;
2581                 }
2582                 else
2583                 {
2584                     status = mr_matched;
2585                     for (i = 0; status == mr_matched && i < len; i++)
2586                         if (text[ind + i] != sp->m_string[i])
2587                             status = mr_not_matched;
2588
2589                     if (status == mr_matched)
2590                         ind += len;
2591                 }
2592                 break;
2593             case st_byte:
2594                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2595                 if (status == mr_matched)
2596                     ind++;
2597                 break;
2598             case st_byte_range:
2599                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2600                     mr_matched : mr_not_matched;
2601                 if (status == mr_matched)
2602                     ind++;
2603                 break;
2604             case st_true:
2605                 status = mr_matched;
2606                 break;
2607             case st_false:
2608                 status = mr_not_matched;
2609                 break;
2610             case st_debug:
2611                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2612                 break;
2613             case st_identifier_loop:
2614                 status = mr_dont_emit;
2615                 for (;;)
2616                 {
2617                     match_result result;
2618
2619                     save_ind = ind;
2620                     result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2621
2622                     if (result == mr_error_raised)
2623                     {
2624                         status = result;
2625                         break;
2626                     }
2627                     else if (result == mr_matched)
2628                     {
2629                         if (!filtering_string)
2630                         {
2631                             if (sp->m_emits != NULL)
2632                             {
2633                                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2634                                 {
2635                                     free_regbyte_ctx_stack (ctx, *rbc);
2636                                     return mr_internal_error;
2637                                 }
2638                             }
2639
2640                             _P = _P2;
2641                             _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2642                             if (bytepool_reserve (_BP, _P2))
2643                             {
2644                                 free_regbyte_ctx_stack (ctx, *rbc);
2645                                 return mr_internal_error;
2646                             }
2647                         }
2648                     }
2649                     else if (result == mr_internal_error)
2650                     {
2651                         free_regbyte_ctx_stack (ctx, *rbc);
2652                         return mr_internal_error;
2653                     }
2654                     else
2655                         break;
2656                 }
2657                 break;
2658             }
2659         }
2660         else
2661         {
2662             status = mr_not_matched;
2663         }
2664
2665         if (status == mr_error_raised)
2666         {
2667             free_regbyte_ctx_stack (ctx, *rbc);
2668
2669             return mr_error_raised;
2670         }
2671
2672         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2673         {
2674             free_regbyte_ctx_stack (ctx, *rbc);
2675
2676             if (sp->m_errtext)
2677             {
2678                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2679                     ind), ind);
2680
2681                 return mr_error_raised;
2682             }
2683
2684             return mr_not_matched;
2685         }
2686
2687         if (status == mr_matched)
2688         {
2689             if (sp->m_emits != NULL)
2690                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2691                 {
2692                     free_regbyte_ctx_stack (ctx, *rbc);
2693                     return mr_internal_error;
2694                 }
2695
2696             _P = _P2;
2697         }
2698
2699         /* if the rule operator is a logical or, we pick up the first matching specifier */
2700         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2701         {
2702             *index = ind;
2703             *rbc = ctx;
2704             if (!filtering_string)
2705                 *_PP = _P;
2706             return mr_matched;
2707         }
2708
2709         sp = sp->next;
2710     }
2711
2712     /* everything went fine - all specifiers match up */
2713     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2714     {
2715         *index = ind;
2716         *rbc = ctx;
2717         if (!filtering_string)
2718             *_PP = _P;
2719         return mr_matched;
2720     }
2721
2722     free_regbyte_ctx_stack (ctx, *rbc);
2723     return mr_not_matched;
2724 }
2725
2726 static byte *error_get_token (error *er, dict *di, const byte *text, unsigned int ind)
2727 {
2728     byte *str = NULL;
2729
2730     if (er->m_token)
2731     {
2732         barray *ba;
2733         unsigned int filter_index = 0;
2734         regbyte_ctx *ctx = NULL;
2735
2736         barray_create (&ba);
2737         if (ba != NULL)
2738         {
2739             if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2740                 filter_index)
2741             {
2742                 str = (byte *) mem_alloc (filter_index + 1);
2743                 if (str != NULL)
2744                 {
2745                     str_copy_n (str, text + ind, filter_index);
2746                     str[filter_index] = '\0';
2747                 }
2748             }
2749             barray_destroy (&ba);
2750         }
2751     }
2752
2753     return str;
2754 }
2755
2756 typedef struct grammar_load_state_
2757 {
2758     dict *di;
2759     byte *syntax_symbol;
2760     byte *string_symbol;
2761     map_str *maps;
2762     map_byte *mapb;
2763     map_rule *mapr;
2764 } grammar_load_state;
2765
2766 static void grammar_load_state_create (grammar_load_state **gr)
2767 {
2768     *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2769     if (*gr)
2770     {
2771         (**gr).di = NULL;
2772         (**gr).syntax_symbol = NULL;
2773         (**gr).string_symbol = NULL;
2774         (**gr).maps = NULL;
2775         (**gr).mapb = NULL;
2776         (**gr).mapr = NULL;
2777     }
2778 }
2779
2780 static void grammar_load_state_destroy (grammar_load_state **gr)
2781 {
2782     if (*gr)
2783     {
2784         dict_destroy (&(**gr).di);
2785         mem_free ((void **) &(**gr).syntax_symbol);
2786         mem_free ((void **) &(**gr).string_symbol);
2787         map_str_destroy (&(**gr).maps);
2788         map_byte_destroy (&(**gr).mapb);
2789         map_rule_destroy (&(**gr).mapr);
2790         mem_free ((void **) gr);
2791     }
2792 }
2793
2794 /*
2795     the API
2796 */
2797
2798 grammar grammar_load_from_text (const byte *text)
2799 {
2800     grammar_load_state *g = NULL;
2801     grammar id = 0;
2802
2803     clear_last_error ();
2804
2805     grammar_load_state_create (&g);
2806     if (g == NULL)
2807         return 0;
2808
2809     dict_create (&g->di);
2810     if (g->di == NULL)
2811     {
2812         grammar_load_state_destroy (&g);
2813         return 0;
2814     }
2815
2816     eat_spaces (&text);
2817
2818     /* skip ".syntax" keyword */
2819     text += 7;
2820     eat_spaces (&text);
2821
2822     /* retrieve root symbol */
2823     if (get_identifier (&text, &g->syntax_symbol))
2824     {
2825         grammar_load_state_destroy (&g);
2826         return 0;
2827     }
2828     eat_spaces (&text);
2829
2830     /* skip semicolon */
2831     text++;
2832     eat_spaces (&text);
2833
2834     while (*text)
2835     {
2836         byte *symbol = NULL;
2837         int is_dot = *text == '.';
2838
2839         if (is_dot)
2840             text++;
2841
2842         if (get_identifier (&text, &symbol))
2843         {
2844             grammar_load_state_destroy (&g);
2845             return 0;
2846         }
2847         eat_spaces (&text);
2848
2849         /* .emtcode */
2850         if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2851         {
2852             map_byte *ma = NULL;
2853
2854             mem_free ((void **) (void *) &symbol);
2855
2856             if (get_emtcode (&text, &ma))
2857             {
2858                 grammar_load_state_destroy (&g);
2859                 return 0;
2860             }
2861
2862             map_byte_append (&g->mapb, ma);
2863         }
2864         /* .regbyte */
2865         else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2866         {
2867             map_byte *ma = NULL;
2868
2869             mem_free ((void **) (void *) &symbol);
2870
2871             if (get_regbyte (&text, &ma))
2872             {
2873                 grammar_load_state_destroy (&g);
2874                 return 0;
2875             }
2876
2877             map_byte_append (&g->di->m_regbytes, ma);
2878         }
2879         /* .errtext */
2880         else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2881         {
2882             map_str *ma = NULL;
2883
2884             mem_free ((void **) (void *) &symbol);
2885
2886             if (get_errtext (&text, &ma))
2887             {
2888                 grammar_load_state_destroy (&g);
2889                 return 0;
2890             }
2891
2892             map_str_append (&g->maps, ma);
2893         }
2894         /* .string */
2895         else if (is_dot && str_equal (symbol, (byte *) "string"))
2896         {
2897             mem_free ((void **) (void *) &symbol);
2898
2899             if (g->di->m_string != NULL)
2900             {
2901                 grammar_load_state_destroy (&g);
2902                 return 0;
2903             }
2904
2905             if (get_identifier (&text, &g->string_symbol))
2906             {
2907                 grammar_load_state_destroy (&g);
2908                 return 0;
2909             }
2910
2911             /* skip semicolon */
2912             eat_spaces (&text);
2913             text++;
2914             eat_spaces (&text);
2915         }
2916         else
2917         {
2918             rule *ru = NULL;
2919             map_rule *ma = NULL;
2920
2921             if (get_rule (&text, &ru, g->maps, g->mapb))
2922             {
2923                 grammar_load_state_destroy (&g);
2924                 return 0;
2925             }
2926
2927             rule_append (&g->di->m_rulez, ru);
2928
2929             /* if a rule consist of only one specifier, give it an ".and" operator */
2930             if (ru->m_oper == op_none)
2931                 ru->m_oper = op_and;
2932
2933             map_rule_create (&ma);
2934             if (ma == NULL)
2935             {
2936                 grammar_load_state_destroy (&g);
2937                 return 0;
2938             }
2939
2940             ma->key = symbol;
2941             ma->data = ru;
2942             map_rule_append (&g->mapr, ma);
2943         }
2944     }
2945
2946     if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2947         g->di->m_regbytes))
2948     {
2949         grammar_load_state_destroy (&g);
2950         return 0;
2951     }
2952
2953     dict_append (&g_dicts, g->di);
2954     id = g->di->m_id;
2955     g->di = NULL;
2956
2957     grammar_load_state_destroy (&g);
2958
2959     return id;
2960 }
2961
2962 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2963 {
2964     dict *di = NULL;
2965     map_byte *reg = NULL;
2966
2967     clear_last_error ();
2968
2969     dict_find (&g_dicts, id, &di);
2970     if (di == NULL)
2971     {
2972         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2973         return 0;
2974     }
2975
2976     reg = map_byte_locate (&di->m_regbytes, name);
2977     if (reg == NULL)
2978     {
2979         set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2980         return 0;
2981     }
2982
2983     reg->data = value;
2984     return 1;
2985 }
2986
2987 /*
2988     internal checking function used by both grammar_check and grammar_fast_check functions
2989 */
2990 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
2991     unsigned int estimate_prod_size, int use_fast_path)
2992 {
2993     dict *di = NULL;
2994     unsigned int index = 0;
2995
2996     clear_last_error ();
2997
2998     dict_find (&g_dicts, id, &di);
2999     if (di == NULL)
3000     {
3001         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3002         return 0;
3003     }
3004
3005     *prod = NULL;
3006     *size = 0;
3007
3008     if (use_fast_path)
3009     {
3010         regbyte_ctx *rbc = NULL;
3011         bytepool *bp = NULL;
3012         int _P = 0;
3013
3014         bytepool_create (&bp, estimate_prod_size);
3015         if (bp == NULL)
3016             return 0;
3017
3018         if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3019         {
3020             bytepool_destroy (&bp);
3021             free_regbyte_ctx_stack (rbc, NULL);
3022             return 0;
3023         }
3024
3025         free_regbyte_ctx_stack (rbc, NULL);
3026
3027         *prod = bp->_F;
3028         *size = _P;
3029         bp->_F = NULL;
3030         bytepool_destroy (&bp);
3031     }
3032     else
3033     {
3034         regbyte_ctx *rbc = NULL;
3035         barray *ba = NULL;
3036
3037         barray_create (&ba);
3038         if (ba == NULL)
3039             return 0;
3040
3041         if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3042         {
3043             barray_destroy (&ba);
3044             free_regbyte_ctx_stack (rbc, NULL);
3045             return 0;
3046         }
3047
3048         free_regbyte_ctx_stack (rbc, NULL);
3049
3050         *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3051         if (*prod == NULL)
3052         {
3053             barray_destroy (&ba);
3054             return 0;
3055         }
3056
3057         mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3058         *size = ba->len;
3059         barray_destroy (&ba);
3060     }
3061
3062     return 1;
3063 }
3064
3065 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3066 {
3067     return _grammar_check (id, text, prod, size, 0, 0);
3068 }
3069
3070 int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3071     unsigned int estimate_prod_size)
3072 {
3073     return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3074 }
3075
3076 int grammar_destroy (grammar id)
3077 {
3078     dict **di = &g_dicts;
3079
3080     clear_last_error ();
3081
3082     while (*di != NULL)
3083     {
3084         if ((**di).m_id == id)
3085         {
3086             dict *tmp = *di;
3087             *di = (**di).next;
3088             dict_destroy (&tmp);
3089             return 1;
3090         }
3091
3092         di = &(**di).next;
3093     }
3094
3095     set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3096     return 0;
3097 }
3098
/*
    Appends character x to the null-terminated string text, which currently holds *len
    characters in a buffer of size bytes.

    When the character no longer fits, up to three of the last characters already stored
    are overwritten with dots, *len is decreased by three and *dots_made is set; from then
    on every call is a no-op.
*/
static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
{
    int dots_left;

    /* the string has already been truncated */
    if (*dots_made != 0)
        return;

    /* still room for the character and the terminator */
    if (*len < size - 1)
    {
        text[*len] = x;
        *len += 1;
        text[*len] = '\0';
        return;
    }

    /* out of room: turn the tail into an ellipsis, never writing before the buffer */
    for (dots_left = 3; dots_left > 0; dots_left--)
    {
        *len -= 1;
        if (*len >= 0)
            text[*len] = '.';
    }
    *dots_made = 1;
}
3118
3119 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3120 {
3121     int len = 0, dots_made = 0;
3122     const byte *p = error_message;
3123
3124     *text = '\0';
3125
3126     if (p)
3127     {
3128         while (*p)
3129         {
3130             if (*p == '$')
3131             {
3132                 const byte *r = error_param;
3133
3134                 while (*r)
3135                 {
3136                     append_character (*r++, text, &dots_made, &len, (int) size);
3137                 }
3138
3139                 p++;
3140             }
3141             else
3142             {
3143                 append_character (*p++, text, &dots_made, &len, size);
3144             }
3145         }
3146     }
3147
3148     *pos = error_position;
3149 }