src/mesa/shader/grammar/grammar.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.6
   4  *
   5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file grammar.c
  27  * syntax parsing engine
  28  * \author Michal Krol
  29  */
  30
  31 #ifndef GRAMMAR_PORT_BUILD
  32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
  33 #endif
  34
  35 /*
  36 */
  37
  38 /*
  39     INTRODUCTION
  40     ------------
  41
  42     The task is to check the syntax of an input string. Input string is a stream of ASCII
  43     characters terminated with a null-character ('\0'). Checking it using C language is
  44     difficult and hard to implement without bugs. It is hard to maintain and make changes when
  45     the syntax changes.
  46
  47     This is because of a high redundancy of the C code. Large blocks of code are duplicated with
  48     only small changes. Even use of macros does not solve the problem because macros cannot
  49     erase the complexity of the problem.
  50
  51     The resolution is to create a new language that will be highly oriented to our task. Once
  52     we describe a particular syntax, we are done. We can then focus on the code that implements
  53     the language. The size and complexity of it is relatively small compared to the code that directly
  54     checks the syntax.
  55
  56     First, we must implement our new language. Here, the language is implemented in C, but it
  57     could also be implemented in any other language. The code is listed below. We must take
  58     good care that it is bug free. This is simple because the code is simple and clean.
  59
  60     Next, we must describe the syntax of our new language in itself. Once created and checked
  61     manually that it is correct, we can use it to check other scripts.
  62
  63     Note that our new language loading code does not have to check the syntax. It is because we
  64     assume that the script describing itself is correct, and other scripts can be syntactically
  65     checked by the former script. The loading code must only do semantic checking which leads us to
  66     simple resolving references.
  67
  68     THE LANGUAGE
  69     ------------
  70
  71     Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
  72     sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
  73     which is an identifier, and its definition. A definition is in turn a sequence of specifiers
  74     connected with ".and" or ".or" operator. These operators cannot be mixed together in one
  75     definition. Specifier can be a symbol, string, character, character range or a special
  76     keyword ".true" or ".false".
  77
  78     At the very beginning of the script there is a declaration of a root symbol, in the form:
  79         .syntax <root_symbol>;
  80     The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
  81     the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
  82     the symbol evaluates to true. Definition evaluation depends on the operator used to connect
  83     specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
  84     only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
  85     true if any of the specifiers evaluates to true. If definition contains only one specifier,
  86     it is evaluated as if it was connected with ".true" keyword by ".and" operator.
  87
  88     If specifier is a ".true" keyword, it always evaluates to true.
  89
  90     If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
  91     when it does not evaluate to true.
  92
  93     Character range specifier is in the form:
  94         '<first_character>' - '<second_character>'
  95     If specifier is a character range, it evaluates to true if character in the stream is greater
  96     or equal to <first_character> and less or equal to <second_character>. In that situation
  97     the stream pointer is advanced to point to next character in the stream. All C-style escape
  98     sequences are supported although trigraph sequences are not. The comparisons are performed
  99     on 8-bit unsigned integers.
 100
 101     Character specifier is in the form:
 102         '<single_character>'
 103     It evaluates to true if the following character range specifier evaluates to true:
 104         '<single_character>' - '<single_character>'
 105
 106     String specifier is in the form:
 107         "<string>"
 108     Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
 109     <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
 110     the following character specifier evaluates to true:
 111         '<string>[i]'
 112     If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
 113
 114     Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
 115         .loop <symbol>                  (1)
 116     where <symbol> is defined as follows:
 117         <symbol> <definition>;          (2)
 118     Construction (1) is replaced by the following code:
 119         <symbol$1>
 120     and declaration (2) is replaced by the following:
 121         <symbol$1> <symbol$2> .or .true;
 122         <symbol$2> <symbol> .and <symbol$1>;
 123         <symbol> <definition>;
 124
 125     Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
 126     registers that can be accessed in the syn body. Each reg has its name and a default value.
 127     The register is one byte wide. The C code can change the default value by calling
 128     grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
 129     a sequence of specifiers joined with .and or .or operator. And now each specifier can be
 130     prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
 131     where <operator> can be == or !=. If the condition evaluates to false, the specifier
 132     evaluates to .false. Otherwise it evaluates to the specifier.
 133
 134     ESCAPE SEQUENCES
 135     ----------------
 136
 137     Synek supports all escape sequences in character specifiers. The mapping table is listed below.
 138     All occurrences of the characters in the first column are replaced with the corresponding
 139     character in the second column.
 140
 141         Escape sequence         Represents
 142     ------------------------------------------------------------------------------------------------
 143         \a                      Bell (alert)
 144         \b                      Backspace
 145         \f                      Formfeed
 146         \n                      New line
 147         \r                      Carriage return
 148         \t                      Horizontal tab
 149         \v                      Vertical tab
 150         \'                      Single quotation mark
 151         \"                      Double quotation mark
 152         \\                      Backslash
 153         \?                      Literal question mark
 154         \ooo                    ASCII character in octal notation
 155         \xhhh                   ASCII character in hexadecimal notation
 156     ------------------------------------------------------------------------------------------------
 157
 158     RAISING ERRORS
 159     --------------
 160
 161     Any specifier can be followed by a special construction that is executed when the specifier
 162     evaluates to false. The construction is in the form:
 163         .error <ERROR_TEXT>
 164     <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
 165     in the form:
 166         .errtext <ERROR_TEXT> "<error_desc>"
 167     When specifier evaluates to false and this construction is present, parsing is stopped
 168     immediately and <error_desc> is returned as a result of parsing. The error position is also
 169     returned and it is meant as an offset from the beginning of the stream to the character that
 170     was valid so far. Example:
 171
 172         (**** syntax script ****)
 173
 174         .syntax program;
 175         .errtext MISSING_SEMICOLON      "missing ';'"
 176         program         declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
 177                         .loop space .and '\0';
 178         declaration     "declare" .and .loop space .and identifier;
 179         space           ' ';
 180
 181         (**** sample code ****)
 182
 183         declare foo ,
 184
 185     In the example above checking the sample code will result in error message "missing ';'" and
 186     error position 12. The sample code is not correct. Note the presence of '\0' specifier to
 187     assure that there is no code after semicolon - only spaces.
 188     <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
 189     the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
 190     the identifier name. The starting position is the error position. The length of the resulting
 191     string is the position after invoking the symbol.
 192
 193     PRODUCTION
 194     ----------
 195
 196     Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
 197     that evaluate to true. That is, every specifier and optional error construction can be followed
 198     by a number of emit constructions that are in the form:
 199         .emit <parameter>
 200     <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
 201     0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
 202     in the form:
 203         .emtcode <identifier> <hex_number>
 204
 205     When given specifier evaluates to true, all emits associated with the specifier are output
 206     in order they were declared. A star means that last-read character should be output instead
 207     of constant value. Example:
 208
 209         (**** syntax script ****)
 210
 211         .syntax foobar;
 212         .emtcode WORD_FOO       0x01
 213         .emtcode WORD_BAR       0x02
 214         foobar      FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
 215         FOO         "foo" .and SPACE;
 216         BAR         "bar" .and SPACE;
 217         SPACE       ' ' .or '\0';
 218
 219         (**** sample text 1 ****)
 220
 221         foo
 222
 223         (**** sample text 2 ****)
 224
 225         foobar
 226
 227     For both samples the result will be one-element array. For first sample text it will be
 228     value 1, for second - 0. Note that every text will be accepted because of presence of
 229     .true as an alternative.
 230
 231     Another example:
 232
 233         (**** syntax script ****)
 234
 235         .syntax declaration;
 236         .emtcode VARIABLE       0x01
 237         declaration     "declare" .and .loop space .and
 238                         identifier .emit VARIABLE .and          (1)
 239                         .true .emit 0x00 .and                   (2)
 240                         .loop space .and ';';
 241         space           ' ' .or '\t';
 242         identifier      .loop id_char .emit *;                  (3)
 243         id_char         'a'-'z' .or 'A'-'Z' .or '_';
 244
 245         (**** sample code ****)
 246
 247         declare    fubar;
 248
 249     In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
 250     true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
 251     to terminate the string with null to signal when the string ends. Specifier (3) outputs
 252     all characters that make declared identifier. The result of sample code will be the
 253     following array:
 254         { 1, 'f', 'u', 'b', 'a', 'r', 0 }
 255
 256     If .emit is followed by dollar $, it means that current position should be output. Current
 257     position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
 258     first character consumed by the specifier associated with the .emit instruction. Current
 259     position is stored in the output buffer in Little-Endian convention (the lowest byte comes
 260     first).
 261 */
 262
 263 static void mem_free (void **);
 264
 265 /*
 266     internal error messages
 267 */
 268 static const byte *OUT_OF_MEMORY =          (byte *) "internal error 1001: out of physical memory";
 269 static const byte *UNRESOLVED_REFERENCE =   (byte *) "internal error 1002: unresolved reference '$'";
 270 static const byte *INVALID_GRAMMAR_ID =     (byte *) "internal error 1003: invalid grammar object";
 271 static const byte *INVALID_REGISTER_NAME =  (byte *) "internal error 1004: invalid register name: '$'";
 272 /*static const byte *DUPLICATE_IDENTIFIER =   (byte *) "internal error 1005: identifier '$' already defined";*/
 273 static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
 274
 275 static const byte *error_message = NULL;    /* points to one of the error messages above */
 276 static byte *error_param = NULL;        /* this is inserted into error_message in place of $ */
 277 static int error_position = -1;
 278
 279 static byte *unknown = (byte *) "???";
 280
 281 static void clear_last_error (void)
 282 {
 283     /* reset error message */
 284     error_message = NULL;
 285
 286     /* free error parameter - if error_param is a "???" don't free it - it's static */
 287     if (error_param != unknown)
 288         mem_free ((void **) (void *) &error_param);
 289     else
 290         error_param = NULL;
 291
 292     /* reset error position */
 293     error_position = -1;
 294 }
 295
 296 static void set_last_error (const byte *msg, byte *param, int pos)
 297 {
 298     /* error message can be set only once */
 299     if (error_message != NULL)
 300     {
 301         mem_free ((void **) (void *) &param);
 302         return;
 303     }
 304
 305     error_message = msg;
 306
 307     /* if param is NULL, set error_param to unknown ("???") */
 308     /* note: do not try to strdup the "???" - it may be that we are here because of */
 309     /* out of memory error so strdup can fail */
 310     if (param != NULL)
 311         error_param = param;
 312     else
 313         error_param = unknown;
 314
 315     error_position = pos;
 316 }
 317
 318 /*
 319     memory management routines
 320 */
 321 static void *mem_alloc (size_t size)
 322 {
 323     void *ptr = grammar_alloc_malloc (size);
 324     if (ptr == NULL)
 325         set_last_error (OUT_OF_MEMORY, NULL, -1);
 326     return ptr;
 327 }
 328
 329 static void *mem_copy (void *dst, const void *src, size_t size)
 330 {
 331     return grammar_memory_copy (dst, src, size);
 332 }
 333
 334 static void mem_free (void **ptr)
 335 {
 336     grammar_alloc_free (*ptr);
 337     *ptr = NULL;
 338 }
 339
 340 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
 341 {
 342     void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
 343     if (ptr2 == NULL)
 344         set_last_error (OUT_OF_MEMORY, NULL, -1);
 345     return ptr2;
 346 }
 347
 348 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
 349 {
 350     return grammar_string_copy_n (dst, src, max_len);
 351 }
 352
 353 static byte *str_duplicate (const byte *str)
 354 {
 355     byte *new_str = grammar_string_duplicate (str);
 356     if (new_str == NULL)
 357         set_last_error (OUT_OF_MEMORY, NULL, -1);
 358     return new_str;
 359 }
 360
 361 static int str_equal (const byte *str1, const byte *str2)
 362 {
 363     return grammar_string_compare (str1, str2) == 0;
 364 }
 365
 366 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
 367 {
 368     return grammar_string_compare_n (str1, str2, n) == 0;
 369 }
 370
 371 static int
 372 str_length (const byte *str)
 373 {
 374    return (int) (grammar_string_length (str));
 375 }
 376
 377 /*
 378     useful macros
 379 */
 380 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
 381     static void _Ty##_append (_Ty **x, _Ty *nx) {\
 382         while (*x) x = &(**x).next;\
 383         *x = nx;\
 384     }
 385
 386 /*
 387     string to byte map typedef
 388 */
 389 typedef struct map_byte_
 390 {
 391     byte *key;
 392     byte data;
 393     struct map_byte_ *next;
 394 } map_byte;
 395
 396 static void map_byte_create (map_byte **ma)
 397 {
 398     *ma = (map_byte *) mem_alloc (sizeof (map_byte));
 399     if (*ma)
 400     {
 401         (**ma).key = NULL;
 402         (**ma).data = '\0';
 403         (**ma).next = NULL;
 404     }
 405 }
 406
 407 static void map_byte_destroy (map_byte **ma)
 408 {
 409     if (*ma)
 410     {
 411         map_byte_destroy (&(**ma).next);
 412         mem_free ((void **) &(**ma).key);
 413         mem_free ((void **) ma);
 414     }
 415 }
 416
 417 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
 418
 419 /*
 420     searches the map for the specified key,
 421     returns pointer to the element with the specified key if it exists
 422     returns NULL otherwise
 423 */
 424 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
 425 {
 426     while (*ma)
 427     {
 428         if (str_equal ((**ma).key, key))
 429             return *ma;
 430
 431         ma = &(**ma).next;
 432     }
 433
 434     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
 435     return NULL;
 436 }
 437
 438 /*
 439     searches the map for specified key,
 440     if the key is matched, *data is filled with data associated with the key,
 441     returns 0 if the key is matched,
 442     returns 1 otherwise
 443 */
 444 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
 445 {
 446     map_byte *found = map_byte_locate (ma, key);
 447     if (found != NULL)
 448     {
 449         *data = found->data;
 450
 451         return 0;
 452     }
 453
 454     return 1;
 455 }
 456
 457 /*
 458     regbyte context typedef
 459
 460     Each regbyte consists of its name and a default value. These are static and created at
 461     grammar script compile-time, for example the following line:
 462         .regbyte vertex_blend      0x00
 463     adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
 464     When the script is executed, this regbyte can be accessed by name for read and write. When a
 465     particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
 466     stack. The new entry contains information about which regbyte it references and its new value.
 467     When a given regbyte is accessed for read, the stack is searched top-down to find an
 468     entry that references the regbyte. The first matching entry is used to return the current
 469     value it holds. If no entry is found, the default value is returned.
 470 */
 471 typedef struct regbyte_ctx_
 472 {
 473     map_byte *m_regbyte;
 474     byte m_current_value;
 475     struct regbyte_ctx_ *m_prev;
 476 } regbyte_ctx;
 477
 478 static void regbyte_ctx_create (regbyte_ctx **re)
 479 {
 480     *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
 481     if (*re)
 482     {
 483         (**re).m_regbyte = NULL;
 484         (**re).m_prev = NULL;
 485     }
 486 }
 487
 488 static void regbyte_ctx_destroy (regbyte_ctx **re)
 489 {
 490     if (*re)
 491     {
 492         mem_free ((void **) re);
 493     }
 494 }
 495
 496 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
 497 {
 498     /* first lookup in the register stack */
 499     while (*re != NULL)
 500     {
 501         if ((**re).m_regbyte == reg)
 502             return (**re).m_current_value;
 503
 504         re = &(**re).m_prev;
 505     }
 506
 507     /* if not found - return the default value */
 508     return reg->data;
 509 }
 510
 511 /*
 512     emit type typedef
 513 */
 514 typedef enum emit_type_
 515 {
 516     et_byte,            /* explicit number */
 517     et_stream,          /* eaten character */
 518     et_position         /* current position */
 519 } emit_type;
 520
 521 /*
 522     emit destination typedef
 523 */
 524 typedef enum emit_dest_
 525 {
 526     ed_output,          /* write to the output buffer */
 527     ed_regbyte          /* write a particular regbyte */
 528 } emit_dest;
 529
 530 /*
 531     emit typedef
 532 */
 533 typedef struct emit_
 534 {
 535     emit_dest m_emit_dest;
 536     emit_type m_emit_type;      /* ed_output */
 537     byte m_byte;                /* et_byte */
 538     map_byte *m_regbyte;        /* ed_regbyte */
 539     byte *m_regname;            /* ed_regbyte - temporary */
 540     struct emit_ *m_next;
 541 } emit;
 542
 543 static void emit_create (emit **em)
 544 {
 545     *em = (emit *) mem_alloc (sizeof (emit));
 546     if (*em)
 547     {
 548         (**em).m_emit_dest = ed_output;
 549         (**em).m_emit_type = et_byte;
 550         (**em).m_byte = '\0';
 551         (**em).m_regbyte = NULL;
 552         (**em).m_regname = NULL;
 553         (**em).m_next = NULL;
 554     }
 555 }
 556
 557 static void emit_destroy (emit **em)
 558 {
 559     if (*em)
 560     {
 561         emit_destroy (&(**em).m_next);
 562         mem_free ((void **) &(**em).m_regname);
 563         mem_free ((void **) em);
 564     }
 565 }
 566
 567 static unsigned int emit_size (emit *_E)
 568 {
 569     unsigned int n = 0;
 570
 571     while (_E != NULL)
 572     {
 573         if (_E->m_emit_dest == ed_output)
 574         {
 575             if (_E->m_emit_type == et_position)
 576                 n += 4;     /* position is a 32-bit unsigned integer */
 577             else
 578                 n++;
 579         }
 580         _E = _E->m_next;
 581     }
 582
 583     return n;
 584 }
 585
 586 static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx)
 587 {
 588     while (_E != NULL)
 589     {
 590         if (_E->m_emit_dest == ed_output)
 591         {
 592             if (_E->m_emit_type == et_byte)
 593                 *_P++ = _E->m_byte;
 594             else if (_E->m_emit_type == et_stream)
 595                 *_P++ = c;
 596             else /* _Em->type == et_position */
 597             {
 598                 *_P++ = (byte) (_Pos);
 599                 *_P++ = (byte) (_Pos >> 8);
 600                 *_P++ = (byte) (_Pos >> 16);
 601                 *_P++ = (byte) (_Pos >> 24);
 602             }
 603         }
 604         else
 605         {
 606             regbyte_ctx *new_rbc;
 607             regbyte_ctx_create (&new_rbc);
 608             if (new_rbc == NULL)
 609                 return 1;
 610
 611             new_rbc->m_prev = *_Ctx;
 612             new_rbc->m_regbyte = _E->m_regbyte;
 613             *_Ctx = new_rbc;
 614
 615             if (_E->m_emit_type == et_byte)
 616                 new_rbc->m_current_value = _E->m_byte;
 617             else if (_E->m_emit_type == et_stream)
 618                 new_rbc->m_current_value = c;
 619         }
 620
 621         _E = _E->m_next;
 622     }
 623
 624     return 0;
 625 }
 626
 627 /*
 628     error typedef
 629 */
 630 typedef struct error_
 631 {
 632     byte *m_text;
 633     byte *m_token_name;
 634     struct rule_ *m_token;
 635 } error;
 636
 637 static void error_create (error **er)
 638 {
 639     *er = (error *) mem_alloc (sizeof (error));
 640     if (*er)
 641     {
 642         (**er).m_text = NULL;
 643         (**er).m_token_name = NULL;
 644         (**er).m_token = NULL;
 645     }
 646 }
 647
 648 static void error_destroy (error **er)
 649 {
 650     if (*er)
 651     {
 652         mem_free ((void **) &(**er).m_text);
 653         mem_free ((void **) &(**er).m_token_name);
 654         mem_free ((void **) er);
 655     }
 656 }
 657
 658 struct dict_;
 659
 660 static byte *
 661 error_get_token (error *, struct dict_ *, const byte *, int);
 662
 663 /*
 664     condition operand type typedef
 665 */
 666 typedef enum cond_oper_type_
 667 {
 668     cot_byte,               /* constant 8-bit unsigned integer */
 669     cot_regbyte             /* pointer to byte register containing the current value */
 670 } cond_oper_type;
 671
 672 /*
 673     condition operand typedef
 674 */
 675 typedef struct cond_oper_
 676 {
 677     cond_oper_type m_type;
 678     byte m_byte;            /* cot_byte */
 679     map_byte *m_regbyte;    /* cot_regbyte */
 680     byte *m_regname;        /* cot_regbyte - temporary */
 681 } cond_oper;
 682
 683 /*
 684     condition type typedef
 685 */
 686 typedef enum cond_type_
 687 {
 688     ct_equal,
 689     ct_not_equal
 690 } cond_type;
 691
 692 /*
 693     condition typedef
 694 */
 695 typedef struct cond_
 696 {
 697     cond_type m_type;
 698     cond_oper m_operands[2];
 699 } cond;
 700
 701 static void cond_create (cond **co)
 702 {
 703     *co = (cond *) mem_alloc (sizeof (cond));
 704     if (*co)
 705     {
 706         (**co).m_operands[0].m_regname = NULL;
 707         (**co).m_operands[1].m_regname = NULL;
 708     }
 709 }
 710
 711 static void cond_destroy (cond **co)
 712 {
 713     if (*co)
 714     {
 715         mem_free ((void **) &(**co).m_operands[0].m_regname);
 716         mem_free ((void **) &(**co).m_operands[1].m_regname);
 717         mem_free ((void **) co);
 718     }
 719 }
 720
 721 /*
 722     specifier type typedef
 723 */
 724 typedef enum spec_type_
 725 {
 726     st_false,
 727     st_true,
 728     st_byte,
 729     st_byte_range,
 730     st_string,
 731     st_identifier,
 732     st_identifier_loop,
 733     st_debug
 734 } spec_type;
 735
 736 /*
 737     specifier typedef
 738 */
 739 typedef struct spec_
 740 {
 741     spec_type m_spec_type;
 742     byte m_byte[2];                 /* st_byte, st_byte_range */
 743     byte *m_string;                 /* st_string */
 744     struct rule_ *m_rule;           /* st_identifier, st_identifier_loop */
 745     emit *m_emits;
 746     error *m_errtext;
 747     cond *m_cond;
 748     struct spec_ *next;
 749 } spec;
 750
 751 static void spec_create (spec **sp)
 752 {
 753     *sp = (spec *) mem_alloc (sizeof (spec));
 754     if (*sp)
 755     {
 756         (**sp).m_spec_type = st_false;
 757         (**sp).m_byte[0] = '\0';
 758         (**sp).m_byte[1] = '\0';
 759         (**sp).m_string = NULL;
 760         (**sp).m_rule = NULL;
 761         (**sp).m_emits = NULL;
 762         (**sp).m_errtext = NULL;
 763         (**sp).m_cond = NULL;
 764         (**sp).next = NULL;
 765     }
 766 }
 767
 768 static void spec_destroy (spec **sp)
 769 {
 770     if (*sp)
 771     {
 772         spec_destroy (&(**sp).next);
 773         emit_destroy (&(**sp).m_emits);
 774         error_destroy (&(**sp).m_errtext);
 775         mem_free ((void **) &(**sp).m_string);
 776         cond_destroy (&(**sp).m_cond);
 777         mem_free ((void **) sp);
 778     }
 779 }
 780
 781 GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
 782
 783 /*
 784     operator typedef
 785 */
 786 typedef enum oper_
 787 {
 788     op_none,
 789     op_and,
 790     op_or
 791 } oper;
 792
 793 /*
 794     rule typedef
 795 */
 796 typedef struct rule_
 797 {
 798     oper m_oper;
 799     spec *m_specs;
 800     struct rule_ *next;
 801     int m_referenced;
 802 } rule;
 803
 804 static void rule_create (rule **ru)
 805 {
 806     *ru = (rule *) mem_alloc (sizeof (rule));
 807     if (*ru)
 808     {
 809         (**ru).m_oper = op_none;
 810         (**ru).m_specs = NULL;
 811         (**ru).next = NULL;
 812         (**ru).m_referenced = 0;
 813     }
 814 }
 815
 816 static void rule_destroy (rule **ru)
 817 {
 818     if (*ru)
 819     {
 820         rule_destroy (&(**ru).next);
 821         spec_destroy (&(**ru).m_specs);
 822         mem_free ((void **) ru);
 823     }
 824 }
 825
 826 GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
 827
 828 /*
 829     returns unique grammar id
 830 */
 831 static grammar next_valid_grammar_id (void)
 832 {
 833     static grammar id = 0;
 834
 835     return ++id;
 836 }
 837
 838 /*
 839     dictionary typedef
 840 */
 841 typedef struct dict_
 842 {
 843     rule *m_rulez;
 844     rule *m_syntax;
 845     rule *m_string;
 846     map_byte *m_regbytes;
 847     grammar m_id;
 848     struct dict_ *next;
 849 } dict;
 850
 851 static void dict_create (dict **di)
 852 {
 853     *di = (dict *) mem_alloc (sizeof (dict));
 854     if (*di)
 855     {
 856         (**di).m_rulez = NULL;
 857         (**di).m_syntax = NULL;
 858         (**di).m_string = NULL;
 859         (**di).m_regbytes = NULL;
 860         (**di).m_id = next_valid_grammar_id ();
 861         (**di).next = NULL;
 862     }
 863 }
 864
 865 static void dict_destroy (dict **di)
 866 {
 867     if (*di)
 868     {
 869         rule_destroy (&(**di).m_rulez);
 870         map_byte_destroy (&(**di).m_regbytes);
 871         mem_free ((void **) di);
 872     }
 873 }
 874
 875 GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
 876
 877 static void dict_find (dict **di, grammar key, dict **data)
 878 {
 879     while (*di)
 880     {
 881         if ((**di).m_id == key)
 882         {
 883             *data = *di;
 884             return;
 885         }
 886
 887         di = &(**di).next;
 888     }
 889
 890     *data = NULL;
 891 }
 892
 893 static dict *g_dicts = NULL;
 894
 895 /*
 896     byte array typedef
 897 */
 898 typedef struct barray_
 899 {
 900     byte *data;
 901     unsigned int len;
 902 } barray;
 903
 904 static void barray_create (barray **ba)
 905 {
 906     *ba = (barray *) mem_alloc (sizeof (barray));
 907     if (*ba)
 908     {
 909         (**ba).data = NULL;
 910         (**ba).len = 0;
 911     }
 912 }
 913
 914 static void barray_destroy (barray **ba)
 915 {
 916     if (*ba)
 917     {
 918         mem_free ((void **) &(**ba).data);
 919         mem_free ((void **) ba);
 920     }
 921 }
 922
 923 /*
 924     reallocates byte array to requested size,
 925     returns 0 on success,
 926     returns 1 otherwise
 927 */
 928 static int barray_resize (barray **ba, unsigned int nlen)
 929 {
 930     byte *new_pointer;
 931
 932     if (nlen == 0)
 933     {
 934         mem_free ((void **) &(**ba).data);
 935         (**ba).data = NULL;
 936         (**ba).len = 0;
 937
 938         return 0;
 939     }
 940     else
 941     {
 942         new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
 943             nlen * sizeof (byte));
 944         if (new_pointer)
 945         {
 946             (**ba).data = new_pointer;
 947             (**ba).len = nlen;
 948
 949             return 0;
 950         }
 951     }
 952
 953     return 1;
 954 }
 955
 956 /*
 957     adds byte array pointed by *nb to the end of array pointed by *ba,
 958     returns 0 on success,
 959     returns 1 otherwise
 960 */
 961 static int barray_append (barray **ba, barray **nb)
 962 {
 963     const unsigned int len = (**ba).len;
 964
 965     if (barray_resize (ba, (**ba).len + (**nb).len))
 966         return 1;
 967
 968     mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
 969
 970     return 0;
 971 }
 972
 973 /*
 974     adds emit chain pointed by em to the end of array pointed by *ba,
 975     returns 0 on success,
 976     returns 1 otherwise
 977 */
 978 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
 979 {
 980     unsigned int count = emit_size (em);
 981
 982     if (barray_resize (ba, (**ba).len + count))
 983         return 1;
 984
 985     return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
 986 }
 987
/*
    byte pool typedef
*/
typedef struct bytepool_
{
    byte *_F;               /* the pool's buffer */
    unsigned int _Siz;      /* size of the buffer, in bytes, as last allocated or reserved */
} bytepool;
 996
 997 static void bytepool_destroy (bytepool **by)
 998 {
 999     if (*by != NULL)
1000     {
1001         mem_free ((void **) &(**by)._F);
1002         mem_free ((void **) by);
1003     }
1004 }
1005
1006 static void bytepool_create (bytepool **by, int len)
1007 {
1008     *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1009     if (*by != NULL)
1010     {
1011         (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1012         (**by)._Siz = len;
1013
1014         if ((**by)._F == NULL)
1015             bytepool_destroy (by);
1016     }
1017 }
1018
1019 static int bytepool_reserve (bytepool *by, unsigned int n)
1020 {
1021     byte *_P;
1022
1023     if (n <= by->_Siz)
1024         return 0;
1025
1026     /* byte pool can only grow and at least by doubling its size */
1027     n = n >= by->_Siz * 2 ? n : by->_Siz * 2;
1028
1029     /* reallocate the memory and adjust pointers to the new memory location */
1030     _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n));
1031     if (_P != NULL)
1032     {
1033         by->_F = _P;
1034         by->_Siz = n;
1035         return 0;
1036     }
1037
1038     return 1;
1039 }
1040
/*
    string to string map typedef
*/
typedef struct map_str_
{
    byte *key;                  /* string the entry is looked up by; freed by map_str_destroy */
    byte *data;                 /* string associated with the key; freed by map_str_destroy */
    struct map_str_ *next;      /* next entry of the list, NULL at the end */
} map_str;
1050
1051 static void map_str_create (map_str **ma)
1052 {
1053     *ma = (map_str *) mem_alloc (sizeof (map_str));
1054     if (*ma)
1055     {
1056         (**ma).key = NULL;
1057         (**ma).data = NULL;
1058         (**ma).next = NULL;
1059     }
1060 }
1061
1062 static void map_str_destroy (map_str **ma)
1063 {
1064     if (*ma)
1065     {
1066         map_str_destroy (&(**ma).next);
1067         mem_free ((void **) &(**ma).key);
1068         mem_free ((void **) &(**ma).data);
1069         mem_free ((void **) ma);
1070     }
1071 }
1072
/*
    instantiates the list-append helper for map_str,
    NOTE(review): the macro body is not visible here - presumably it defines map_str_append ();
    confirm against the macro definition
*/
GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1074
1075 /*
1076     searches the map for specified key,
1077     if the key is matched, *data is filled with data associated with the key,
1078     returns 0 if the key is matched,
1079     returns 1 otherwise
1080 */
1081 static int map_str_find (map_str **ma, const byte *key, byte **data)
1082 {
1083     while (*ma)
1084     {
1085         if (str_equal ((**ma).key, key))
1086         {
1087             *data = str_duplicate ((**ma).data);
1088             if (*data == NULL)
1089                 return 1;
1090
1091             return 0;
1092         }
1093
1094         ma = &(**ma).next;
1095     }
1096
1097     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1098     return 1;
1099 }
1100
/*
    string to rule map typedef
*/
typedef struct map_rule_
{
    byte *key;                  /* string the entry is looked up by; freed by map_rule_destroy */
    rule *data;                 /* rule associated with the key; not freed by map_rule_destroy */
    struct map_rule_ *next;     /* next entry of the list, NULL at the end */
} map_rule;
1110
1111 static void map_rule_create (map_rule **ma)
1112 {
1113     *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1114     if (*ma)
1115     {
1116         (**ma).key = NULL;
1117         (**ma).data = NULL;
1118         (**ma).next = NULL;
1119     }
1120 }
1121
1122 static void map_rule_destroy (map_rule **ma)
1123 {
1124     if (*ma)
1125     {
1126         map_rule_destroy (&(**ma).next);
1127         mem_free ((void **) &(**ma).key);
1128         mem_free ((void **) ma);
1129     }
1130 }
1131
/*
    instantiates the list-append helper for map_rule,
    NOTE(review): the macro body is not visible here - presumably it defines map_rule_append ();
    confirm against the macro definition
*/
GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1133
1134 /*
1135     searches the map for specified key,
1136     if the key is matched, *data is filled with data associated with the key,
1137     returns 0 if the is matched,
1138     returns 1 otherwise
1139 */
1140 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1141 {
1142     while (*ma)
1143     {
1144         if (str_equal ((**ma).key, key))
1145         {
1146             *data = (**ma).data;
1147
1148             return 0;
1149         }
1150
1151         ma = &(**ma).next;
1152     }
1153
1154     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1155     return 1;
1156 }
1157
1158 /*
1159     returns 1 if given character is a white space,
1160     returns 0 otherwise
1161 */
1162 static int is_space (byte c)
1163 {
1164     return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1165 }
1166
1167 /*
1168     advances text pointer by 1 if character pointed by *text is a space,
1169     returns 1 if a space has been eaten,
1170     returns 0 otherwise
1171 */
1172 static int eat_space (const byte **text)
1173 {
1174     if (is_space (**text))
1175     {
1176         (*text)++;
1177
1178         return 1;
1179     }
1180
1181     return 0;
1182 }
1183
1184 /*
1185     returns 1 if text points to C-style comment start string,
1186     returns 0 otherwise
1187 */
1188 static int is_comment_start (const byte *text)
1189 {
1190     return text[0] == '/' && text[1] == '*';
1191 }
1192
1193 /*
1194     advances text pointer to first character after C-style comment block - if any,
1195     returns 1 if C-style comment block has been encountered and eaten,
1196     returns 0 otherwise
1197 */
1198 static int eat_comment (const byte **text)
1199 {
1200     if (is_comment_start (*text))
1201     {
1202         /* *text points to comment block - skip two characters to enter comment body */
1203         *text += 2;
1204         /* skip any character except consecutive '*' and '/' */
1205         while (!((*text)[0] == '*' && (*text)[1] == '/'))
1206             (*text)++;
1207         /* skip those two terminating characters */
1208         *text += 2;
1209
1210         return 1;
1211     }
1212
1213     return 0;
1214 }
1215
1216 /*
1217     advances text pointer to first character that is neither space nor C-style comment block
1218 */
1219 static void eat_spaces (const byte **text)
1220 {
1221     while (eat_space (text) || eat_comment (text))
1222         ;
1223 }
1224
1225 /*
1226     resizes string pointed by *ptr to successfully add character c to the end of the string,
1227     returns 0 on success,
1228     returns 1 otherwise
1229 */
1230 static int string_grow (byte **ptr, unsigned int *len, byte c)
1231 {
1232     /* reallocate the string in 16-byte increments */
1233     if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1234     {
1235         byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1236             ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1237         if (tmp == NULL)
1238             return 1;
1239
1240         *ptr = tmp;
1241     }
1242
1243     if (c)
1244     {
1245         /* append given character */
1246         (*ptr)[*len] = c;
1247         (*len)++;
1248     }
1249     (*ptr)[*len] = '\0';
1250
1251     return 0;
1252 }
1253
1254 /*
1255     returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1256     returns 0 otherwise
1257 */
1258 static int is_identifier (byte c)
1259 {
1260     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1261 }
1262
1263 /*
1264     copies characters from *text to *id until non-identifier character is encountered,
1265     assumes that *id points to NULL object - caller is responsible for later freeing the string,
1266     text pointer is advanced to point past the copied identifier,
1267     returns 0 if identifier was successfully copied,
1268     returns 1 otherwise
1269 */
1270 static int get_identifier (const byte **text, byte **id)
1271 {
1272     const byte *t = *text;
1273     byte *p = NULL;
1274     unsigned int len = 0;
1275
1276     if (string_grow (&p, &len, '\0'))
1277         return 1;
1278
1279     /* loop while next character in buffer is valid for identifiers */
1280     while (is_identifier (*t))
1281     {
1282         if (string_grow (&p, &len, *t++))
1283         {
1284             mem_free ((void **) (void *) &p);
1285             return 1;
1286         }
1287     }
1288
1289     *text = t;
1290     *id = p;
1291
1292     return 0;
1293 }
1294
1295 /*
1296     converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1297     advances text pointer past the converted sequence,
1298     returns the converted value
1299 */
1300 static unsigned int dec_convert (const byte **text)
1301 {
1302     unsigned int value = 0;
1303
1304     while (**text >= '0' && **text <= '9')
1305     {
1306         value = value * 10 + **text - '0';
1307         (*text)++;
1308     }
1309
1310     return value;
1311 }
1312
1313 /*
1314     returns 1 if given character is HEX digit 0-9, A-F or a-f,
1315     returns 0 otherwise
1316 */
1317 static int is_hex (byte c)
1318 {
1319     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1320 }
1321
1322 /*
1323     returns value of passed character as if it was HEX digit
1324 */
1325 static unsigned int hex2dec (byte c)
1326 {
1327     if (c >= '0' && c <= '9')
1328         return c - '0';
1329     if (c >= 'A' && c <= 'F')
1330         return c - 'A' + 10;
1331     return c - 'a' + 10;
1332 }
1333
1334 /*
1335     converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1336     advances text pointer past the converted sequence,
1337     returns the converted value
1338 */
1339 static unsigned int hex_convert (const byte **text)
1340 {
1341     unsigned int value = 0;
1342
1343     while (is_hex (**text))
1344     {
1345         value = value * 0x10 + hex2dec (**text);
1346         (*text)++;
1347     }
1348
1349     return value;
1350 }
1351
1352 /*
1353     returns 1 if given character is OCT digit 0-7,
1354     returns 0 otherwise
1355 */
1356 static int is_oct (byte c)
1357 {
1358     return c >= '0' && c <= '7';
1359 }
1360
1361 /*
1362     returns value of passed character as if it was OCT digit
1363 */
1364 static int oct2dec (byte c)
1365 {
1366     return c - '0';
1367 }
1368
1369 static byte get_escape_sequence (const byte **text)
1370 {
1371     int value = 0;
1372
1373     /* skip '\' character */
1374     (*text)++;
1375
1376     switch (*(*text)++)
1377     {
1378     case '\'':
1379         return '\'';
1380     case '"':
1381         return '\"';
1382     case '?':
1383         return '\?';
1384     case '\\':
1385         return '\\';
1386     case 'a':
1387         return '\a';
1388     case 'b':
1389         return '\b';
1390     case 'f':
1391         return '\f';
1392     case 'n':
1393         return '\n';
1394     case 'r':
1395         return '\r';
1396     case 't':
1397         return '\t';
1398     case 'v':
1399         return '\v';
1400     case 'x':
1401         return (byte) hex_convert (text);
1402     }
1403
1404     (*text)--;
1405     if (is_oct (**text))
1406     {
1407         value = oct2dec (*(*text)++);
1408         if (is_oct (**text))
1409         {
1410             value = value * 010 + oct2dec (*(*text)++);
1411             if (is_oct (**text))
1412                 value = value * 010 + oct2dec (*(*text)++);
1413         }
1414     }
1415
1416     return (byte) value;
1417 }
1418
1419 /*
1420     copies characters from *text to *str until " or ' character is encountered,
1421     assumes that *str points to NULL object - caller is responsible for later freeing the string,
1422     assumes that *text points to " or ' character that starts the string,
1423     text pointer is advanced to point past the " or ' character,
1424     returns 0 if string was successfully copied,
1425     returns 1 otherwise
1426 */
1427 static int get_string (const byte **text, byte **str)
1428 {
1429     const byte *t = *text;
1430     byte *p = NULL;
1431     unsigned int len = 0;
1432     byte term_char;
1433
1434     if (string_grow (&p, &len, '\0'))
1435         return 1;
1436
1437     /* read " or ' character that starts the string */
1438     term_char = *t++;
1439     /* while next character is not the terminating character */
1440     while (*t && *t != term_char)
1441     {
1442         byte c;
1443
1444         if (*t == '\\')
1445             c = get_escape_sequence (&t);
1446         else
1447             c = *t++;
1448
1449         if (string_grow (&p, &len, c))
1450         {
1451             mem_free ((void **) (void *) &p);
1452             return 1;
1453         }
1454     }
1455     /* skip " or ' character that ends the string */
1456     t++;
1457
1458     *text = t;
1459     *str = p;
1460     return 0;
1461 }
1462
1463 /*
1464     gets emit code, the syntax is:
1465     ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1466     assumes that *text already points to <symbol>,
1467     returns 0 if emit code is successfully read,
1468     returns 1 otherwise
1469 */
1470 static int get_emtcode (const byte **text, map_byte **ma)
1471 {
1472     const byte *t = *text;
1473     map_byte *m = NULL;
1474
1475     map_byte_create (&m);
1476     if (m == NULL)
1477         return 1;
1478
1479     if (get_identifier (&t, &m->key))
1480     {
1481         map_byte_destroy (&m);
1482         return 1;
1483     }
1484     eat_spaces (&t);
1485
1486     if (*t == '\'')
1487     {
1488         byte *c;
1489
1490         if (get_string (&t, &c))
1491         {
1492             map_byte_destroy (&m);
1493             return 1;
1494         }
1495
1496         m->data = (byte) c[0];
1497         mem_free ((void **) (void *) &c);
1498     }
1499     else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1500     {
1501         /* skip HEX "0x" or "0X" prefix */
1502         t += 2;
1503         m->data = (byte) hex_convert (&t);
1504     }
1505     else
1506     {
1507         m->data = (byte) dec_convert (&t);
1508     }
1509
1510     eat_spaces (&t);
1511
1512     *text = t;
1513     *ma = m;
1514     return 0;
1515 }
1516
/*
    gets regbyte declaration, the syntax is:
    ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
    assumes that *text already points to <symbol>,
    *text and *ma are handled exactly as get_emtcode handles them,
    returns 0 if regbyte is successfully read,
    returns 1 otherwise
*/
static int get_regbyte (const byte **text, map_byte **ma)
{
    /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
    return get_emtcode (text, ma);
}
1529
1530 /*
1531     returns 0 on success,
1532     returns 1 otherwise
1533 */
1534 static int get_errtext (const byte **text, map_str **ma)
1535 {
1536     const byte *t = *text;
1537     map_str *m = NULL;
1538
1539     map_str_create (&m);
1540     if (m == NULL)
1541         return 1;
1542
1543     if (get_identifier (&t, &m->key))
1544     {
1545         map_str_destroy (&m);
1546         return 1;
1547     }
1548     eat_spaces (&t);
1549
1550     if (get_string (&t, &m->data))
1551     {
1552         map_str_destroy (&m);
1553         return 1;
1554     }
1555     eat_spaces (&t);
1556
1557     *text = t;
1558     *ma = m;
1559     return 0;
1560 }
1561
1562 /*
1563     returns 0 on success,
1564     returns 1 otherwise,
1565 */
1566 static int get_error (const byte **text, error **er, map_str *maps)
1567 {
1568     const byte *t = *text;
1569     byte *temp = NULL;
1570
1571     if (*t != '.')
1572         return 0;
1573
1574     t++;
1575     if (get_identifier (&t, &temp))
1576         return 1;
1577     eat_spaces (&t);
1578
1579     if (!str_equal ((byte *) "error", temp))
1580     {
1581         mem_free ((void **) (void *) &temp);
1582         return 0;
1583     }
1584
1585     mem_free ((void **) (void *) &temp);
1586
1587     error_create (er);
1588     if (*er == NULL)
1589         return 1;
1590
1591     if (*t == '\"')
1592     {
1593         if (get_string (&t, &(**er).m_text))
1594         {
1595             error_destroy (er);
1596             return 1;
1597         }
1598         eat_spaces (&t);
1599     }
1600     else
1601     {
1602         if (get_identifier (&t, &temp))
1603         {
1604             error_destroy (er);
1605             return 1;
1606         }
1607         eat_spaces (&t);
1608
1609         if (map_str_find (&maps, temp, &(**er).m_text))
1610         {
1611             mem_free ((void **) (void *) &temp);
1612             error_destroy (er);
1613             return 1;
1614         }
1615
1616         mem_free ((void **) (void *) &temp);
1617     }
1618
1619     /* try to extract "token" from "...$token$..." */
1620     {
1621         byte *processed = NULL;
1622         unsigned int len = 0;
1623       int i = 0;
1624
1625         if (string_grow (&processed, &len, '\0'))
1626         {
1627             error_destroy (er);
1628             return 1;
1629         }
1630
1631         while (i < str_length ((**er).m_text))
1632         {
1633             /* check if the dollar sign is repeated - if so skip it */
1634             if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1635             {
1636                 if (string_grow (&processed, &len, '$'))
1637                 {
1638                     mem_free ((void **) (void *) &processed);
1639                     error_destroy (er);
1640                     return 1;
1641                 }
1642
1643                 i += 2;
1644             }
1645             else if ((**er).m_text[i] != '$')
1646             {
1647                 if (string_grow (&processed, &len, (**er).m_text[i]))
1648                 {
1649                     mem_free ((void **) (void *) &processed);
1650                     error_destroy (er);
1651                     return 1;
1652                 }
1653
1654                 i++;
1655             }
1656             else
1657             {
1658                 if (string_grow (&processed, &len, '$'))
1659                 {
1660                     mem_free ((void **) (void *) &processed);
1661                     error_destroy (er);
1662                     return 1;
1663                 }
1664
1665                 {
1666                     /* length of token being extracted */
1667                     unsigned int tlen = 0;
1668
1669                     if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1670                     {
1671                         mem_free ((void **) (void *) &processed);
1672                         error_destroy (er);
1673                         return 1;
1674                     }
1675
1676                     /* skip the dollar sign */
1677                     i++;
1678
1679                     while ((**er).m_text[i] != '$')
1680                     {
1681                         if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1682                         {
1683                             mem_free ((void **) (void *) &processed);
1684                             error_destroy (er);
1685                             return 1;
1686                         }
1687
1688                         i++;
1689                     }
1690
1691                     /* skip the dollar sign */
1692                     i++;
1693                 }
1694             }
1695         }
1696
1697         mem_free ((void **) &(**er).m_text);
1698         (**er).m_text = processed;
1699     }
1700
1701     *text = t;
1702     return 0;
1703 }
1704
1705 /*
1706     returns 0 on success,
1707     returns 1 otherwise,
1708 */
1709 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1710 {
1711     const byte *t = *text;
1712     byte *temp = NULL;
1713     emit *e = NULL;
1714     emit_dest dest;
1715
1716     if (*t != '.')
1717         return 0;
1718
1719     t++;
1720     if (get_identifier (&t, &temp))
1721         return 1;
1722     eat_spaces (&t);
1723
1724     /* .emit */
1725     if (str_equal ((byte *) "emit", temp))
1726         dest = ed_output;
1727     /* .load */
1728     else if (str_equal ((byte *) "load", temp))
1729         dest = ed_regbyte;
1730     else
1731     {
1732         mem_free ((void **) (void *) &temp);
1733         return 0;
1734     }
1735
1736     mem_free ((void **) (void *) &temp);
1737
1738     emit_create (&e);
1739     if (e == NULL)
1740         return 1;
1741
1742     e->m_emit_dest = dest;
1743
1744     if (dest == ed_regbyte)
1745     {
1746         if (get_identifier (&t, &e->m_regname))
1747         {
1748             emit_destroy (&e);
1749             return 1;
1750         }
1751         eat_spaces (&t);
1752     }
1753
1754     /* 0xNN */
1755     if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1756     {
1757         t += 2;
1758         e->m_byte = (byte) hex_convert (&t);
1759
1760         e->m_emit_type = et_byte;
1761     }
1762     /* NNN */
1763     else if (*t >= '0' && *t <= '9')
1764     {
1765         e->m_byte = (byte) dec_convert (&t);
1766
1767         e->m_emit_type = et_byte;
1768     }
1769     /* * */
1770     else if (*t == '*')
1771     {
1772         t++;
1773
1774         e->m_emit_type = et_stream;
1775     }
1776     /* $ */
1777     else if (*t == '$')
1778     {
1779         t++;
1780
1781         e->m_emit_type = et_position;
1782     }
1783     /* 'c' */
1784     else if (*t == '\'')
1785     {
1786         if (get_string (&t, &temp))
1787         {
1788             emit_destroy (&e);
1789             return 1;
1790         }
1791         e->m_byte = (byte) temp[0];
1792
1793         mem_free ((void **) (void *) &temp);
1794
1795         e->m_emit_type = et_byte;
1796     }
1797     else
1798     {
1799         if (get_identifier (&t, &temp))
1800         {
1801             emit_destroy (&e);
1802             return 1;
1803         }
1804
1805         if (map_byte_find (&mapb, temp, &e->m_byte))
1806         {
1807             mem_free ((void **) (void *) &temp);
1808             emit_destroy (&e);
1809             return 1;
1810         }
1811
1812         mem_free ((void **) (void *) &temp);
1813
1814         e->m_emit_type = et_byte;
1815     }
1816
1817     eat_spaces (&t);
1818
1819     if (get_emits (&t, &e->m_next, mapb))
1820     {
1821         emit_destroy (&e);
1822         return 1;
1823     }
1824
1825     *text = t;
1826     *em = e;
1827     return 0;
1828 }
1829
1830 /*
1831     returns 0 on success,
1832     returns 1 otherwise,
1833 */
1834 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1835 {
1836     const byte *t = *text;
1837     spec *s = NULL;
1838
1839     spec_create (&s);
1840     if (s == NULL)
1841         return 1;
1842
1843     /* first - read optional .if statement */
1844     if (*t == '.')
1845     {
1846         const byte *u = t;
1847         byte *keyword = NULL;
1848
1849         /* skip the dot */
1850         u++;
1851
1852         if (get_identifier (&u, &keyword))
1853         {
1854             spec_destroy (&s);
1855             return 1;
1856         }
1857
1858         /* .if */
1859         if (str_equal ((byte *) "if", keyword))
1860         {
1861             cond_create (&s->m_cond);
1862             if (s->m_cond == NULL)
1863             {
1864                 spec_destroy (&s);
1865                 return 1;
1866             }
1867
1868             /* skip the left paren */
1869             eat_spaces (&u);
1870             u++;
1871
1872             /* get the left operand */
1873             eat_spaces (&u);
1874             if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1875             {
1876                 spec_destroy (&s);
1877                 return 1;
1878             }
1879             s->m_cond->m_operands[0].m_type = cot_regbyte;
1880
1881             /* get the operator (!= or ==) */
1882             eat_spaces (&u);
1883             if (*u == '!')
1884                 s->m_cond->m_type = ct_not_equal;
1885             else
1886                 s->m_cond->m_type = ct_equal;
1887             u += 2;
1888             eat_spaces (&u);
1889
1890             if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1891             {
1892                 /* skip the 0x prefix */
1893                 u += 2;
1894
1895                 /* get the right operand */
1896                 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1897                 s->m_cond->m_operands[1].m_type = cot_byte;
1898             }
1899             else /*if (*u >= '0' && *u <= '9')*/
1900             {
1901                 /* get the right operand */
1902                 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1903                 s->m_cond->m_operands[1].m_type = cot_byte;
1904             }
1905
1906             /* skip the right paren */
1907             eat_spaces (&u);
1908             u++;
1909
1910             eat_spaces (&u);
1911
1912             t = u;
1913         }
1914
1915         mem_free ((void **) (void *) &keyword);
1916     }
1917
1918     if (*t == '\'')
1919     {
1920         byte *temp = NULL;
1921
1922         if (get_string (&t, &temp))
1923         {
1924             spec_destroy (&s);
1925             return 1;
1926         }
1927         eat_spaces (&t);
1928
1929         if (*t == '-')
1930         {
1931             byte *temp2 = NULL;
1932
1933             /* skip the '-' character */
1934             t++;
1935             eat_spaces (&t);
1936
1937             if (get_string (&t, &temp2))
1938             {
1939                 mem_free ((void **) (void *) &temp);
1940                 spec_destroy (&s);
1941                 return 1;
1942             }
1943             eat_spaces (&t);
1944
1945             s->m_spec_type = st_byte_range;
1946             s->m_byte[0] = *temp;
1947             s->m_byte[1] = *temp2;
1948
1949             mem_free ((void **) (void *) &temp2);
1950         }
1951         else
1952         {
1953             s->m_spec_type = st_byte;
1954             *s->m_byte = *temp;
1955         }
1956
1957         mem_free ((void **) (void *) &temp);
1958     }
1959     else if (*t == '"')
1960     {
1961         if (get_string (&t, &s->m_string))
1962         {
1963             spec_destroy (&s);
1964             return 1;
1965         }
1966         eat_spaces (&t);
1967
1968         s->m_spec_type = st_string;
1969     }
1970     else if (*t == '.')
1971     {
1972         byte *keyword = NULL;
1973
1974         /* skip the dot */
1975         t++;
1976
1977         if (get_identifier (&t, &keyword))
1978         {
1979             spec_destroy (&s);
1980             return 1;
1981         }
1982         eat_spaces (&t);
1983
1984         /* .true */
1985         if (str_equal ((byte *) "true", keyword))
1986         {
1987             s->m_spec_type = st_true;
1988         }
1989         /* .false */
1990         else if (str_equal ((byte *) "false", keyword))
1991         {
1992             s->m_spec_type = st_false;
1993         }
1994         /* .debug */
1995         else if (str_equal ((byte *) "debug", keyword))
1996         {
1997             s->m_spec_type = st_debug;
1998         }
1999         /* .loop */
2000         else if (str_equal ((byte *) "loop", keyword))
2001         {
2002             if (get_identifier (&t, &s->m_string))
2003             {
2004                 mem_free ((void **) (void *) &keyword);
2005                 spec_destroy (&s);
2006                 return 1;
2007             }
2008             eat_spaces (&t);
2009
2010             s->m_spec_type = st_identifier_loop;
2011         }
2012         mem_free ((void **) (void *) &keyword);
2013     }
2014     else
2015     {
2016         if (get_identifier (&t, &s->m_string))
2017         {
2018             spec_destroy (&s);
2019             return 1;
2020         }
2021         eat_spaces (&t);
2022
2023         s->m_spec_type = st_identifier;
2024     }
2025
2026     if (get_error (&t, &s->m_errtext, maps))
2027     {
2028         spec_destroy (&s);
2029         return 1;
2030     }
2031
2032     if (get_emits (&t, &s->m_emits, mapb))
2033     {
2034         spec_destroy (&s);
2035         return 1;
2036     }
2037
2038     *text = t;
2039     *sp = s;
2040     return 0;
2041 }
2042
2043 /*
2044     returns 0 on success,
2045     returns 1 otherwise,
2046 */
2047 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2048 {
2049     const byte *t = *text;
2050     rule *r = NULL;
2051
2052     rule_create (&r);
2053     if (r == NULL)
2054         return 1;
2055
2056     if (get_spec (&t, &r->m_specs, maps, mapb))
2057     {
2058         rule_destroy (&r);
2059         return 1;
2060     }
2061
2062     while (*t != ';')
2063     {
2064         byte *op = NULL;
2065         spec *sp = NULL;
2066
2067         /* skip the dot that precedes "and" or "or" */
2068         t++;
2069
2070         /* read "and" or "or" keyword */
2071         if (get_identifier (&t, &op))
2072         {
2073             rule_destroy (&r);
2074             return 1;
2075         }
2076         eat_spaces (&t);
2077
2078         if (r->m_oper == op_none)
2079         {
2080             /* .and */
2081             if (str_equal ((byte *) "and", op))
2082                 r->m_oper = op_and;
2083             /* .or */
2084             else
2085                 r->m_oper = op_or;
2086         }
2087
2088         mem_free ((void **) (void *) &op);
2089
2090         if (get_spec (&t, &sp, maps, mapb))
2091         {
2092             rule_destroy (&r);
2093             return 1;
2094         }
2095
2096         spec_append (&r->m_specs, sp);
2097     }
2098
2099     /* skip the semicolon */
2100     t++;
2101     eat_spaces (&t);
2102
2103     *text = t;
2104     *ru = r;
2105     return 0;
2106 }
2107
2108 /*
2109     returns 0 on success,
2110     returns 1 otherwise,
2111 */
2112 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2113 {
2114     if (map_rule_find (&mapr, symbol, ru))
2115         return 1;
2116
2117     (**ru).m_referenced = 1;
2118
2119     return 0;
2120 }
2121
2122 /*
2123     returns 0 on success,
2124     returns 1 otherwise,
2125 */
2126 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2127     byte **string_symbol, map_byte *regbytes)
2128 {
2129     rule *rulez = di->m_rulez;
2130
2131     /* update dependecies for the root and lexer symbols */
2132     if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2133         (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2134         return 1;
2135
2136     mem_free ((void **) syntax_symbol);
2137     mem_free ((void **) string_symbol);
2138
2139     /* update dependecies for the rest of the rules */
2140     while (rulez)
2141     {
2142         spec *sp = rulez->m_specs;
2143
2144         /* iterate through all the specifiers */
2145         while (sp)
2146         {
2147             /* update dependency for identifier */
2148             if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2149             {
2150                 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2151                     return 1;
2152
2153                 mem_free ((void **) &sp->m_string);
2154             }
2155
2156             /* some errtexts reference to a rule */
2157             if (sp->m_errtext && sp->m_errtext->m_token_name)
2158             {
2159                 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2160                     return 1;
2161
2162                 mem_free ((void **) &sp->m_errtext->m_token_name);
2163             }
2164
2165             /* update dependency for condition */
2166             if (sp->m_cond)
2167             {
2168                 int i;
2169                 for (i = 0; i < 2; i++)
2170                     if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2171                     {
2172                         sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2173                             sp->m_cond->m_operands[i].m_regname);
2174
2175                         if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2176                             return 1;
2177
2178                         mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2179                     }
2180             }
2181
2182             /* update dependency for all .load instructions */
2183             if (sp->m_emits)
2184             {
2185                 emit *em = sp->m_emits;
2186                 while (em != NULL)
2187                 {
2188                     if (em->m_emit_dest == ed_regbyte)
2189                     {
2190                         em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2191
2192                         if (em->m_regbyte == NULL)
2193                             return 1;
2194
2195                         mem_free ((void **) &em->m_regname);
2196                     }
2197
2198                     em = em->m_next;
2199                 }
2200             }
2201
2202             sp = sp->next;
2203         }
2204
2205         rulez = rulez->next;
2206     }
2207
2208     /* check for unreferenced symbols */
2209     rulez = di->m_rulez;
2210     while (rulez != NULL)
2211     {
2212         if (!rulez->m_referenced)
2213         {
2214             map_rule *ma = mapr;
2215             while (ma)
2216             {
2217                 if (ma->data == rulez)
2218                 {
2219                     set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2220                     return 1;
2221                 }
2222                 ma = ma->next;
2223             }
2224         }
2225         rulez = rulez->next;
2226     }
2227
2228     return 0;
2229 }
2230
2231 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2232 {
2233     byte values[2];
2234     int i;
2235
2236     if (co == NULL)
2237         return 1;
2238
2239     for (i = 0; i < 2; i++)
2240         switch (co->m_operands[i].m_type)
2241         {
2242         case cot_byte:
2243             values[i] = co->m_operands[i].m_byte;
2244             break;
2245         case cot_regbyte:
2246             values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2247             break;
2248         }
2249
2250     switch (co->m_type)
2251     {
2252     case ct_equal:
2253         return values[0] == values[1];
2254     case ct_not_equal:
2255         return values[0] != values[1];
2256     }
2257
2258     return 0;
2259 }
2260
2261 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2262 {
2263     while (top != limit)
2264     {
2265         regbyte_ctx *rbc = top->m_prev;
2266         regbyte_ctx_destroy (&top);
2267         top = rbc;
2268     }
2269 }
2270
/* outcome of matching a rule (or a single specifier) against the input */
typedef enum match_result_
{
    /* the examined string does not match */
    mr_not_matched = 0,
    /* the examined string matches */
    mr_matched = 1,
    /* mr_not_matched + error has been raised */
    mr_error_raised = 2,
    /* used by identifier loops only */
    mr_dont_emit = 3,
    /* an internal error has occurred such as out of memory */
    mr_internal_error = 4
} match_result;
2279
2280 /*
2281  * This function does the main job. It parses the text and generates output data.
2282  */
2283 static match_result
2284 match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string,
2285        regbyte_ctx **rbc)
2286 {
2287    int ind = *index;
2288     match_result status = mr_not_matched;
2289     spec *sp = ru->m_specs;
2290     regbyte_ctx *ctx = *rbc;
2291
2292     /* for every specifier in the rule */
2293     while (sp)
2294     {
2295       int i, len, save_ind = ind;
2296         barray *array = NULL;
2297
2298         if (satisfies_condition (sp->m_cond, ctx))
2299         {
2300             switch (sp->m_spec_type)
2301             {
2302             case st_identifier:
2303                 barray_create (&array);
2304                 if (array == NULL)
2305                 {
2306                     free_regbyte_ctx_stack (ctx, *rbc);
2307                     return mr_internal_error;
2308                 }
2309
2310                 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2311
2312                 if (status == mr_internal_error)
2313                 {
2314                     free_regbyte_ctx_stack (ctx, *rbc);
2315                     barray_destroy (&array);
2316                     return mr_internal_error;
2317                 }
2318                 break;
2319             case st_string:
2320                 len = str_length (sp->m_string);
2321
2322                 /* prefilter the stream */
2323                 if (!filtering_string && di->m_string)
2324                 {
2325                     barray *ba;
2326                int filter_index = 0;
2327                     match_result result;
2328                     regbyte_ctx *null_ctx = NULL;
2329
2330                     barray_create (&ba);
2331                     if (ba == NULL)
2332                     {
2333                         free_regbyte_ctx_stack (ctx, *rbc);
2334                         return mr_internal_error;
2335                     }
2336
2337                     result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2338
2339                     if (result == mr_internal_error)
2340                     {
2341                         free_regbyte_ctx_stack (ctx, *rbc);
2342                         barray_destroy (&ba);
2343                         return mr_internal_error;
2344                     }
2345
2346                     if (result != mr_matched)
2347                     {
2348                         barray_destroy (&ba);
2349                         status = mr_not_matched;
2350                         break;
2351                     }
2352
2353                     barray_destroy (&ba);
2354
2355                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2356                     {
2357                         status = mr_not_matched;
2358                         break;
2359                     }
2360
2361                     status = mr_matched;
2362                     ind += len;
2363                 }
2364                 else
2365                 {
2366                     status = mr_matched;
2367                     for (i = 0; status == mr_matched && i < len; i++)
2368                         if (text[ind + i] != sp->m_string[i])
2369                             status = mr_not_matched;
2370
2371                     if (status == mr_matched)
2372                         ind += len;
2373                 }
2374                 break;
2375             case st_byte:
2376                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2377                 if (status == mr_matched)
2378                     ind++;
2379                 break;
2380             case st_byte_range:
2381                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2382                     mr_matched : mr_not_matched;
2383                 if (status == mr_matched)
2384                     ind++;
2385                 break;
2386             case st_true:
2387                 status = mr_matched;
2388                 break;
2389             case st_false:
2390                 status = mr_not_matched;
2391                 break;
2392             case st_debug:
2393                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2394                 break;
2395             case st_identifier_loop:
2396                 barray_create (&array);
2397                 if (array == NULL)
2398                 {
2399                     free_regbyte_ctx_stack (ctx, *rbc);
2400                     return mr_internal_error;
2401                 }
2402
2403                 status = mr_dont_emit;
2404                 for (;;)
2405                 {
2406                     match_result result;
2407
2408                     save_ind = ind;
2409                     result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2410
2411                     if (result == mr_error_raised)
2412                     {
2413                         status = result;
2414                         break;
2415                     }
2416                     else if (result == mr_matched)
2417                     {
2418                         if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2419                             barray_append (ba, &array))
2420                         {
2421                             free_regbyte_ctx_stack (ctx, *rbc);
2422                             barray_destroy (&array);
2423                             return mr_internal_error;
2424                         }
2425                         barray_destroy (&array);
2426                         barray_create (&array);
2427                         if (array == NULL)
2428                         {
2429                             free_regbyte_ctx_stack (ctx, *rbc);
2430                             return mr_internal_error;
2431                         }
2432                     }
2433                     else if (result == mr_internal_error)
2434                     {
2435                         free_regbyte_ctx_stack (ctx, *rbc);
2436                         barray_destroy (&array);
2437                         return mr_internal_error;
2438                     }
2439                     else
2440                         break;
2441                 }
2442                 break;
2443             }
2444         }
2445         else
2446         {
2447             status = mr_not_matched;
2448         }
2449
2450         if (status == mr_error_raised)
2451         {
2452             free_regbyte_ctx_stack (ctx, *rbc);
2453             barray_destroy (&array);
2454
2455             return mr_error_raised;
2456         }
2457
2458         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2459         {
2460             free_regbyte_ctx_stack (ctx, *rbc);
2461             barray_destroy (&array);
2462
2463             if (sp->m_errtext)
2464             {
2465                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2466                     ind), ind);
2467
2468                 return mr_error_raised;
2469             }
2470
2471             return mr_not_matched;
2472         }
2473
2474         if (status == mr_matched)
2475         {
2476             if (sp->m_emits)
2477                 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2478                 {
2479                     free_regbyte_ctx_stack (ctx, *rbc);
2480                     barray_destroy (&array);
2481                     return mr_internal_error;
2482                 }
2483
2484             if (array)
2485                 if (barray_append (ba, &array))
2486                 {
2487                     free_regbyte_ctx_stack (ctx, *rbc);
2488                     barray_destroy (&array);
2489                     return mr_internal_error;
2490                 }
2491         }
2492
2493         barray_destroy (&array);
2494
2495         /* if the rule operator is a logical or, we pick up the first matching specifier */
2496         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2497         {
2498             *index = ind;
2499             *rbc = ctx;
2500             return mr_matched;
2501         }
2502
2503         sp = sp->next;
2504     }
2505
2506     /* everything went fine - all specifiers match up */
2507     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2508     {
2509         *index = ind;
2510         *rbc = ctx;
2511         return mr_matched;
2512     }
2513
2514     free_regbyte_ctx_stack (ctx, *rbc);
2515     return mr_not_matched;
2516 }
2517
2518 static match_result
2519 fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP,
2520             int filtering_string, regbyte_ctx **rbc)
2521 {
2522    int ind = *index;
2523     int _P = filtering_string ? 0 : *_PP;
2524     int _P2;
2525     match_result status = mr_not_matched;
2526     spec *sp = ru->m_specs;
2527     regbyte_ctx *ctx = *rbc;
2528
2529     /* for every specifier in the rule */
2530     while (sp)
2531     {
2532       int i, len, save_ind = ind;
2533
2534         _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2535         if (bytepool_reserve (_BP, _P2))
2536         {
2537             free_regbyte_ctx_stack (ctx, *rbc);
2538             return mr_internal_error;
2539         }
2540
2541         if (satisfies_condition (sp->m_cond, ctx))
2542         {
2543             switch (sp->m_spec_type)
2544             {
2545             case st_identifier:
2546                 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2547
2548                 if (status == mr_internal_error)
2549                 {
2550                     free_regbyte_ctx_stack (ctx, *rbc);
2551                     return mr_internal_error;
2552                 }
2553                 break;
2554             case st_string:
2555                 len = str_length (sp->m_string);
2556
2557                 /* prefilter the stream */
2558                 if (!filtering_string && di->m_string)
2559                 {
2560                int filter_index = 0;
2561                     match_result result;
2562                     regbyte_ctx *null_ctx = NULL;
2563
2564                     result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2565
2566                     if (result == mr_internal_error)
2567                     {
2568                         free_regbyte_ctx_stack (ctx, *rbc);
2569                         return mr_internal_error;
2570                     }
2571
2572                     if (result != mr_matched)
2573                     {
2574                         status = mr_not_matched;
2575                         break;
2576                     }
2577
2578                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2579                     {
2580                         status = mr_not_matched;
2581                         break;
2582                     }
2583
2584                     status = mr_matched;
2585                     ind += len;
2586                 }
2587                 else
2588                 {
2589                     status = mr_matched;
2590                     for (i = 0; status == mr_matched && i < len; i++)
2591                         if (text[ind + i] != sp->m_string[i])
2592                             status = mr_not_matched;
2593
2594                     if (status == mr_matched)
2595                         ind += len;
2596                 }
2597                 break;
2598             case st_byte:
2599                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2600                 if (status == mr_matched)
2601                     ind++;
2602                 break;
2603             case st_byte_range:
2604                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2605                     mr_matched : mr_not_matched;
2606                 if (status == mr_matched)
2607                     ind++;
2608                 break;
2609             case st_true:
2610                 status = mr_matched;
2611                 break;
2612             case st_false:
2613                 status = mr_not_matched;
2614                 break;
2615             case st_debug:
2616                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2617                 break;
2618             case st_identifier_loop:
2619                 status = mr_dont_emit;
2620                 for (;;)
2621                 {
2622                     match_result result;
2623
2624                     save_ind = ind;
2625                     result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2626
2627                     if (result == mr_error_raised)
2628                     {
2629                         status = result;
2630                         break;
2631                     }
2632                     else if (result == mr_matched)
2633                     {
2634                         if (!filtering_string)
2635                         {
2636                             if (sp->m_emits != NULL)
2637                             {
2638                                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2639                                 {
2640                                     free_regbyte_ctx_stack (ctx, *rbc);
2641                                     return mr_internal_error;
2642                                 }
2643                             }
2644
2645                             _P = _P2;
2646                             _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2647                             if (bytepool_reserve (_BP, _P2))
2648                             {
2649                                 free_regbyte_ctx_stack (ctx, *rbc);
2650                                 return mr_internal_error;
2651                             }
2652                         }
2653                     }
2654                     else if (result == mr_internal_error)
2655                     {
2656                         free_regbyte_ctx_stack (ctx, *rbc);
2657                         return mr_internal_error;
2658                     }
2659                     else
2660                         break;
2661                 }
2662                 break;
2663             }
2664         }
2665         else
2666         {
2667             status = mr_not_matched;
2668         }
2669
2670         if (status == mr_error_raised)
2671         {
2672             free_regbyte_ctx_stack (ctx, *rbc);
2673
2674             return mr_error_raised;
2675         }
2676
2677         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2678         {
2679             free_regbyte_ctx_stack (ctx, *rbc);
2680
2681             if (sp->m_errtext)
2682             {
2683                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2684                     ind), ind);
2685
2686                 return mr_error_raised;
2687             }
2688
2689             return mr_not_matched;
2690         }
2691
2692         if (status == mr_matched)
2693         {
2694             if (sp->m_emits != NULL)
2695                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2696                 {
2697                     free_regbyte_ctx_stack (ctx, *rbc);
2698                     return mr_internal_error;
2699                 }
2700
2701             _P = _P2;
2702         }
2703
2704         /* if the rule operator is a logical or, we pick up the first matching specifier */
2705         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2706         {
2707             *index = ind;
2708             *rbc = ctx;
2709             if (!filtering_string)
2710                 *_PP = _P;
2711             return mr_matched;
2712         }
2713
2714         sp = sp->next;
2715     }
2716
2717     /* everything went fine - all specifiers match up */
2718     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2719     {
2720         *index = ind;
2721         *rbc = ctx;
2722         if (!filtering_string)
2723             *_PP = _P;
2724         return mr_matched;
2725     }
2726
2727     free_regbyte_ctx_stack (ctx, *rbc);
2728     return mr_not_matched;
2729 }
2730
2731 static byte *
2732 error_get_token (error *er, dict *di, const byte *text, int ind)
2733 {
2734     byte *str = NULL;
2735
2736     if (er->m_token)
2737     {
2738         barray *ba;
2739       int filter_index = 0;
2740         regbyte_ctx *ctx = NULL;
2741
2742         barray_create (&ba);
2743         if (ba != NULL)
2744         {
2745             if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2746                 filter_index)
2747             {
2748                 str = (byte *) mem_alloc (filter_index + 1);
2749                 if (str != NULL)
2750                 {
2751                     str_copy_n (str, text + ind, filter_index);
2752                     str[filter_index] = '\0';
2753                 }
2754             }
2755             barray_destroy (&ba);
2756         }
2757     }
2758
2759     return str;
2760 }
2761
/*
    Everything that is live while grammar_load_from_text parses a grammar script; all of it
    is released together by grammar_load_state_destroy.
*/
typedef struct grammar_load_state_
{
    dict *di;               /* dictionary being built */
    byte *syntax_symbol;    /* name of the root symbol given after .syntax */
    byte *string_symbol;    /* name given by the optional .string directive */
    map_str *maps;          /* .errtext definitions */
    map_byte *mapb;         /* .emtcode definitions */
    map_rule *mapr;         /* rule name -> rule map */
} grammar_load_state;
2771
2772 static void grammar_load_state_create (grammar_load_state **gr)
2773 {
2774     *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2775     if (*gr)
2776     {
2777         (**gr).di = NULL;
2778         (**gr).syntax_symbol = NULL;
2779         (**gr).string_symbol = NULL;
2780         (**gr).maps = NULL;
2781         (**gr).mapb = NULL;
2782         (**gr).mapr = NULL;
2783     }
2784 }
2785
2786 static void grammar_load_state_destroy (grammar_load_state **gr)
2787 {
2788     if (*gr)
2789     {
2790         dict_destroy (&(**gr).di);
2791         mem_free ((void **) &(**gr).syntax_symbol);
2792         mem_free ((void **) &(**gr).string_symbol);
2793         map_str_destroy (&(**gr).maps);
2794         map_byte_destroy (&(**gr).mapb);
2795         map_rule_destroy (&(**gr).mapr);
2796         mem_free ((void **) gr);
2797     }
2798 }
2799
2800 /*
2801     the API
2802 */
2803
2804 grammar grammar_load_from_text (const byte *text)
2805 {
2806     grammar_load_state *g = NULL;
2807     grammar id = 0;
2808
2809     clear_last_error ();
2810
2811     grammar_load_state_create (&g);
2812     if (g == NULL)
2813         return 0;
2814
2815     dict_create (&g->di);
2816     if (g->di == NULL)
2817     {
2818         grammar_load_state_destroy (&g);
2819         return 0;
2820     }
2821
2822     eat_spaces (&text);
2823
2824     /* skip ".syntax" keyword */
2825     text += 7;
2826     eat_spaces (&text);
2827
2828     /* retrieve root symbol */
2829     if (get_identifier (&text, &g->syntax_symbol))
2830     {
2831         grammar_load_state_destroy (&g);
2832         return 0;
2833     }
2834     eat_spaces (&text);
2835
2836     /* skip semicolon */
2837     text++;
2838     eat_spaces (&text);
2839
2840     while (*text)
2841     {
2842         byte *symbol = NULL;
2843         int is_dot = *text == '.';
2844
2845         if (is_dot)
2846             text++;
2847
2848         if (get_identifier (&text, &symbol))
2849         {
2850             grammar_load_state_destroy (&g);
2851             return 0;
2852         }
2853         eat_spaces (&text);
2854
2855         /* .emtcode */
2856         if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2857         {
2858             map_byte *ma = NULL;
2859
2860             mem_free ((void **) (void *) &symbol);
2861
2862             if (get_emtcode (&text, &ma))
2863             {
2864                 grammar_load_state_destroy (&g);
2865                 return 0;
2866             }
2867
2868             map_byte_append (&g->mapb, ma);
2869         }
2870         /* .regbyte */
2871         else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2872         {
2873             map_byte *ma = NULL;
2874
2875             mem_free ((void **) (void *) &symbol);
2876
2877             if (get_regbyte (&text, &ma))
2878             {
2879                 grammar_load_state_destroy (&g);
2880                 return 0;
2881             }
2882
2883             map_byte_append (&g->di->m_regbytes, ma);
2884         }
2885         /* .errtext */
2886         else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2887         {
2888             map_str *ma = NULL;
2889
2890             mem_free ((void **) (void *) &symbol);
2891
2892             if (get_errtext (&text, &ma))
2893             {
2894                 grammar_load_state_destroy (&g);
2895                 return 0;
2896             }
2897
2898             map_str_append (&g->maps, ma);
2899         }
2900         /* .string */
2901         else if (is_dot && str_equal (symbol, (byte *) "string"))
2902         {
2903             mem_free ((void **) (void *) &symbol);
2904
2905             if (g->di->m_string != NULL)
2906             {
2907                 grammar_load_state_destroy (&g);
2908                 return 0;
2909             }
2910
2911             if (get_identifier (&text, &g->string_symbol))
2912             {
2913                 grammar_load_state_destroy (&g);
2914                 return 0;
2915             }
2916
2917             /* skip semicolon */
2918             eat_spaces (&text);
2919             text++;
2920             eat_spaces (&text);
2921         }
2922         else
2923         {
2924             rule *ru = NULL;
2925             map_rule *ma = NULL;
2926
2927             if (get_rule (&text, &ru, g->maps, g->mapb))
2928             {
2929                 grammar_load_state_destroy (&g);
2930                 return 0;
2931             }
2932
2933             rule_append (&g->di->m_rulez, ru);
2934
2935             /* if a rule consist of only one specifier, give it an ".and" operator */
2936             if (ru->m_oper == op_none)
2937                 ru->m_oper = op_and;
2938
2939             map_rule_create (&ma);
2940             if (ma == NULL)
2941             {
2942                 grammar_load_state_destroy (&g);
2943                 return 0;
2944             }
2945
2946             ma->key = symbol;
2947             ma->data = ru;
2948             map_rule_append (&g->mapr, ma);
2949         }
2950     }
2951
2952     if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2953         g->di->m_regbytes))
2954     {
2955         grammar_load_state_destroy (&g);
2956         return 0;
2957     }
2958
2959     dict_append (&g_dicts, g->di);
2960     id = g->di->m_id;
2961     g->di = NULL;
2962
2963     grammar_load_state_destroy (&g);
2964
2965     return id;
2966 }
2967
2968 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2969 {
2970     dict *di = NULL;
2971     map_byte *reg = NULL;
2972
2973     clear_last_error ();
2974
2975     dict_find (&g_dicts, id, &di);
2976     if (di == NULL)
2977     {
2978         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2979         return 0;
2980     }
2981
2982     reg = map_byte_locate (&di->m_regbytes, name);
2983     if (reg == NULL)
2984     {
2985         set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2986         return 0;
2987     }
2988
2989     reg->data = value;
2990     return 1;
2991 }
2992
2993 /*
2994     internal checking function used by both grammar_check and grammar_fast_check functions
2995 */
2996 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
2997     unsigned int estimate_prod_size, int use_fast_path)
2998 {
2999     dict *di = NULL;
3000    int index = 0;
3001
3002     clear_last_error ();
3003
3004     dict_find (&g_dicts, id, &di);
3005     if (di == NULL)
3006     {
3007         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3008         return 0;
3009     }
3010
3011     *prod = NULL;
3012     *size = 0;
3013
3014     if (use_fast_path)
3015     {
3016         regbyte_ctx *rbc = NULL;
3017         bytepool *bp = NULL;
3018         int _P = 0;
3019
3020         bytepool_create (&bp, estimate_prod_size);
3021         if (bp == NULL)
3022             return 0;
3023
3024         if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3025         {
3026             bytepool_destroy (&bp);
3027             free_regbyte_ctx_stack (rbc, NULL);
3028             return 0;
3029         }
3030
3031         free_regbyte_ctx_stack (rbc, NULL);
3032
3033         *prod = bp->_F;
3034         *size = _P;
3035         bp->_F = NULL;
3036         bytepool_destroy (&bp);
3037     }
3038     else
3039     {
3040         regbyte_ctx *rbc = NULL;
3041         barray *ba = NULL;
3042
3043         barray_create (&ba);
3044         if (ba == NULL)
3045             return 0;
3046
3047         if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3048         {
3049             barray_destroy (&ba);
3050             free_regbyte_ctx_stack (rbc, NULL);
3051             return 0;
3052         }
3053
3054         free_regbyte_ctx_stack (rbc, NULL);
3055
3056         *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3057         if (*prod == NULL)
3058         {
3059             barray_destroy (&ba);
3060             return 0;
3061         }
3062
3063         mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3064         *size = ba->len;
3065         barray_destroy (&ba);
3066     }
3067
3068     return 1;
3069 }
3070
3071 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3072 {
3073     return _grammar_check (id, text, prod, size, 0, 0);
3074 }
3075
3076 int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3077     unsigned int estimate_prod_size)
3078 {
3079     return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3080 }
3081
3082 int grammar_destroy (grammar id)
3083 {
3084     dict **di = &g_dicts;
3085
3086     clear_last_error ();
3087
3088     while (*di != NULL)
3089     {
3090         if ((**di).m_id == id)
3091         {
3092             dict *tmp = *di;
3093             *di = (**di).next;
3094             dict_destroy (&tmp);
3095             return 1;
3096         }
3097
3098         di = &(**di).next;
3099     }
3100
3101     set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3102     return 0;
3103 }
3104
3105 static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3106 {
3107     if (*dots_made == 0)
3108     {
3109         if (*len < size - 1)
3110         {
3111             text[(*len)++] = x;
3112             text[*len] = '\0';
3113         }
3114         else
3115         {
3116             int i;
3117             for (i = 0; i < 3; i++)
3118                 if (--(*len) >= 0)
3119                     text[*len] = '.';
3120             *dots_made = 1;
3121         }
3122     }
3123 }
3124
3125 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3126 {
3127     int len = 0, dots_made = 0;
3128     const byte *p = error_message;
3129
3130     *text = '\0';
3131
3132     if (p)
3133     {
3134         while (*p)
3135         {
3136             if (*p == '$')
3137             {
3138                 const byte *r = error_param;
3139
3140                 while (*r)
3141                 {
3142                     append_character (*r++, text, &dots_made, &len, (int) size);
3143                 }
3144
3145                 p++;
3146             }
3147             else
3148             {
3149                 append_character (*p++, text, &dots_made, &len, size);
3150             }
3151         }
3152     }
3153
3154     *pos = error_position;
3155 }