src/mesa/shader/grammar/grammar.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.6
   4  *
   5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file grammar.c
  27  * syntax parsing engine
  28  * \author Michal Krol
  29  */
  30
  31 #ifndef GRAMMAR_PORT_BUILD
  32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
  33 #endif
  34
  35 /*
  36 */
  37
  38 /*
  39     INTRODUCTION
  40     ------------
  41
  42     The task is to check the syntax of an input string. Input string is a stream of ASCII
  43     characters terminated with a null-character ('\0'). Checking it using C language is
  44     difficult and hard to implement without bugs. It is hard to maintain and make changes when
  45     the syntax changes.
  46
  47     This is because of a high redundancy of the C code. Large blocks of code are duplicated with
  48     only small changes. Even use of macros does not solve the problem because macros cannot
  49     erase the complexity of the problem.
  50
  51     The resolution is to create a new language that will be highly oriented to our task. Once
  52     we describe a particular syntax, we are done. We can then focus on the code that implements
   53     the language. The size and complexity of it is relatively small compared to the code that directly
  54     checks the syntax.
  55
  56     First, we must implement our new language. Here, the language is implemented in C, but it
  57     could also be implemented in any other language. The code is listed below. We must take
   58     good care that it is bug free. This is simple because the code is simple and clean.
  59
  60     Next, we must describe the syntax of our new language in itself. Once created and checked
   61     manually that it is correct, we can use it to check other scripts.
  62
  63     Note that our new language loading code does not have to check the syntax. It is because we
  64     assume that the script describing itself is correct, and other scripts can be syntactically
  65     checked by the former script. The loading code must only do semantic checking which leads us to
  66     simple resolving references.
  67
  68     THE LANGUAGE
  69     ------------
  70
  71     Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
  72     sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
  73     which is an identifier, and its definition. A definition is in turn a sequence of specifiers
   74     connected with ".and" or ".or" operator. These operators cannot be mixed together in one
  75     definition. Specifier can be a symbol, string, character, character range or a special
  76     keyword ".true" or ".false".
  77
  78     On the very beginning of the script there is a declaration of a root symbol and is in the form:
  79         .syntax <root_symbol>;
   80     The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
  81     the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
  82     the symbol evaluates to true. Definition evaluation depends on the operator used to connect
  83     specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
   84     only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
  85     true if any of the specifiers evaluates to true. If definition contains only one specifier,
  86     it is evaluated as if it was connected with ".true" keyword by ".and" operator.
  87
  88     If specifier is a ".true" keyword, it always evaluates to true.
  89
  90     If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
  91     when it does not evaluate to true.
  92
  93     Character range specifier is in the form:
  94         '<first_character>' - '<second_character>'
  95     If specifier is a character range, it evaluates to true if character in the stream is greater
  96     or equal to <first_character> and less or equal to <second_character>. In that situation
  97     the stream pointer is advanced to point to next character in the stream. All C-style escape
   98     sequences are supported although trigraph sequences are not. The comparisons are performed
  99     on 8-bit unsigned integers.
 100
 101     Character specifier is in the form:
 102         '<single_character>'
 103     It evaluates to true if the following character range specifier evaluates to true:
 104         '<single_character>' - '<single_character>'
 105
 106     String specifier is in the form:
 107         "<string>"
 108     Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
 109     <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
 110     the following character specifier evaluates to true:
 111         '<string>[i]'
 112     If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
 113
 114     Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
 115         .loop <symbol>                  (1)
 116     where <symbol> is defined as follows:
 117         <symbol> <definition>;          (2)
 118     Construction (1) is replaced by the following code:
 119         <symbol$1>
 120     and declaration (2) is replaced by the following:
 121         <symbol$1> <symbol$2> .or .true;
 122         <symbol$2> <symbol> .and <symbol$1>;
 123         <symbol> <definition>;
 124
  125     Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
 126     registers that can be accessed in the syn body. Each reg has its name and a default value.
 127     The register is one byte wide. The C code can change the default value by calling
 128     grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
 129     a sequence of specifiers joined with .and or .or operator. And now each specifier can be
 130     prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
 131     where <operator> can be == or !=. If the condition evaluates to false, the specifier
  132     evaluates to .false. Otherwise it evaluates to the specifier.
 133
 134     ESCAPE SEQUENCES
 135     ----------------
 136
 137     Synek supports all escape sequences in character specifiers. The mapping table is listed below.
  138     All occurrences of the characters in the first column are replaced with the corresponding
 139     character in the second column.
 140
 141         Escape sequence         Represents
 142     ------------------------------------------------------------------------------------------------
 143         \a                      Bell (alert)
 144         \b                      Backspace
 145         \f                      Formfeed
 146         \n                      New line
 147         \r                      Carriage return
 148         \t                      Horizontal tab
 149         \v                      Vertical tab
 150         \'                      Single quotation mark
 151         \"                      Double quotation mark
 152         \\                      Backslash
 153         \?                      Literal question mark
 154         \ooo                    ASCII character in octal notation
 155         \xhhh                   ASCII character in hexadecimal notation
 156     ------------------------------------------------------------------------------------------------
 157
 158     RAISING ERRORS
 159     --------------
 160
 161     Any specifier can be followed by a special construction that is executed when the specifier
 162     evaluates to false. The construction is in the form:
 163         .error <ERROR_TEXT>
 164     <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
 165     in the form:
 166         .errtext <ERROR_TEXT> "<error_desc>"
 167     When specifier evaluates to false and this construction is present, parsing is stopped
 168     immediately and <error_desc> is returned as a result of parsing. The error position is also
  169     returned and it is meant as an offset from the beginning of the stream to the character that
 170     was valid so far. Example:
 171
 172         (**** syntax script ****)
 173
 174         .syntax program;
 175         .errtext MISSING_SEMICOLON      "missing ';'"
 176         program         declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
 177                         .loop space .and '\0';
 178         declaration     "declare" .and .loop space .and identifier;
 179         space           ' ';
 180
 181         (**** sample code ****)
 182
 183         declare foo ,
 184
 185     In the example above checking the sample code will result in error message "missing ';'" and
 186     error position 12. The sample code is not correct. Note the presence of '\0' specifier to
 187     assure that there is no code after semicolon - only spaces.
 188     <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
 189     the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
  190     the identifier name. The starting position is the error position. The length of the resulting
 191     string is the position after invoking the symbol.
 192
 193     PRODUCTION
 194     ----------
 195
 196     Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
 197     that evaluate to true. That is, every specifier and optional error construction can be followed
 198     by a number of emit constructions that are in the form:
 199         .emit <parameter>
  200     <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
 201     0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
 202     in the form:
 203         .emtcode <identifier> <hex_number>
 204
 205     When given specifier evaluates to true, all emits associated with the specifier are output
 206     in order they were declared. A star means that last-read character should be output instead
 207     of constant value. Example:
 208
 209         (**** syntax script ****)
 210
 211         .syntax foobar;
 212         .emtcode WORD_FOO       0x01
 213         .emtcode WORD_BAR       0x02
 214         foobar      FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
 215         FOO         "foo" .and SPACE;
 216         BAR         "bar" .and SPACE;
 217         SPACE       ' ' .or '\0';
 218
 219         (**** sample text 1 ****)
 220
 221         foo
 222
 223         (**** sample text 2 ****)
 224
 225         foobar
 226
 227     For both samples the result will be one-element array. For first sample text it will be
 228     value 1, for second - 0. Note that every text will be accepted because of presence of
 229     .true as an alternative.
 230
 231     Another example:
 232
 233         (**** syntax script ****)
 234
 235         .syntax declaration;
 236         .emtcode VARIABLE       0x01
 237         declaration     "declare" .and .loop space .and
 238                         identifier .emit VARIABLE .and          (1)
 239                         .true .emit 0x00 .and                   (2)
 240                         .loop space .and ';';
 241         space           ' ' .or '\t';
 242         identifier      .loop id_char .emit *;                  (3)
 243         id_char         'a'-'z' .or 'A'-'Z' .or '_';
 244
 245         (**** sample code ****)
 246
 247         declare    fubar;
 248
 249     In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
 250     true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
 251     to terminate the string with null to signal when the string ends. Specifier (3) outputs
 252     all characters that make declared identifier. The result of sample code will be the
 253     following array:
 254         { 1, 'f', 'u', 'b', 'a', 'r', 0 }
 255
 256     If .emit is followed by dollar $, it means that current position should be output. Current
 257     position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
 258     first character consumed by the specifier associated with the .emit instruction. Current
 259     position is stored in the output buffer in Little-Endian convention (the lowest byte comes
 260     first).
 261 */
 262
 263 #include <stdio.h>
 264
  265 static void mem_free (void **);    /* forward declaration: the error-state helpers below call it before its definition */
 266
  267 /*
  268     internal error messages; a '$' in a message marks where error_param is inserted
  269 */
  270 static const byte *OUT_OF_MEMORY =          (byte *) "internal error 1001: out of physical memory";
  271 static const byte *UNRESOLVED_REFERENCE =   (byte *) "internal error 1002: unresolved reference '$'";
  272 static const byte *INVALID_GRAMMAR_ID =     (byte *) "internal error 1003: invalid grammar object";
  273 static const byte *INVALID_REGISTER_NAME =  (byte *) "internal error 1004: invalid register name: '$'";
  274 /*static const byte *DUPLICATE_IDENTIFIER =   (byte *) "internal error 1005: identifier '$' already defined";*/
  275 static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
  276
  277 static const byte *error_message = NULL;    /* points to one of the error messages above; NULL means no error is recorded */
  278 static byte *error_param = NULL;        /* this is inserted into error_message in place of $ */
  279 static int error_position = -1;         /* -1 is the reset value written by clear_last_error() */
  280
  281 static byte *unknown = (byte *) "???";  /* static placeholder stored in error_param when no parameter is given; never freed */
 282
 283 static void clear_last_error (void)
 284 {
 285     /* reset error message */
 286     error_message = NULL;
 287
 288     /* free error parameter - if error_param is a "???" don't free it - it's static */
 289     if (error_param != unknown)
 290         mem_free ((void **) (void *) &error_param);
 291     else
 292         error_param = NULL;
 293
 294     /* reset error position */
 295     error_position = -1;
 296 }
 297
 298 static void set_last_error (const byte *msg, byte *param, int pos)
 299 {
 300     /* error message can be set only once */
 301     if (error_message != NULL)
 302     {
 303         mem_free ((void **) (void *) &param);
 304         return;
 305     }
 306
 307     error_message = msg;
 308
 309     /* if param is NULL, set error_param to unknown ("???") */
 310     /* note: do not try to strdup the "???" - it may be that we are here because of */
 311     /* out of memory error so strdup can fail */
 312     if (param != NULL)
 313         error_param = param;
 314     else
 315         error_param = unknown;
 316
 317     error_position = pos;
 318 }
 319
 320 /*
 321     memory management routines
 322 */
 323 static void *mem_alloc (size_t size)
 324 {
 325     void *ptr = grammar_alloc_malloc (size);
 326     if (ptr == NULL)
 327         set_last_error (OUT_OF_MEMORY, NULL, -1);
 328     return ptr;
 329 }
 330
 331 static void *mem_copy (void *dst, const void *src, size_t size)
 332 {
 333     return grammar_memory_copy (dst, src, size);
 334 }
 335
 336 static void mem_free (void **ptr)
 337 {
 338     grammar_alloc_free (*ptr);
 339     *ptr = NULL;
 340 }
 341
 342 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
 343 {
 344     void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
 345     if (ptr2 == NULL)
 346         set_last_error (OUT_OF_MEMORY, NULL, -1);
 347     return ptr2;
 348 }
 349
 350 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
 351 {
 352     return grammar_string_copy_n (dst, src, max_len);
 353 }
 354
 355 static byte *str_duplicate (const byte *str)
 356 {
 357     byte *new_str = grammar_string_duplicate (str);
 358     if (new_str == NULL)
 359         set_last_error (OUT_OF_MEMORY, NULL, -1);
 360     return new_str;
 361 }
 362
 363 static int str_equal (const byte *str1, const byte *str2)
 364 {
 365     return grammar_string_compare (str1, str2) == 0;
 366 }
 367
 368 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
 369 {
 370     return grammar_string_compare_n (str1, str2, n) == 0;
 371 }
 372
 373 static int
 374 str_length (const byte *str)
 375 {
 376    return (int) (grammar_string_length (str));
 377 }
 378
 379 /*
 380     useful macros
 381 */
 382 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
 383     static void _Ty##_append (_Ty **x, _Ty *nx) {\
 384         while (*x) x = &(**x).next;\
 385         *x = nx;\
 386     }
 387
 388 /*
 389     string to byte map typedef
 390 */
 391 typedef struct map_byte_
 392 {
 393     byte *key;
 394     byte data;
 395     struct map_byte_ *next;
 396 } map_byte;
 397
 398 static void map_byte_create (map_byte **ma)
 399 {
 400     *ma = (map_byte *) mem_alloc (sizeof (map_byte));
 401     if (*ma)
 402     {
 403         (**ma).key = NULL;
 404         (**ma).data = '\0';
 405         (**ma).next = NULL;
 406     }
 407 }
 408
 409 static void map_byte_destroy (map_byte **ma)
 410 {
 411     if (*ma)
 412     {
 413         map_byte_destroy (&(**ma).next);
 414         mem_free ((void **) &(**ma).key);
 415         mem_free ((void **) ma);
 416     }
 417 }
 418
  419 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)    /* defines map_byte_append (map_byte **x, map_byte *nx): appends nx at the list tail */
 420
 421 /*
 422     searches the map for the specified key,
 423     returns pointer to the element with the specified key if it exists
 424     returns NULL otherwise
 425 */
 426 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
 427 {
 428     while (*ma)
 429     {
 430         if (str_equal ((**ma).key, key))
 431             return *ma;
 432
 433         ma = &(**ma).next;
 434     }
 435
 436     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
 437     return NULL;
 438 }
 439
 440 /*
 441     searches the map for specified key,
 442     if the key is matched, *data is filled with data associated with the key,
 443     returns 0 if the key is matched,
 444     returns 1 otherwise
 445 */
 446 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
 447 {
 448     map_byte *found = map_byte_locate (ma, key);
 449     if (found != NULL)
 450     {
 451         *data = found->data;
 452
 453         return 0;
 454     }
 455
 456     return 1;
 457 }
 458
 459 /*
 460     regbyte context typedef
 461
 462     Each regbyte consists of its name and a default value. These are static and created at
 463     grammar script compile-time, for example the following line:
 464         .regbyte vertex_blend      0x00
 465     adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
 466     When the script is executed, this regbyte can be accessed by name for read and write. When a
 467     particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
  468     stack. The new entry contains information about which regbyte it references and its new value.
 469     When a given regbyte is accessed for read, the stack is searched top-down to find an
 470     entry that references the regbyte. The first matching entry is used to return the current
 471     value it holds. If no entry is found, the default value is returned.
 472 */
 473 typedef struct regbyte_ctx_
 474 {
 475     map_byte *m_regbyte;
 476     byte m_current_value;
 477     struct regbyte_ctx_ *m_prev;
 478 } regbyte_ctx;
 479
 480 static void regbyte_ctx_create (regbyte_ctx **re)
 481 {
 482     *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
 483     if (*re)
 484     {
 485         (**re).m_regbyte = NULL;
 486         (**re).m_prev = NULL;
 487     }
 488 }
 489
 490 static void regbyte_ctx_destroy (regbyte_ctx **re)
 491 {
 492     if (*re)
 493     {
 494         mem_free ((void **) re);
 495     }
 496 }
 497
 498 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
 499 {
 500     /* first lookup in the register stack */
 501     while (*re != NULL)
 502     {
 503         if ((**re).m_regbyte == reg)
 504             return (**re).m_current_value;
 505
 506         re = &(**re).m_prev;
 507     }
 508
 509     /* if not found - return the default value */
 510     return reg->data;
 511 }
 512
 513 /*
 514     emit type typedef
 515 */
 516 typedef enum emit_type_
 517 {
 518     et_byte,            /* explicit number */
 519     et_stream,          /* eaten character */
 520     et_position         /* current position */
 521 } emit_type;
 522
 523 /*
 524     emit destination typedef
 525 */
 526 typedef enum emit_dest_
 527 {
 528     ed_output,          /* write to the output buffer */
 529     ed_regbyte          /* write a particular regbyte */
 530 } emit_dest;
 531
 532 /*
 533     emit typedef
 534 */
 535 typedef struct emit_
 536 {
 537     emit_dest m_emit_dest;
 538     emit_type m_emit_type;      /* ed_output */
 539     byte m_byte;                /* et_byte */
 540     map_byte *m_regbyte;        /* ed_regbyte */
 541     byte *m_regname;            /* ed_regbyte - temporary */
 542     struct emit_ *m_next;
 543 } emit;
 544
 545 static void emit_create (emit **em)
 546 {
 547     *em = (emit *) mem_alloc (sizeof (emit));
 548     if (*em)
 549     {
 550         (**em).m_emit_dest = ed_output;
 551         (**em).m_emit_type = et_byte;
 552         (**em).m_byte = '\0';
 553         (**em).m_regbyte = NULL;
 554         (**em).m_regname = NULL;
 555         (**em).m_next = NULL;
 556     }
 557 }
 558
 559 static void emit_destroy (emit **em)
 560 {
 561     if (*em)
 562     {
 563         emit_destroy (&(**em).m_next);
 564         mem_free ((void **) &(**em).m_regname);
 565         mem_free ((void **) em);
 566     }
 567 }
 568
 569 static unsigned int emit_size (emit *_E)
 570 {
 571     unsigned int n = 0;
 572
 573     while (_E != NULL)
 574     {
 575         if (_E->m_emit_dest == ed_output)
 576         {
 577             if (_E->m_emit_type == et_position)
 578                 n += 4;     /* position is a 32-bit unsigned integer */
 579             else
 580                 n++;
 581         }
 582         _E = _E->m_next;
 583     }
 584
 585     return n;
 586 }
 587
 588 static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx)
 589 {
 590     while (_E != NULL)
 591     {
 592         if (_E->m_emit_dest == ed_output)
 593         {
 594             if (_E->m_emit_type == et_byte)
 595                 *_P++ = _E->m_byte;
 596             else if (_E->m_emit_type == et_stream)
 597                 *_P++ = c;
 598             else /* _Em->type == et_position */
 599             {
 600                 *_P++ = (byte) (_Pos);
 601                 *_P++ = (byte) (_Pos >> 8);
 602                 *_P++ = (byte) (_Pos >> 16);
 603                 *_P++ = (byte) (_Pos >> 24);
 604             }
 605         }
 606         else
 607         {
 608             regbyte_ctx *new_rbc;
 609             regbyte_ctx_create (&new_rbc);
 610             if (new_rbc == NULL)
 611                 return 1;
 612
 613             new_rbc->m_prev = *_Ctx;
 614             new_rbc->m_regbyte = _E->m_regbyte;
 615             *_Ctx = new_rbc;
 616
 617             if (_E->m_emit_type == et_byte)
 618                 new_rbc->m_current_value = _E->m_byte;
 619             else if (_E->m_emit_type == et_stream)
 620                 new_rbc->m_current_value = c;
 621         }
 622
 623         _E = _E->m_next;
 624     }
 625
 626     return 0;
 627 }
 628
 629 /*
 630     error typedef
 631 */
 632 typedef struct error_
 633 {
 634     byte *m_text;
 635     byte *m_token_name;
 636     struct rule_ *m_token;
 637 } error;
 638
 639 static void error_create (error **er)
 640 {
 641     *er = (error *) mem_alloc (sizeof (error));
 642     if (*er)
 643     {
 644         (**er).m_text = NULL;
 645         (**er).m_token_name = NULL;
 646         (**er).m_token = NULL;
 647     }
 648 }
 649
 650 static void error_destroy (error **er)
 651 {
 652     if (*er)
 653     {
 654         mem_free ((void **) &(**er).m_text);
 655         mem_free ((void **) &(**er).m_token_name);
 656         mem_free ((void **) er);
 657     }
 658 }
 659
  660 struct dict_;    /* forward declaration; the full dict structure is defined further below */
  661
  662 static byte *
  663 error_get_token (error *, struct dict_ *, const byte *, int);    /* prototype only; the body is defined elsewhere in the file */
 664
 665 /*
 666     condition operand type typedef
 667 */
 668 typedef enum cond_oper_type_
 669 {
 670     cot_byte,               /* constant 8-bit unsigned integer */
 671     cot_regbyte             /* pointer to byte register containing the current value */
 672 } cond_oper_type;
 673
 674 /*
 675     condition operand typedef
 676 */
 677 typedef struct cond_oper_
 678 {
 679     cond_oper_type m_type;
 680     byte m_byte;            /* cot_byte */
 681     map_byte *m_regbyte;    /* cot_regbyte */
 682     byte *m_regname;        /* cot_regbyte - temporary */
 683 } cond_oper;
 684
 685 /*
 686     condition type typedef
 687 */
 688 typedef enum cond_type_
 689 {
 690     ct_equal,
 691     ct_not_equal
 692 } cond_type;
 693
 694 /*
 695     condition typedef
 696 */
 697 typedef struct cond_
 698 {
 699     cond_type m_type;
 700     cond_oper m_operands[2];
 701 } cond;
 702
 703 static void cond_create (cond **co)
 704 {
 705     *co = (cond *) mem_alloc (sizeof (cond));
 706     if (*co)
 707     {
 708         (**co).m_operands[0].m_regname = NULL;
 709         (**co).m_operands[1].m_regname = NULL;
 710     }
 711 }
 712
 713 static void cond_destroy (cond **co)
 714 {
 715     if (*co)
 716     {
 717         mem_free ((void **) &(**co).m_operands[0].m_regname);
 718         mem_free ((void **) &(**co).m_operands[1].m_regname);
 719         mem_free ((void **) co);
 720     }
 721 }
 722
 723 /*
 724     specifier type typedef
 725 */
 726 typedef enum spec_type_
 727 {
 728     st_false,
 729     st_true,
 730     st_byte,
 731     st_byte_range,
 732     st_string,
 733     st_identifier,
 734     st_identifier_loop,
 735     st_debug
 736 } spec_type;
 737
 738 /*
 739     specifier typedef
 740 */
 741 typedef struct spec_
 742 {
 743     spec_type m_spec_type;
 744     byte m_byte[2];                 /* st_byte, st_byte_range */
 745     byte *m_string;                 /* st_string */
 746     struct rule_ *m_rule;           /* st_identifier, st_identifier_loop */
 747     emit *m_emits;
 748     error *m_errtext;
 749     cond *m_cond;
 750     struct spec_ *next;
 751 } spec;
 752
 753 static void spec_create (spec **sp)
 754 {
 755     *sp = (spec *) mem_alloc (sizeof (spec));
 756     if (*sp)
 757     {
 758         (**sp).m_spec_type = st_false;
 759         (**sp).m_byte[0] = '\0';
 760         (**sp).m_byte[1] = '\0';
 761         (**sp).m_string = NULL;
 762         (**sp).m_rule = NULL;
 763         (**sp).m_emits = NULL;
 764         (**sp).m_errtext = NULL;
 765         (**sp).m_cond = NULL;
 766         (**sp).next = NULL;
 767     }
 768 }
 769
 770 static void spec_destroy (spec **sp)
 771 {
 772     if (*sp)
 773     {
 774         spec_destroy (&(**sp).next);
 775         emit_destroy (&(**sp).m_emits);
 776         error_destroy (&(**sp).m_errtext);
 777         mem_free ((void **) &(**sp).m_string);
 778         cond_destroy (&(**sp).m_cond);
 779         mem_free ((void **) sp);
 780     }
 781 }
 782
  783 GRAMMAR_IMPLEMENT_LIST_APPEND(spec)    /* defines spec_append (spec **x, spec *nx): appends nx at the list tail */
 784
 785 /*
 786     operator typedef
 787 */
 788 typedef enum oper_
 789 {
 790     op_none,
 791     op_and,
 792     op_or
 793 } oper;
 794
 795 /*
 796     rule typedef
 797 */
 798 typedef struct rule_
 799 {
 800     oper m_oper;
 801     spec *m_specs;
 802     struct rule_ *next;
 803     int m_referenced;
 804 } rule;
 805
 806 static void rule_create (rule **ru)
 807 {
 808     *ru = (rule *) mem_alloc (sizeof (rule));
 809     if (*ru)
 810     {
 811         (**ru).m_oper = op_none;
 812         (**ru).m_specs = NULL;
 813         (**ru).next = NULL;
 814         (**ru).m_referenced = 0;
 815     }
 816 }
 817
 818 static void rule_destroy (rule **ru)
 819 {
 820     if (*ru)
 821     {
 822         rule_destroy (&(**ru).next);
 823         spec_destroy (&(**ru).m_specs);
 824         mem_free ((void **) ru);
 825     }
 826 }
 827
  828 GRAMMAR_IMPLEMENT_LIST_APPEND(rule)    /* defines rule_append (rule **x, rule *nx): appends nx at the list tail */
 829
 830 /*
 831     returns unique grammar id
 832 */
 833 static grammar next_valid_grammar_id (void)
 834 {
 835     static grammar id = 0;
 836
 837     return ++id;
 838 }
 839
 840 /*
 841     dictionary typedef
 842 */
 843 typedef struct dict_
 844 {
 845     rule *m_rulez;
 846     rule *m_syntax;
 847     rule *m_string;
 848     map_byte *m_regbytes;
 849     grammar m_id;
 850     struct dict_ *next;
 851 } dict;
 852
 853 static void dict_create (dict **di)
 854 {
 855     *di = (dict *) mem_alloc (sizeof (dict));
 856     if (*di)
 857     {
 858         (**di).m_rulez = NULL;
 859         (**di).m_syntax = NULL;
 860         (**di).m_string = NULL;
 861         (**di).m_regbytes = NULL;
 862         (**di).m_id = next_valid_grammar_id ();
 863         (**di).next = NULL;
 864     }
 865 }
 866
 867 static void dict_destroy (dict **di)
 868 {
 869     if (*di)
 870     {
 871         rule_destroy (&(**di).m_rulez);
 872         map_byte_destroy (&(**di).m_regbytes);
 873         mem_free ((void **) di);
 874     }
 875 }
 876
  877 GRAMMAR_IMPLEMENT_LIST_APPEND(dict)    /* defines dict_append (dict **x, dict *nx): appends nx at the list tail */
 878
 879 static void dict_find (dict **di, grammar key, dict **data)
 880 {
 881     while (*di)
 882     {
 883         if ((**di).m_id == key)
 884         {
 885             *data = *di;
 886             return;
 887         }
 888
 889         di = &(**di).next;
 890     }
 891
 892     *data = NULL;
 893 }
 894
  895 static dict *g_dicts = NULL;    /* file-scope dict list head, initially empty; NOTE(review): presumably the list of all loaded grammars - confirm against its users */
 896
 897 /*
 898     byte array typedef
 899 */
 900 typedef struct barray_
 901 {
 902     byte *data;
 903     unsigned int len;
 904 } barray;
 905
 906 static void barray_create (barray **ba)
 907 {
 908     *ba = (barray *) mem_alloc (sizeof (barray));
 909     if (*ba)
 910     {
 911         (**ba).data = NULL;
 912         (**ba).len = 0;
 913     }
 914 }
 915
 916 static void barray_destroy (barray **ba)
 917 {
 918     if (*ba)
 919     {
 920         mem_free ((void **) &(**ba).data);
 921         mem_free ((void **) ba);
 922     }
 923 }
 924
 925 /*
 926     reallocates byte array to requested size,
 927     returns 0 on success,
 928     returns 1 otherwise
 929 */
 930 static int barray_resize (barray **ba, unsigned int nlen)
 931 {
 932     byte *new_pointer;
 933
 934     if (nlen == 0)
 935     {
 936         mem_free ((void **) &(**ba).data);
 937         (**ba).data = NULL;
 938         (**ba).len = 0;
 939
 940         return 0;
 941     }
 942     else
 943     {
 944         new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
 945             nlen * sizeof (byte));
 946         if (new_pointer)
 947         {
 948             (**ba).data = new_pointer;
 949             (**ba).len = nlen;
 950
 951             return 0;
 952         }
 953     }
 954
 955     return 1;
 956 }
 957
 958 /*
 959     adds byte array pointed by *nb to the end of array pointed by *ba,
 960     returns 0 on success,
 961     returns 1 otherwise
 962 */
 963 static int barray_append (barray **ba, barray **nb)
 964 {
 965     const unsigned int len = (**ba).len;
 966
 967     if (barray_resize (ba, (**ba).len + (**nb).len))
 968         return 1;
 969
 970     mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
 971
 972     return 0;
 973 }
 974
 975 /*
 976     adds emit chain pointed by em to the end of array pointed by *ba,
 977     returns 0 on success,
 978     returns 1 otherwise
 979 */
 980 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
 981 {
 982     unsigned int count = emit_size (em);
 983
 984     if (barray_resize (ba, (**ba).len + count))
 985         return 1;
 986
 987     return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
 988 }
 989
/*
    byte pool typedef
*/
typedef struct bytepool_
{
    /* buffer owned by the pool (allocated in bytepool_create, freed in bytepool_destroy) */
    byte *_F;
    /* current size of the buffer, in bytes */
    unsigned int _Siz;
} bytepool;
 998
 999 static void bytepool_destroy (bytepool **by)
1000 {
1001     if (*by != NULL)
1002     {
1003         mem_free ((void **) &(**by)._F);
1004         mem_free ((void **) by);
1005     }
1006 }
1007
1008 static void bytepool_create (bytepool **by, int len)
1009 {
1010     *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1011     if (*by != NULL)
1012     {
1013         (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1014         (**by)._Siz = len;
1015
1016         if ((**by)._F == NULL)
1017             bytepool_destroy (by);
1018     }
1019 }
1020
1021 static int bytepool_reserve (bytepool *by, unsigned int n)
1022 {
1023     byte *_P;
1024
1025     if (n <= by->_Siz)
1026         return 0;
1027
1028     /* byte pool can only grow and at least by doubling its size */
1029     n = n >= by->_Siz * 2 ? n : by->_Siz * 2;
1030
1031     /* reallocate the memory and adjust pointers to the new memory location */
1032     _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n));
1033     if (_P != NULL)
1034     {
1035         by->_F = _P;
1036         by->_Siz = n;
1037         return 0;
1038     }
1039
1040     return 1;
1041 }
1042
/*
    string to string map typedef
*/
typedef struct map_str_
{
    /* key string - compared with str_equal in map_str_find, freed in map_str_destroy */
    byte *key;
    /* string associated with the key - freed in map_str_destroy */
    byte *data;
    /* next entry of the list, NULL for the last one */
    struct map_str_ *next;
} map_str;
1052
1053 static void map_str_create (map_str **ma)
1054 {
1055     *ma = (map_str *) mem_alloc (sizeof (map_str));
1056     if (*ma)
1057     {
1058         (**ma).key = NULL;
1059         (**ma).data = NULL;
1060         (**ma).next = NULL;
1061     }
1062 }
1063
1064 static void map_str_destroy (map_str **ma)
1065 {
1066     if (*ma)
1067     {
1068         map_str_destroy (&(**ma).next);
1069         mem_free ((void **) &(**ma).key);
1070         mem_free ((void **) &(**ma).data);
1071         mem_free ((void **) ma);
1072     }
1073 }
1074
/* NOTE(review): the macro is defined outside this part of the file; presumably it expands to
   the list-append helper for map_str (map_str_append) - confirm against the macro definition */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1076
1077 /*
1078     searches the map for specified key,
1079     if the key is matched, *data is filled with data associated with the key,
1080     returns 0 if the key is matched,
1081     returns 1 otherwise
1082 */
1083 static int map_str_find (map_str **ma, const byte *key, byte **data)
1084 {
1085     while (*ma)
1086     {
1087         if (str_equal ((**ma).key, key))
1088         {
1089             *data = str_duplicate ((**ma).data);
1090             if (*data == NULL)
1091                 return 1;
1092
1093             return 0;
1094         }
1095
1096         ma = &(**ma).next;
1097     }
1098
1099     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1100     return 1;
1101 }
1102
/*
    string to rule map typedef
*/
typedef struct map_rule_
{
    /* key string - compared with str_equal in map_rule_find, freed in map_rule_destroy */
    byte *key;
    /* rule associated with the key - map_rule_destroy does not free it */
    rule *data;
    /* next entry of the list, NULL for the last one */
    struct map_rule_ *next;
} map_rule;
1112
1113 static void map_rule_create (map_rule **ma)
1114 {
1115     *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1116     if (*ma)
1117     {
1118         (**ma).key = NULL;
1119         (**ma).data = NULL;
1120         (**ma).next = NULL;
1121     }
1122 }
1123
1124 static void map_rule_destroy (map_rule **ma)
1125 {
1126     if (*ma)
1127     {
1128         map_rule_destroy (&(**ma).next);
1129         mem_free ((void **) &(**ma).key);
1130         mem_free ((void **) ma);
1131     }
1132 }
1133
/* NOTE(review): the macro is defined outside this part of the file; presumably it expands to
   the list-append helper for map_rule (map_rule_append) - confirm against the macro definition */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1135
1136 /*
1137     searches the map for specified key,
1138     if the key is matched, *data is filled with data associated with the key,
1139     returns 0 if the is matched,
1140     returns 1 otherwise
1141 */
1142 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1143 {
1144     while (*ma)
1145     {
1146         if (str_equal ((**ma).key, key))
1147         {
1148             *data = (**ma).data;
1149
1150             return 0;
1151         }
1152
1153         ma = &(**ma).next;
1154     }
1155
1156     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1157     return 1;
1158 }
1159
1160 /*
1161     returns 1 if given character is a white space,
1162     returns 0 otherwise
1163 */
1164 static int is_space (byte c)
1165 {
1166     return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1167 }
1168
1169 /*
1170     advances text pointer by 1 if character pointed by *text is a space,
1171     returns 1 if a space has been eaten,
1172     returns 0 otherwise
1173 */
1174 static int eat_space (const byte **text)
1175 {
1176     if (is_space (**text))
1177     {
1178         (*text)++;
1179
1180         return 1;
1181     }
1182
1183     return 0;
1184 }
1185
1186 /*
1187     returns 1 if text points to C-style comment start string,
1188     returns 0 otherwise
1189 */
1190 static int is_comment_start (const byte *text)
1191 {
1192     return text[0] == '/' && text[1] == '*';
1193 }
1194
1195 /*
1196     advances text pointer to first character after C-style comment block - if any,
1197     returns 1 if C-style comment block has been encountered and eaten,
1198     returns 0 otherwise
1199 */
1200 static int eat_comment (const byte **text)
1201 {
1202     if (is_comment_start (*text))
1203     {
1204         /* *text points to comment block - skip two characters to enter comment body */
1205         *text += 2;
1206         /* skip any character except consecutive '*' and '/' */
1207         while (!((*text)[0] == '*' && (*text)[1] == '/'))
1208             (*text)++;
1209         /* skip those two terminating characters */
1210         *text += 2;
1211
1212         return 1;
1213     }
1214
1215     return 0;
1216 }
1217
1218 /*
1219     advances text pointer to first character that is neither space nor C-style comment block
1220 */
1221 static void eat_spaces (const byte **text)
1222 {
1223     while (eat_space (text) || eat_comment (text))
1224         ;
1225 }
1226
1227 /*
1228     resizes string pointed by *ptr to successfully add character c to the end of the string,
1229     returns 0 on success,
1230     returns 1 otherwise
1231 */
1232 static int string_grow (byte **ptr, unsigned int *len, byte c)
1233 {
1234     /* reallocate the string in 16-byte increments */
1235     if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1236     {
1237         byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1238             ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1239         if (tmp == NULL)
1240             return 1;
1241
1242         *ptr = tmp;
1243     }
1244
1245     if (c)
1246     {
1247         /* append given character */
1248         (*ptr)[*len] = c;
1249         (*len)++;
1250     }
1251     (*ptr)[*len] = '\0';
1252
1253     return 0;
1254 }
1255
1256 /*
1257     returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1258     returns 0 otherwise
1259 */
1260 static int is_identifier (byte c)
1261 {
1262     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1263 }
1264
1265 /*
1266     copies characters from *text to *id until non-identifier character is encountered,
1267     assumes that *id points to NULL object - caller is responsible for later freeing the string,
1268     text pointer is advanced to point past the copied identifier,
1269     returns 0 if identifier was successfully copied,
1270     returns 1 otherwise
1271 */
1272 static int get_identifier (const byte **text, byte **id)
1273 {
1274     const byte *t = *text;
1275     byte *p = NULL;
1276     unsigned int len = 0;
1277
1278     if (string_grow (&p, &len, '\0'))
1279         return 1;
1280
1281     /* loop while next character in buffer is valid for identifiers */
1282     while (is_identifier (*t))
1283     {
1284         if (string_grow (&p, &len, *t++))
1285         {
1286             mem_free ((void **) (void *) &p);
1287             return 1;
1288         }
1289     }
1290
1291     *text = t;
1292     *id = p;
1293
1294     return 0;
1295 }
1296
1297 /*
1298     converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1299     advances text pointer past the converted sequence,
1300     returns the converted value
1301 */
1302 static unsigned int dec_convert (const byte **text)
1303 {
1304     unsigned int value = 0;
1305
1306     while (**text >= '0' && **text <= '9')
1307     {
1308         value = value * 10 + **text - '0';
1309         (*text)++;
1310     }
1311
1312     return value;
1313 }
1314
1315 /*
1316     returns 1 if given character is HEX digit 0-9, A-F or a-f,
1317     returns 0 otherwise
1318 */
1319 static int is_hex (byte c)
1320 {
1321     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1322 }
1323
1324 /*
1325     returns value of passed character as if it was HEX digit
1326 */
1327 static unsigned int hex2dec (byte c)
1328 {
1329     if (c >= '0' && c <= '9')
1330         return c - '0';
1331     if (c >= 'A' && c <= 'F')
1332         return c - 'A' + 10;
1333     return c - 'a' + 10;
1334 }
1335
1336 /*
1337     converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1338     advances text pointer past the converted sequence,
1339     returns the converted value
1340 */
1341 static unsigned int hex_convert (const byte **text)
1342 {
1343     unsigned int value = 0;
1344
1345     while (is_hex (**text))
1346     {
1347         value = value * 0x10 + hex2dec (**text);
1348         (*text)++;
1349     }
1350
1351     return value;
1352 }
1353
1354 /*
1355     returns 1 if given character is OCT digit 0-7,
1356     returns 0 otherwise
1357 */
1358 static int is_oct (byte c)
1359 {
1360     return c >= '0' && c <= '7';
1361 }
1362
1363 /*
1364     returns value of passed character as if it was OCT digit
1365 */
1366 static int oct2dec (byte c)
1367 {
1368     return c - '0';
1369 }
1370
1371 static byte get_escape_sequence (const byte **text)
1372 {
1373     int value = 0;
1374
1375     /* skip '\' character */
1376     (*text)++;
1377
1378     switch (*(*text)++)
1379     {
1380     case '\'':
1381         return '\'';
1382     case '"':
1383         return '\"';
1384     case '?':
1385         return '\?';
1386     case '\\':
1387         return '\\';
1388     case 'a':
1389         return '\a';
1390     case 'b':
1391         return '\b';
1392     case 'f':
1393         return '\f';
1394     case 'n':
1395         return '\n';
1396     case 'r':
1397         return '\r';
1398     case 't':
1399         return '\t';
1400     case 'v':
1401         return '\v';
1402     case 'x':
1403         return (byte) hex_convert (text);
1404     }
1405
1406     (*text)--;
1407     if (is_oct (**text))
1408     {
1409         value = oct2dec (*(*text)++);
1410         if (is_oct (**text))
1411         {
1412             value = value * 010 + oct2dec (*(*text)++);
1413             if (is_oct (**text))
1414                 value = value * 010 + oct2dec (*(*text)++);
1415         }
1416     }
1417
1418     return (byte) value;
1419 }
1420
1421 /*
1422     copies characters from *text to *str until " or ' character is encountered,
1423     assumes that *str points to NULL object - caller is responsible for later freeing the string,
1424     assumes that *text points to " or ' character that starts the string,
1425     text pointer is advanced to point past the " or ' character,
1426     returns 0 if string was successfully copied,
1427     returns 1 otherwise
1428 */
1429 static int get_string (const byte **text, byte **str)
1430 {
1431     const byte *t = *text;
1432     byte *p = NULL;
1433     unsigned int len = 0;
1434     byte term_char;
1435
1436     if (string_grow (&p, &len, '\0'))
1437         return 1;
1438
1439     /* read " or ' character that starts the string */
1440     term_char = *t++;
1441     /* while next character is not the terminating character */
1442     while (*t && *t != term_char)
1443     {
1444         byte c;
1445
1446         if (*t == '\\')
1447             c = get_escape_sequence (&t);
1448         else
1449             c = *t++;
1450
1451         if (string_grow (&p, &len, c))
1452         {
1453             mem_free ((void **) (void *) &p);
1454             return 1;
1455         }
1456     }
1457     /* skip " or ' character that ends the string */
1458     t++;
1459
1460     *text = t;
1461     *str = p;
1462     return 0;
1463 }
1464
1465 /*
1466     gets emit code, the syntax is:
1467     ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1468     assumes that *text already points to <symbol>,
1469     returns 0 if emit code is successfully read,
1470     returns 1 otherwise
1471 */
1472 static int get_emtcode (const byte **text, map_byte **ma)
1473 {
1474     const byte *t = *text;
1475     map_byte *m = NULL;
1476
1477     map_byte_create (&m);
1478     if (m == NULL)
1479         return 1;
1480
1481     if (get_identifier (&t, &m->key))
1482     {
1483         map_byte_destroy (&m);
1484         return 1;
1485     }
1486     eat_spaces (&t);
1487
1488     if (*t == '\'')
1489     {
1490         byte *c;
1491
1492         if (get_string (&t, &c))
1493         {
1494             map_byte_destroy (&m);
1495             return 1;
1496         }
1497
1498         m->data = (byte) c[0];
1499         mem_free ((void **) (void *) &c);
1500     }
1501     else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1502     {
1503         /* skip HEX "0x" or "0X" prefix */
1504         t += 2;
1505         m->data = (byte) hex_convert (&t);
1506     }
1507     else
1508     {
1509         m->data = (byte) dec_convert (&t);
1510     }
1511
1512     eat_spaces (&t);
1513
1514     *text = t;
1515     *ma = m;
1516     return 0;
1517 }
1518
1519 /*
1520     gets regbyte declaration, the syntax is:
1521     ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1522     assumes that *text already points to <symbol>,
1523     returns 0 if regbyte is successfully read,
1524     returns 1 otherwise
1525 */
1526 static int get_regbyte (const byte **text, map_byte **ma)
1527 {
1528     /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1529     return get_emtcode (text, ma);
1530 }
1531
1532 /*
1533     returns 0 on success,
1534     returns 1 otherwise
1535 */
1536 static int get_errtext (const byte **text, map_str **ma)
1537 {
1538     const byte *t = *text;
1539     map_str *m = NULL;
1540
1541     map_str_create (&m);
1542     if (m == NULL)
1543         return 1;
1544
1545     if (get_identifier (&t, &m->key))
1546     {
1547         map_str_destroy (&m);
1548         return 1;
1549     }
1550     eat_spaces (&t);
1551
1552     if (get_string (&t, &m->data))
1553     {
1554         map_str_destroy (&m);
1555         return 1;
1556     }
1557     eat_spaces (&t);
1558
1559     *text = t;
1560     *ma = m;
1561     return 0;
1562 }
1563
1564 /*
1565     returns 0 on success,
1566     returns 1 otherwise,
1567 */
1568 static int get_error (const byte **text, error **er, map_str *maps)
1569 {
1570     const byte *t = *text;
1571     byte *temp = NULL;
1572
1573     if (*t != '.')
1574         return 0;
1575
1576     t++;
1577     if (get_identifier (&t, &temp))
1578         return 1;
1579     eat_spaces (&t);
1580
1581     if (!str_equal ((byte *) "error", temp))
1582     {
1583         mem_free ((void **) (void *) &temp);
1584         return 0;
1585     }
1586
1587     mem_free ((void **) (void *) &temp);
1588
1589     error_create (er);
1590     if (*er == NULL)
1591         return 1;
1592
1593     if (*t == '\"')
1594     {
1595         if (get_string (&t, &(**er).m_text))
1596         {
1597             error_destroy (er);
1598             return 1;
1599         }
1600         eat_spaces (&t);
1601     }
1602     else
1603     {
1604         if (get_identifier (&t, &temp))
1605         {
1606             error_destroy (er);
1607             return 1;
1608         }
1609         eat_spaces (&t);
1610
1611         if (map_str_find (&maps, temp, &(**er).m_text))
1612         {
1613             mem_free ((void **) (void *) &temp);
1614             error_destroy (er);
1615             return 1;
1616         }
1617
1618         mem_free ((void **) (void *) &temp);
1619     }
1620
1621     /* try to extract "token" from "...$token$..." */
1622     {
1623         byte *processed = NULL;
1624         unsigned int len = 0;
1625       int i = 0;
1626
1627         if (string_grow (&processed, &len, '\0'))
1628         {
1629             error_destroy (er);
1630             return 1;
1631         }
1632
1633         while (i < str_length ((**er).m_text))
1634         {
1635             /* check if the dollar sign is repeated - if so skip it */
1636             if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1637             {
1638                 if (string_grow (&processed, &len, '$'))
1639                 {
1640                     mem_free ((void **) (void *) &processed);
1641                     error_destroy (er);
1642                     return 1;
1643                 }
1644
1645                 i += 2;
1646             }
1647             else if ((**er).m_text[i] != '$')
1648             {
1649                 if (string_grow (&processed, &len, (**er).m_text[i]))
1650                 {
1651                     mem_free ((void **) (void *) &processed);
1652                     error_destroy (er);
1653                     return 1;
1654                 }
1655
1656                 i++;
1657             }
1658             else
1659             {
1660                 if (string_grow (&processed, &len, '$'))
1661                 {
1662                     mem_free ((void **) (void *) &processed);
1663                     error_destroy (er);
1664                     return 1;
1665                 }
1666
1667                 {
1668                     /* length of token being extracted */
1669                     unsigned int tlen = 0;
1670
1671                     if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1672                     {
1673                         mem_free ((void **) (void *) &processed);
1674                         error_destroy (er);
1675                         return 1;
1676                     }
1677
1678                     /* skip the dollar sign */
1679                     i++;
1680
1681                     while ((**er).m_text[i] != '$')
1682                     {
1683                         if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1684                         {
1685                             mem_free ((void **) (void *) &processed);
1686                             error_destroy (er);
1687                             return 1;
1688                         }
1689
1690                         i++;
1691                     }
1692
1693                     /* skip the dollar sign */
1694                     i++;
1695                 }
1696             }
1697         }
1698
1699         mem_free ((void **) &(**er).m_text);
1700         (**er).m_text = processed;
1701     }
1702
1703     *text = t;
1704     return 0;
1705 }
1706
1707 /*
1708     returns 0 on success,
1709     returns 1 otherwise,
1710 */
1711 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1712 {
1713     const byte *t = *text;
1714     byte *temp = NULL;
1715     emit *e = NULL;
1716     emit_dest dest;
1717
1718     if (*t != '.')
1719         return 0;
1720
1721     t++;
1722     if (get_identifier (&t, &temp))
1723         return 1;
1724     eat_spaces (&t);
1725
1726     /* .emit */
1727     if (str_equal ((byte *) "emit", temp))
1728         dest = ed_output;
1729     /* .load */
1730     else if (str_equal ((byte *) "load", temp))
1731         dest = ed_regbyte;
1732     else
1733     {
1734         mem_free ((void **) (void *) &temp);
1735         return 0;
1736     }
1737
1738     mem_free ((void **) (void *) &temp);
1739
1740     emit_create (&e);
1741     if (e == NULL)
1742         return 1;
1743
1744     e->m_emit_dest = dest;
1745
1746     if (dest == ed_regbyte)
1747     {
1748         if (get_identifier (&t, &e->m_regname))
1749         {
1750             emit_destroy (&e);
1751             return 1;
1752         }
1753         eat_spaces (&t);
1754     }
1755
1756     /* 0xNN */
1757     if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1758     {
1759         t += 2;
1760         e->m_byte = (byte) hex_convert (&t);
1761
1762         e->m_emit_type = et_byte;
1763     }
1764     /* NNN */
1765     else if (*t >= '0' && *t <= '9')
1766     {
1767         e->m_byte = (byte) dec_convert (&t);
1768
1769         e->m_emit_type = et_byte;
1770     }
1771     /* * */
1772     else if (*t == '*')
1773     {
1774         t++;
1775
1776         e->m_emit_type = et_stream;
1777     }
1778     /* $ */
1779     else if (*t == '$')
1780     {
1781         t++;
1782
1783         e->m_emit_type = et_position;
1784     }
1785     /* 'c' */
1786     else if (*t == '\'')
1787     {
1788         if (get_string (&t, &temp))
1789         {
1790             emit_destroy (&e);
1791             return 1;
1792         }
1793         e->m_byte = (byte) temp[0];
1794
1795         mem_free ((void **) (void *) &temp);
1796
1797         e->m_emit_type = et_byte;
1798     }
1799     else
1800     {
1801         if (get_identifier (&t, &temp))
1802         {
1803             emit_destroy (&e);
1804             return 1;
1805         }
1806
1807         if (map_byte_find (&mapb, temp, &e->m_byte))
1808         {
1809             mem_free ((void **) (void *) &temp);
1810             emit_destroy (&e);
1811             return 1;
1812         }
1813
1814         mem_free ((void **) (void *) &temp);
1815
1816         e->m_emit_type = et_byte;
1817     }
1818
1819     eat_spaces (&t);
1820
1821     if (get_emits (&t, &e->m_next, mapb))
1822     {
1823         emit_destroy (&e);
1824         return 1;
1825     }
1826
1827     *text = t;
1828     *em = e;
1829     return 0;
1830 }
1831
1832 /*
1833     returns 0 on success,
1834     returns 1 otherwise,
1835 */
1836 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1837 {
1838     const byte *t = *text;
1839     spec *s = NULL;
1840
1841     spec_create (&s);
1842     if (s == NULL)
1843         return 1;
1844
1845     /* first - read optional .if statement */
1846     if (*t == '.')
1847     {
1848         const byte *u = t;
1849         byte *keyword = NULL;
1850
1851         /* skip the dot */
1852         u++;
1853
1854         if (get_identifier (&u, &keyword))
1855         {
1856             spec_destroy (&s);
1857             return 1;
1858         }
1859
1860         /* .if */
1861         if (str_equal ((byte *) "if", keyword))
1862         {
1863             cond_create (&s->m_cond);
1864             if (s->m_cond == NULL)
1865             {
1866                 spec_destroy (&s);
1867                 return 1;
1868             }
1869
1870             /* skip the left paren */
1871             eat_spaces (&u);
1872             u++;
1873
1874             /* get the left operand */
1875             eat_spaces (&u);
1876             if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1877             {
1878                 spec_destroy (&s);
1879                 return 1;
1880             }
1881             s->m_cond->m_operands[0].m_type = cot_regbyte;
1882
1883             /* get the operator (!= or ==) */
1884             eat_spaces (&u);
1885             if (*u == '!')
1886                 s->m_cond->m_type = ct_not_equal;
1887             else
1888                 s->m_cond->m_type = ct_equal;
1889             u += 2;
1890             eat_spaces (&u);
1891
1892             if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1893             {
1894                 /* skip the 0x prefix */
1895                 u += 2;
1896
1897                 /* get the right operand */
1898                 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1899                 s->m_cond->m_operands[1].m_type = cot_byte;
1900             }
1901             else /*if (*u >= '0' && *u <= '9')*/
1902             {
1903                 /* get the right operand */
1904                 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1905                 s->m_cond->m_operands[1].m_type = cot_byte;
1906             }
1907
1908             /* skip the right paren */
1909             eat_spaces (&u);
1910             u++;
1911
1912             eat_spaces (&u);
1913
1914             t = u;
1915         }
1916
1917         mem_free ((void **) (void *) &keyword);
1918     }
1919
1920     if (*t == '\'')
1921     {
1922         byte *temp = NULL;
1923
1924         if (get_string (&t, &temp))
1925         {
1926             spec_destroy (&s);
1927             return 1;
1928         }
1929         eat_spaces (&t);
1930
1931         if (*t == '-')
1932         {
1933             byte *temp2 = NULL;
1934
1935             /* skip the '-' character */
1936             t++;
1937             eat_spaces (&t);
1938
1939             if (get_string (&t, &temp2))
1940             {
1941                 mem_free ((void **) (void *) &temp);
1942                 spec_destroy (&s);
1943                 return 1;
1944             }
1945             eat_spaces (&t);
1946
1947             s->m_spec_type = st_byte_range;
1948             s->m_byte[0] = *temp;
1949             s->m_byte[1] = *temp2;
1950
1951             mem_free ((void **) (void *) &temp2);
1952         }
1953         else
1954         {
1955             s->m_spec_type = st_byte;
1956             *s->m_byte = *temp;
1957         }
1958
1959         mem_free ((void **) (void *) &temp);
1960     }
1961     else if (*t == '"')
1962     {
1963         if (get_string (&t, &s->m_string))
1964         {
1965             spec_destroy (&s);
1966             return 1;
1967         }
1968         eat_spaces (&t);
1969
1970         s->m_spec_type = st_string;
1971     }
1972     else if (*t == '.')
1973     {
1974         byte *keyword = NULL;
1975
1976         /* skip the dot */
1977         t++;
1978
1979         if (get_identifier (&t, &keyword))
1980         {
1981             spec_destroy (&s);
1982             return 1;
1983         }
1984         eat_spaces (&t);
1985
1986         /* .true */
1987         if (str_equal ((byte *) "true", keyword))
1988         {
1989             s->m_spec_type = st_true;
1990         }
1991         /* .false */
1992         else if (str_equal ((byte *) "false", keyword))
1993         {
1994             s->m_spec_type = st_false;
1995         }
1996         /* .debug */
1997         else if (str_equal ((byte *) "debug", keyword))
1998         {
1999             s->m_spec_type = st_debug;
2000         }
2001         /* .loop */
2002         else if (str_equal ((byte *) "loop", keyword))
2003         {
2004             if (get_identifier (&t, &s->m_string))
2005             {
2006                 mem_free ((void **) (void *) &keyword);
2007                 spec_destroy (&s);
2008                 return 1;
2009             }
2010             eat_spaces (&t);
2011
2012             s->m_spec_type = st_identifier_loop;
2013         }
2014         mem_free ((void **) (void *) &keyword);
2015     }
2016     else
2017     {
2018         if (get_identifier (&t, &s->m_string))
2019         {
2020             spec_destroy (&s);
2021             return 1;
2022         }
2023         eat_spaces (&t);
2024
2025         s->m_spec_type = st_identifier;
2026     }
2027
2028     if (get_error (&t, &s->m_errtext, maps))
2029     {
2030         spec_destroy (&s);
2031         return 1;
2032     }
2033
2034     if (get_emits (&t, &s->m_emits, mapb))
2035     {
2036         spec_destroy (&s);
2037         return 1;
2038     }
2039
2040     *text = t;
2041     *sp = s;
2042     return 0;
2043 }
2044
2045 /*
2046     returns 0 on success,
2047     returns 1 otherwise,
2048 */
2049 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2050 {
2051     const byte *t = *text;
2052     rule *r = NULL;
2053
2054     rule_create (&r);
2055     if (r == NULL)
2056         return 1;
2057
2058     if (get_spec (&t, &r->m_specs, maps, mapb))
2059     {
2060         rule_destroy (&r);
2061         return 1;
2062     }
2063
2064     while (*t != ';')
2065     {
2066         byte *op = NULL;
2067         spec *sp = NULL;
2068
2069         /* skip the dot that precedes "and" or "or" */
2070         t++;
2071
2072         /* read "and" or "or" keyword */
2073         if (get_identifier (&t, &op))
2074         {
2075             rule_destroy (&r);
2076             return 1;
2077         }
2078         eat_spaces (&t);
2079
2080         if (r->m_oper == op_none)
2081         {
2082             /* .and */
2083             if (str_equal ((byte *) "and", op))
2084                 r->m_oper = op_and;
2085             /* .or */
2086             else
2087                 r->m_oper = op_or;
2088         }
2089
2090         mem_free ((void **) (void *) &op);
2091
2092         if (get_spec (&t, &sp, maps, mapb))
2093         {
2094             rule_destroy (&r);
2095             return 1;
2096         }
2097
2098         spec_append (&r->m_specs, sp);
2099     }
2100
2101     /* skip the semicolon */
2102     t++;
2103     eat_spaces (&t);
2104
2105     *text = t;
2106     *ru = r;
2107     return 0;
2108 }
2109
2110 /*
2111     returns 0 on success,
2112     returns 1 otherwise,
2113 */
2114 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2115 {
2116     if (map_rule_find (&mapr, symbol, ru))
2117         return 1;
2118
2119     (**ru).m_referenced = 1;
2120
2121     return 0;
2122 }
2123
2124 /*
2125     returns 0 on success,
2126     returns 1 otherwise,
2127 */
2128 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2129     byte **string_symbol, map_byte *regbytes)
2130 {
2131     rule *rulez = di->m_rulez;
2132
2133     /* update dependecies for the root and lexer symbols */
2134     if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2135         (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2136         return 1;
2137
2138     mem_free ((void **) syntax_symbol);
2139     mem_free ((void **) string_symbol);
2140
2141     /* update dependecies for the rest of the rules */
2142     while (rulez)
2143     {
2144         spec *sp = rulez->m_specs;
2145
2146         /* iterate through all the specifiers */
2147         while (sp)
2148         {
2149             /* update dependency for identifier */
2150             if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2151             {
2152                 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2153                     return 1;
2154
2155                 mem_free ((void **) &sp->m_string);
2156             }
2157
2158             /* some errtexts reference to a rule */
2159             if (sp->m_errtext && sp->m_errtext->m_token_name)
2160             {
2161                 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2162                     return 1;
2163
2164                 mem_free ((void **) &sp->m_errtext->m_token_name);
2165             }
2166
2167             /* update dependency for condition */
2168             if (sp->m_cond)
2169             {
2170                 int i;
2171                 for (i = 0; i < 2; i++)
2172                     if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2173                     {
2174                         sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2175                             sp->m_cond->m_operands[i].m_regname);
2176
2177                         if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2178                             return 1;
2179
2180                         mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2181                     }
2182             }
2183
2184             /* update dependency for all .load instructions */
2185             if (sp->m_emits)
2186             {
2187                 emit *em = sp->m_emits;
2188                 while (em != NULL)
2189                 {
2190                     if (em->m_emit_dest == ed_regbyte)
2191                     {
2192                         em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2193
2194                         if (em->m_regbyte == NULL)
2195                             return 1;
2196
2197                         mem_free ((void **) &em->m_regname);
2198                     }
2199
2200                     em = em->m_next;
2201                 }
2202             }
2203
2204             sp = sp->next;
2205         }
2206
2207         rulez = rulez->next;
2208     }
2209
2210     /* check for unreferenced symbols */
2211     rulez = di->m_rulez;
2212     while (rulez != NULL)
2213     {
2214         if (!rulez->m_referenced)
2215         {
2216             map_rule *ma = mapr;
2217             while (ma)
2218             {
2219                 if (ma->data == rulez)
2220                 {
2221                     set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2222                     return 1;
2223                 }
2224                 ma = ma->next;
2225             }
2226         }
2227         rulez = rulez->next;
2228     }
2229
2230     return 0;
2231 }
2232
2233 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2234 {
2235     byte values[2];
2236     int i;
2237
2238     if (co == NULL)
2239         return 1;
2240
2241     for (i = 0; i < 2; i++)
2242         switch (co->m_operands[i].m_type)
2243         {
2244         case cot_byte:
2245             values[i] = co->m_operands[i].m_byte;
2246             break;
2247         case cot_regbyte:
2248             values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2249             break;
2250         }
2251
2252     switch (co->m_type)
2253     {
2254     case ct_equal:
2255         return values[0] == values[1];
2256     case ct_not_equal:
2257         return values[0] != values[1];
2258     }
2259
2260     return 0;
2261 }
2262
2263 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2264 {
2265     while (top != limit)
2266     {
2267         regbyte_ctx *rbc = top->m_prev;
2268         regbyte_ctx_destroy (&top);
2269         top = rbc;
2270     }
2271 }
2272
/* Outcome of one match() or fast_match() invocation. */
typedef enum match_result_
{
    mr_not_matched = 0,     /* the examined string does not match */
    mr_matched = 1,         /* the examined string matches */
    mr_error_raised = 2,    /* mr_not_matched + error has been raised */
    mr_dont_emit = 3,       /* used by identifier loops only */
    mr_internal_error = 4   /* an internal error has occurred such as out of memory */
} match_result;
2281
2282 /*
2283  * This function does the main job. It parses the text and generates output data.
2284  */
2285 static match_result
2286 match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string,
2287        regbyte_ctx **rbc)
2288 {
2289    int ind = *index;
2290     match_result status = mr_not_matched;
2291     spec *sp = ru->m_specs;
2292     regbyte_ctx *ctx = *rbc;
2293
2294     /* for every specifier in the rule */
2295     while (sp)
2296     {
2297       int i, len, save_ind = ind;
2298         barray *array = NULL;
2299
2300         if (satisfies_condition (sp->m_cond, ctx))
2301         {
2302             switch (sp->m_spec_type)
2303             {
2304             case st_identifier:
2305                 barray_create (&array);
2306                 if (array == NULL)
2307                 {
2308                     free_regbyte_ctx_stack (ctx, *rbc);
2309                     return mr_internal_error;
2310                 }
2311
2312                 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2313
2314                 if (status == mr_internal_error)
2315                 {
2316                     free_regbyte_ctx_stack (ctx, *rbc);
2317                     barray_destroy (&array);
2318                     return mr_internal_error;
2319                 }
2320                 break;
2321             case st_string:
2322                 len = str_length (sp->m_string);
2323
2324                 /* prefilter the stream */
2325                 if (!filtering_string && di->m_string)
2326                 {
2327                     barray *ba;
2328                int filter_index = 0;
2329                     match_result result;
2330                     regbyte_ctx *null_ctx = NULL;
2331
2332                     barray_create (&ba);
2333                     if (ba == NULL)
2334                     {
2335                         free_regbyte_ctx_stack (ctx, *rbc);
2336                         return mr_internal_error;
2337                     }
2338
2339                     result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2340
2341                     if (result == mr_internal_error)
2342                     {
2343                         free_regbyte_ctx_stack (ctx, *rbc);
2344                         barray_destroy (&ba);
2345                         return mr_internal_error;
2346                     }
2347
2348                     if (result != mr_matched)
2349                     {
2350                         barray_destroy (&ba);
2351                         status = mr_not_matched;
2352                         break;
2353                     }
2354
2355                     barray_destroy (&ba);
2356
2357                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2358                     {
2359                         status = mr_not_matched;
2360                         break;
2361                     }
2362
2363                     status = mr_matched;
2364                     ind += len;
2365                 }
2366                 else
2367                 {
2368                     status = mr_matched;
2369                     for (i = 0; status == mr_matched && i < len; i++)
2370                         if (text[ind + i] != sp->m_string[i])
2371                             status = mr_not_matched;
2372
2373                     if (status == mr_matched)
2374                         ind += len;
2375                 }
2376                 break;
2377             case st_byte:
2378                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2379                 if (status == mr_matched)
2380                     ind++;
2381                 break;
2382             case st_byte_range:
2383                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2384                     mr_matched : mr_not_matched;
2385                 if (status == mr_matched)
2386                     ind++;
2387                 break;
2388             case st_true:
2389                 status = mr_matched;
2390                 break;
2391             case st_false:
2392                 status = mr_not_matched;
2393                 break;
2394             case st_debug:
2395                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2396                 break;
2397             case st_identifier_loop:
2398                 barray_create (&array);
2399                 if (array == NULL)
2400                 {
2401                     free_regbyte_ctx_stack (ctx, *rbc);
2402                     return mr_internal_error;
2403                 }
2404
2405                 status = mr_dont_emit;
2406                 for (;;)
2407                 {
2408                     match_result result;
2409
2410                     save_ind = ind;
2411                     result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2412
2413                     if (result == mr_error_raised)
2414                     {
2415                         status = result;
2416                         break;
2417                     }
2418                     else if (result == mr_matched)
2419                     {
2420                         if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2421                             barray_append (ba, &array))
2422                         {
2423                             free_regbyte_ctx_stack (ctx, *rbc);
2424                             barray_destroy (&array);
2425                             return mr_internal_error;
2426                         }
2427                         barray_destroy (&array);
2428                         barray_create (&array);
2429                         if (array == NULL)
2430                         {
2431                             free_regbyte_ctx_stack (ctx, *rbc);
2432                             return mr_internal_error;
2433                         }
2434                     }
2435                     else if (result == mr_internal_error)
2436                     {
2437                         free_regbyte_ctx_stack (ctx, *rbc);
2438                         barray_destroy (&array);
2439                         return mr_internal_error;
2440                     }
2441                     else
2442                         break;
2443                 }
2444                 break;
2445             }
2446         }
2447         else
2448         {
2449             status = mr_not_matched;
2450         }
2451
2452         if (status == mr_error_raised)
2453         {
2454             free_regbyte_ctx_stack (ctx, *rbc);
2455             barray_destroy (&array);
2456
2457             return mr_error_raised;
2458         }
2459
2460         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2461         {
2462             free_regbyte_ctx_stack (ctx, *rbc);
2463             barray_destroy (&array);
2464
2465             if (sp->m_errtext)
2466             {
2467                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2468                     ind), ind);
2469
2470                 return mr_error_raised;
2471             }
2472
2473             return mr_not_matched;
2474         }
2475
2476         if (status == mr_matched)
2477         {
2478             if (sp->m_emits)
2479                 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2480                 {
2481                     free_regbyte_ctx_stack (ctx, *rbc);
2482                     barray_destroy (&array);
2483                     return mr_internal_error;
2484                 }
2485
2486             if (array)
2487                 if (barray_append (ba, &array))
2488                 {
2489                     free_regbyte_ctx_stack (ctx, *rbc);
2490                     barray_destroy (&array);
2491                     return mr_internal_error;
2492                 }
2493         }
2494
2495         barray_destroy (&array);
2496
2497         /* if the rule operator is a logical or, we pick up the first matching specifier */
2498         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2499         {
2500             *index = ind;
2501             *rbc = ctx;
2502             return mr_matched;
2503         }
2504
2505         sp = sp->next;
2506     }
2507
2508     /* everything went fine - all specifiers match up */
2509     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2510     {
2511         *index = ind;
2512         *rbc = ctx;
2513         return mr_matched;
2514     }
2515
2516     free_regbyte_ctx_stack (ctx, *rbc);
2517     return mr_not_matched;
2518 }
2519
2520 static match_result
2521 fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP,
2522             int filtering_string, regbyte_ctx **rbc)
2523 {
2524    int ind = *index;
2525     int _P = filtering_string ? 0 : *_PP;
2526     int _P2;
2527     match_result status = mr_not_matched;
2528     spec *sp = ru->m_specs;
2529     regbyte_ctx *ctx = *rbc;
2530
2531     /* for every specifier in the rule */
2532     while (sp)
2533     {
2534       int i, len, save_ind = ind;
2535
2536         _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2537         if (bytepool_reserve (_BP, _P2))
2538         {
2539             free_regbyte_ctx_stack (ctx, *rbc);
2540             return mr_internal_error;
2541         }
2542
2543         if (satisfies_condition (sp->m_cond, ctx))
2544         {
2545             switch (sp->m_spec_type)
2546             {
2547             case st_identifier:
2548                 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2549
2550                 if (status == mr_internal_error)
2551                 {
2552                     free_regbyte_ctx_stack (ctx, *rbc);
2553                     return mr_internal_error;
2554                 }
2555                 break;
2556             case st_string:
2557                 len = str_length (sp->m_string);
2558
2559                 /* prefilter the stream */
2560                 if (!filtering_string && di->m_string)
2561                 {
2562                int filter_index = 0;
2563                     match_result result;
2564                     regbyte_ctx *null_ctx = NULL;
2565
2566                     result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2567
2568                     if (result == mr_internal_error)
2569                     {
2570                         free_regbyte_ctx_stack (ctx, *rbc);
2571                         return mr_internal_error;
2572                     }
2573
2574                     if (result != mr_matched)
2575                     {
2576                         status = mr_not_matched;
2577                         break;
2578                     }
2579
2580                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2581                     {
2582                         status = mr_not_matched;
2583                         break;
2584                     }
2585
2586                     status = mr_matched;
2587                     ind += len;
2588                 }
2589                 else
2590                 {
2591                     status = mr_matched;
2592                     for (i = 0; status == mr_matched && i < len; i++)
2593                         if (text[ind + i] != sp->m_string[i])
2594                             status = mr_not_matched;
2595
2596                     if (status == mr_matched)
2597                         ind += len;
2598                 }
2599                 break;
2600             case st_byte:
2601                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2602                 if (status == mr_matched)
2603                     ind++;
2604                 break;
2605             case st_byte_range:
2606                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2607                     mr_matched : mr_not_matched;
2608                 if (status == mr_matched)
2609                     ind++;
2610                 break;
2611             case st_true:
2612                 status = mr_matched;
2613                 break;
2614             case st_false:
2615                 status = mr_not_matched;
2616                 break;
2617             case st_debug:
2618                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2619                 break;
2620             case st_identifier_loop:
2621                 status = mr_dont_emit;
2622                 for (;;)
2623                 {
2624                     match_result result;
2625
2626                     save_ind = ind;
2627                     result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2628
2629                     if (result == mr_error_raised)
2630                     {
2631                         status = result;
2632                         break;
2633                     }
2634                     else if (result == mr_matched)
2635                     {
2636                         if (!filtering_string)
2637                         {
2638                             if (sp->m_emits != NULL)
2639                             {
2640                                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2641                                 {
2642                                     free_regbyte_ctx_stack (ctx, *rbc);
2643                                     return mr_internal_error;
2644                                 }
2645                             }
2646
2647                             _P = _P2;
2648                             _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2649                             if (bytepool_reserve (_BP, _P2))
2650                             {
2651                                 free_regbyte_ctx_stack (ctx, *rbc);
2652                                 return mr_internal_error;
2653                             }
2654                         }
2655                     }
2656                     else if (result == mr_internal_error)
2657                     {
2658                         free_regbyte_ctx_stack (ctx, *rbc);
2659                         return mr_internal_error;
2660                     }
2661                     else
2662                         break;
2663                 }
2664                 break;
2665             }
2666         }
2667         else
2668         {
2669             status = mr_not_matched;
2670         }
2671
2672         if (status == mr_error_raised)
2673         {
2674             free_regbyte_ctx_stack (ctx, *rbc);
2675
2676             return mr_error_raised;
2677         }
2678
2679         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2680         {
2681             free_regbyte_ctx_stack (ctx, *rbc);
2682
2683             if (sp->m_errtext)
2684             {
2685                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2686                     ind), ind);
2687
2688                 return mr_error_raised;
2689             }
2690
2691             return mr_not_matched;
2692         }
2693
2694         if (status == mr_matched)
2695         {
2696             if (sp->m_emits != NULL) {
2697                 const byte ch = (ind <= 0) ? 0 : text[ind - 1];
2698                 if (emit_push (sp->m_emits, _BP->_F + _P, ch, save_ind, &ctx))
2699                 {
2700                     free_regbyte_ctx_stack (ctx, *rbc);
2701                     return mr_internal_error;
2702                 }
2703
2704            }
2705            _P = _P2;
2706         }
2707
2708         /* if the rule operator is a logical or, we pick up the first matching specifier */
2709         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2710         {
2711             *index = ind;
2712             *rbc = ctx;
2713             if (!filtering_string)
2714                 *_PP = _P;
2715             return mr_matched;
2716         }
2717
2718         sp = sp->next;
2719     }
2720
2721     /* everything went fine - all specifiers match up */
2722     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2723     {
2724         *index = ind;
2725         *rbc = ctx;
2726         if (!filtering_string)
2727             *_PP = _P;
2728         return mr_matched;
2729     }
2730
2731     free_regbyte_ctx_stack (ctx, *rbc);
2732     return mr_not_matched;
2733 }
2734
2735 static byte *
2736 error_get_token (error *er, dict *di, const byte *text, int ind)
2737 {
2738     byte *str = NULL;
2739
2740     if (er->m_token)
2741     {
2742         barray *ba;
2743       int filter_index = 0;
2744         regbyte_ctx *ctx = NULL;
2745
2746         barray_create (&ba);
2747         if (ba != NULL)
2748         {
2749             if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2750                 filter_index)
2751             {
2752                 str = (byte *) mem_alloc (filter_index + 1);
2753                 if (str != NULL)
2754                 {
2755                     str_copy_n (str, text + ind, filter_index);
2756                     str[filter_index] = '\0';
2757                 }
2758             }
2759             barray_destroy (&ba);
2760         }
2761     }
2762
2763     return str;
2764 }
2765
/*
    Working state of grammar_load_from_text(). It is created zeroed by
    grammar_load_state_create() and released, together with everything it still points at,
    by grammar_load_state_destroy().
*/
typedef struct grammar_load_state_
{
    dict *di;               /* dictionary being built; detached before destroy on success */
    byte *syntax_symbol;    /* name of the root symbol read after ".syntax" */
    byte *string_symbol;    /* name read from a ".string" directive, NULL when absent */
    map_str *maps;          /* entries collected from ".errtext" directives */
    map_byte *mapb;         /* entries collected from ".emtcode" directives */
    map_rule *mapr;         /* one entry per parsed rule: key is its symbol, data the rule */
} grammar_load_state;
2775
2776 static void grammar_load_state_create (grammar_load_state **gr)
2777 {
2778     *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2779     if (*gr)
2780     {
2781         (**gr).di = NULL;
2782         (**gr).syntax_symbol = NULL;
2783         (**gr).string_symbol = NULL;
2784         (**gr).maps = NULL;
2785         (**gr).mapb = NULL;
2786         (**gr).mapr = NULL;
2787     }
2788 }
2789
2790 static void grammar_load_state_destroy (grammar_load_state **gr)
2791 {
2792     if (*gr)
2793     {
2794         dict_destroy (&(**gr).di);
2795         mem_free ((void **) &(**gr).syntax_symbol);
2796         mem_free ((void **) &(**gr).string_symbol);
2797         map_str_destroy (&(**gr).maps);
2798         map_byte_destroy (&(**gr).mapb);
2799         map_rule_destroy (&(**gr).mapr);
2800         mem_free ((void **) gr);
2801     }
2802 }
2803
2804
/*
    Prints a diagnostic for a failure inside grammar_load_from_text() to stderr.
    `line' is the source line reporting the failure (callers pass __LINE__) and `msg' is an
    optional description, possibly the empty string.
*/
static void error_msg(int line, const char *msg)
{
   fprintf(stderr, "Error in grammar_load_from_text() at line %d: %s\n", line, msg);
}
2809
2810
2811 /*
2812     the API
2813 */
2814 grammar grammar_load_from_text (const byte *text)
2815 {
2816     grammar_load_state *g = NULL;
2817     grammar id = 0;
2818
2819     clear_last_error ();
2820
2821     grammar_load_state_create (&g);
2822     if (g == NULL) {
2823         error_msg(__LINE__, "");
2824         return 0;
2825     }
2826
2827     dict_create (&g->di);
2828     if (g->di == NULL)
2829     {
2830         grammar_load_state_destroy (&g);
2831         error_msg(__LINE__, "");
2832         return 0;
2833     }
2834
2835     eat_spaces (&text);
2836
2837     /* skip ".syntax" keyword */
2838     text += 7;
2839     eat_spaces (&text);
2840
2841     /* retrieve root symbol */
2842     if (get_identifier (&text, &g->syntax_symbol))
2843     {
2844         grammar_load_state_destroy (&g);
2845         error_msg(__LINE__, "");
2846         return 0;
2847     }
2848     eat_spaces (&text);
2849
2850     /* skip semicolon */
2851     text++;
2852     eat_spaces (&text);
2853
2854     while (*text)
2855     {
2856         byte *symbol = NULL;
2857         int is_dot = *text == '.';
2858
2859         if (is_dot)
2860             text++;
2861
2862         if (get_identifier (&text, &symbol))
2863         {
2864             grammar_load_state_destroy (&g);
2865             error_msg(__LINE__, "");
2866             return 0;
2867         }
2868         eat_spaces (&text);
2869
2870         /* .emtcode */
2871         if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2872         {
2873             map_byte *ma = NULL;
2874
2875             mem_free ((void **) (void *) &symbol);
2876
2877             if (get_emtcode (&text, &ma))
2878             {
2879                 grammar_load_state_destroy (&g);
2880                 error_msg(__LINE__, "");
2881                 return 0;
2882             }
2883
2884             map_byte_append (&g->mapb, ma);
2885         }
2886         /* .regbyte */
2887         else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2888         {
2889             map_byte *ma = NULL;
2890
2891             mem_free ((void **) (void *) &symbol);
2892
2893             if (get_regbyte (&text, &ma))
2894             {
2895                 grammar_load_state_destroy (&g);
2896                 error_msg(__LINE__, "");
2897                 return 0;
2898             }
2899
2900             map_byte_append (&g->di->m_regbytes, ma);
2901         }
2902         /* .errtext */
2903         else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2904         {
2905             map_str *ma = NULL;
2906
2907             mem_free ((void **) (void *) &symbol);
2908
2909             if (get_errtext (&text, &ma))
2910             {
2911                 grammar_load_state_destroy (&g);
2912                 error_msg(__LINE__, "");
2913                 return 0;
2914             }
2915
2916             map_str_append (&g->maps, ma);
2917         }
2918         /* .string */
2919         else if (is_dot && str_equal (symbol, (byte *) "string"))
2920         {
2921             mem_free ((void **) (void *) &symbol);
2922
2923             if (g->di->m_string != NULL)
2924             {
2925                 grammar_load_state_destroy (&g);
2926                 error_msg(__LINE__, "");
2927                 return 0;
2928             }
2929
2930             if (get_identifier (&text, &g->string_symbol))
2931             {
2932                 grammar_load_state_destroy (&g);
2933                 error_msg(__LINE__, "");
2934                 return 0;
2935             }
2936
2937             /* skip semicolon */
2938             eat_spaces (&text);
2939             text++;
2940             eat_spaces (&text);
2941         }
2942         else
2943         {
2944             rule *ru = NULL;
2945             map_rule *ma = NULL;
2946
2947             if (get_rule (&text, &ru, g->maps, g->mapb))
2948             {
2949                 grammar_load_state_destroy (&g);
2950                 error_msg(__LINE__, "");
2951                 return 0;
2952             }
2953
2954             rule_append (&g->di->m_rulez, ru);
2955
2956             /* if a rule consist of only one specifier, give it an ".and" operator */
2957             if (ru->m_oper == op_none)
2958                 ru->m_oper = op_and;
2959
2960             map_rule_create (&ma);
2961             if (ma == NULL)
2962             {
2963                 grammar_load_state_destroy (&g);
2964                 error_msg(__LINE__, "");
2965                 return 0;
2966             }
2967
2968             ma->key = symbol;
2969             ma->data = ru;
2970             map_rule_append (&g->mapr, ma);
2971         }
2972     }
2973
2974     if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2975         g->di->m_regbytes))
2976     {
2977         grammar_load_state_destroy (&g);
2978         error_msg(__LINE__, "update_dependencies() failed");
2979         return 0;
2980     }
2981
2982     dict_append (&g_dicts, g->di);
2983     id = g->di->m_id;
2984     g->di = NULL;
2985
2986     grammar_load_state_destroy (&g);
2987
2988     return id;
2989 }
2990
2991 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2992 {
2993     dict *di = NULL;
2994     map_byte *reg = NULL;
2995
2996     clear_last_error ();
2997
2998     dict_find (&g_dicts, id, &di);
2999     if (di == NULL)
3000     {
3001         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3002         return 0;
3003     }
3004
3005     reg = map_byte_locate (&di->m_regbytes, name);
3006     if (reg == NULL)
3007     {
3008         set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
3009         return 0;
3010     }
3011
3012     reg->data = value;
3013     return 1;
3014 }
3015
3016 /*
3017     internal checking function used by both grammar_check and grammar_fast_check functions
3018 */
3019 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3020     unsigned int estimate_prod_size, int use_fast_path)
3021 {
3022     dict *di = NULL;
3023    int index = 0;
3024
3025     clear_last_error ();
3026
3027     dict_find (&g_dicts, id, &di);
3028     if (di == NULL)
3029     {
3030         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3031         return 0;
3032     }
3033
3034     *prod = NULL;
3035     *size = 0;
3036
3037     if (use_fast_path)
3038     {
3039         regbyte_ctx *rbc = NULL;
3040         bytepool *bp = NULL;
3041         int _P = 0;
3042
3043         bytepool_create (&bp, estimate_prod_size);
3044         if (bp == NULL)
3045             return 0;
3046
3047         if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3048         {
3049             bytepool_destroy (&bp);
3050             free_regbyte_ctx_stack (rbc, NULL);
3051             return 0;
3052         }
3053
3054         free_regbyte_ctx_stack (rbc, NULL);
3055
3056         *prod = bp->_F;
3057         *size = _P;
3058         bp->_F = NULL;
3059         bytepool_destroy (&bp);
3060     }
3061     else
3062     {
3063         regbyte_ctx *rbc = NULL;
3064         barray *ba = NULL;
3065
3066         barray_create (&ba);
3067         if (ba == NULL)
3068             return 0;
3069
3070         if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3071         {
3072             barray_destroy (&ba);
3073             free_regbyte_ctx_stack (rbc, NULL);
3074             return 0;
3075         }
3076
3077         free_regbyte_ctx_stack (rbc, NULL);
3078
3079         *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3080         if (*prod == NULL)
3081         {
3082             barray_destroy (&ba);
3083             return 0;
3084         }
3085
3086         mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3087         *size = ba->len;
3088         barray_destroy (&ba);
3089     }
3090
3091     return 1;
3092 }
3093
/*
    Checks `text' against grammar `id' and returns the emitted bytes in *prod / *size.
    Forwards to _grammar_check() with a production size estimate of 0 and the fast path
    disabled, and returns its result.
*/
int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
{
    return _grammar_check (id, text, prod, size, 0, 0);
}
3098
/*
    Same contract as grammar_check(), but forwards to _grammar_check() with the fast path
    enabled. estimate_prod_size is passed on as the size used to create the byte pool.
*/
int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
    unsigned int estimate_prod_size)
{
    return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
}
3104
3105 int grammar_destroy (grammar id)
3106 {
3107     dict **di = &g_dicts;
3108
3109     clear_last_error ();
3110
3111     while (*di != NULL)
3112     {
3113         if ((**di).m_id == id)
3114         {
3115             dict *tmp = *di;
3116             *di = (**di).next;
3117             dict_destroy (&tmp);
3118             return 1;
3119         }
3120
3121         di = &(**di).next;
3122     }
3123
3124     set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3125     return 0;
3126 }
3127
3128 static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3129 {
3130     if (*dots_made == 0)
3131     {
3132         if (*len < size - 1)
3133         {
3134             text[(*len)++] = x;
3135             text[*len] = '\0';
3136         }
3137         else
3138         {
3139             int i;
3140             for (i = 0; i < 3; i++)
3141                 if (--(*len) >= 0)
3142                     text[*len] = '.';
3143             *dots_made = 1;
3144         }
3145     }
3146 }
3147
3148 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3149 {
3150     int len = 0, dots_made = 0;
3151     const byte *p = error_message;
3152
3153     *text = '\0';
3154
3155     if (p)
3156     {
3157         while (*p)
3158         {
3159             if (*p == '$')
3160             {
3161                 const byte *r = error_param;
3162
3163                 while (*r)
3164                 {
3165                     append_character (*r++, text, &dots_made, &len, (int) size);
3166                 }
3167
3168                 p++;
3169             }
3170             else
3171             {
3172                 append_character (*p++, text, &dots_made, &len, size);
3173             }
3174         }
3175     }
3176
3177     *pos = error_position;
3178 }