src/mesa/shader/grammar.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.2
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file grammar.c
  27  * syntax parsing engine
  28  * \author Michal Krol
  29  */
  30
  31 #ifndef GRAMMAR_PORT_BUILD
  32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
  33 #endif
  34
  35 /*
  36 */
  37
  38 /*
  39     INTRODUCTION
  40     ------------
  41
  42     The task is to check the syntax of an input string. Input string is a stream of ASCII
  43     characters terminated with a null-character ('\0'). Checking it using C language is
  44     difficult and hard to implement without bugs. It is hard to maintain and make changes when
  45     the syntax changes.
  46
  47     This is because of a high redundancy of the C code. Large blocks of code are duplicated with
  48     only small changes. Even use of macros does not solve the problem because macros cannot
  49     erase the complexity of the problem.
  50
  51     The resolution is to create a new language that will be highly oriented to our task. Once
  52     we describe a particular syntax, we are done. We can then focus on the code that implements
   53     the language. The size and complexity of it is relatively small compared to the code that directly
  54     checks the syntax.
  55
  56     First, we must implement our new language. Here, the language is implemented in C, but it
  57     could also be implemented in any other language. The code is listed below. We must take
   58     good care that it is bug free. This is simple because the code is simple and clean.
  59
  60     Next, we must describe the syntax of our new language in itself. Once created and checked
   61     manually that it is correct, we can use it to check other scripts.
  62
  63     Note that our new language loading code does not have to check the syntax. It is because we
  64     assume that the script describing itself is correct, and other scripts can be syntactically
  65     checked by the former script. The loading code must only do semantic checking which leads us to
  66     simple resolving references.
  67
  68     THE LANGUAGE
  69     ------------
  70
  71     Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
  72     sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
  73     which is an identifier, and its definition. A definition is in turn a sequence of specifiers
  74     connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
  75     definition. Specifier can be a symbol, string, character, character range or a special
  76     keyword ".true" or ".false".
  77
  78     On the very beginning of the script there is a declaration of a root symbol and is in the form:
  79         .syntax <root_symbol>;
   80     The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
  81     the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
  82     the symbol evaluates to true. Definition evaluation depends on the operator used to connect
  83     specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
   84     only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
  85     true if any of the specifiers evaluates to true. If definition contains only one specifier,
  86     it is evaluated as if it was connected with ".true" keyword by ".and" operator.
  87
  88     If specifier is a ".true" keyword, it always evaluates to true.
  89
  90     If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
  91     when it does not evaluate to true.
  92
  93     Character range specifier is in the form:
  94         '<first_character>' - '<second_character>'
  95     If specifier is a character range, it evaluates to true if character in the stream is greater
  96     or equal to <first_character> and less or equal to <second_character>. In that situation
  97     the stream pointer is advanced to point to next character in the stream. All C-style escape
   98     sequences are supported although trigraph sequences are not. The comparisons are performed
  99     on 8-bit unsigned integers.
 100
 101     Character specifier is in the form:
 102         '<single_character>'
 103     It evaluates to true if the following character range specifier evaluates to true:
 104         '<single_character>' - '<single_character>'
 105
 106     String specifier is in the form:
 107         "<string>"
 108     Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
 109     <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
 110     the following character specifier evaluates to true:
 111         '<string>[i]'
 112     If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
 113
 114     Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
 115         .loop <symbol>                  (1)
 116     where <symbol> is defined as follows:
 117         <symbol> <definition>;          (2)
 118     Construction (1) is replaced by the following code:
 119         <symbol$1>
 120     and declaration (2) is replaced by the following:
 121         <symbol$1> <symbol$2> .or .true;
 122         <symbol$2> <symbol> .and <symbol$1>;
 123         <symbol> <definition>;
 124
  125     Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
 126     registers that can be accessed in the syn body. Each reg has its name and a default value.
 127     The register is one byte wide. The C code can change the default value by calling
 128     grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
 129     a sequence of specifiers joined with .and or .or operator. And now each specifier can be
 130     prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
 131     where <operator> can be == or !=. If the condition evaluates to false, the specifier
  132     evaluates to .false. Otherwise it evaluates to the specifier.
 133
 134     ESCAPE SEQUENCES
 135     ----------------
 136
 137     Synek supports all escape sequences in character specifiers. The mapping table is listed below.
  138     All occurrences of the characters in the first column are replaced with the corresponding
 139     character in the second column.
 140
 141         Escape sequence         Represents
 142     ------------------------------------------------------------------------------------------------
 143         \a                      Bell (alert)
 144         \b                      Backspace
 145         \f                      Formfeed
 146         \n                      New line
 147         \r                      Carriage return
 148         \t                      Horizontal tab
 149         \v                      Vertical tab
 150         \'                      Single quotation mark
 151         \"                      Double quotation mark
 152         \\                      Backslash
 153         \?                      Literal question mark
 154         \ooo                    ASCII character in octal notation
 155         \xhhh                   ASCII character in hexadecimal notation
 156     ------------------------------------------------------------------------------------------------
 157
 158     RAISING ERRORS
 159     --------------
 160
 161     Any specifier can be followed by a special construction that is executed when the specifier
 162     evaluates to false. The construction is in the form:
 163         .error <ERROR_TEXT>
 164     <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
 165     in the form:
 166         .errtext <ERROR_TEXT> "<error_desc>"
 167     When specifier evaluates to false and this construction is present, parsing is stopped
 168     immediately and <error_desc> is returned as a result of parsing. The error position is also
  169     returned and it is meant as an offset from the beginning of the stream to the character that
 170     was valid so far. Example:
 171
 172         (**** syntax script ****)
 173
 174         .syntax program;
 175         .errtext MISSING_SEMICOLON      "missing ';'"
 176         program         declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
 177                         .loop space .and '\0';
 178         declaration     "declare" .and .loop space .and identifier;
 179         space           ' ';
 180
 181         (**** sample code ****)
 182
 183         declare foo ,
 184
 185     In the example above checking the sample code will result in error message "missing ';'" and
 186     error position 12. The sample code is not correct. Note the presence of '\0' specifier to
 187     assure that there is no code after semicolon - only spaces.
 188     <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
 189     the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
  190     the identifier name. The starting position is the error position. The length of the resulting
 191     string is the position after invoking the symbol.
 192
 193     PRODUCTION
 194     ----------
 195
 196     Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
 197     that evaluate to true. That is, every specifier and optional error construction can be followed
 198     by a number of emit constructions that are in the form:
 199         .emit <parameter>
  200     <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
 201     0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
 202     in the form:
 203         .emtcode <identifier> <hex_number>
 204
 205     When given specifier evaluates to true, all emits associated with the specifier are output
 206     in order they were declared. A star means that last-read character should be output instead
 207     of constant value. Example:
 208
 209         (**** syntax script ****)
 210
 211         .syntax foobar;
 212         .emtcode WORD_FOO       0x01
 213         .emtcode WORD_BAR       0x02
 214         foobar      FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
 215         FOO         "foo" .and SPACE;
 216         BAR         "bar" .and SPACE;
 217         SPACE       ' ' .or '\0';
 218
 219         (**** sample text 1 ****)
 220
 221         foo
 222
 223         (**** sample text 2 ****)
 224
 225         foobar
 226
 227     For both samples the result will be one-element array. For first sample text it will be
 228     value 1, for second - 0. Note that every text will be accepted because of presence of
 229     .true as an alternative.
 230
 231     Another example:
 232
 233         (**** syntax script ****)
 234
 235         .syntax declaration;
 236         .emtcode VARIABLE       0x01
 237         declaration     "declare" .and .loop space .and
 238                         identifier .emit VARIABLE .and          (1)
 239                         .true .emit 0x00 .and                   (2)
 240                         .loop space .and ';';
 241         space           ' ' .or '\t';
 242         identifier      .loop id_char .emit *;                  (3)
 243         id_char         'a'-'z' .or 'A'-'Z' .or '_';
 244
 245         (**** sample code ****)
 246
 247         declare    fubar;
 248
 249     In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
 250     true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
 251     to terminate the string with null to signal when the string ends. Specifier (3) outputs
 252     all characters that make declared identifier. The result of sample code will be the
 253     following array:
 254         { 1, 'f', 'u', 'b', 'a', 'r', 0 }
 255
 256     If .emit is followed by dollar $, it means that current position should be output. Current
 257     position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
 258     first character consumed by the specifier associated with the .emit instruction. Current
 259     position is stored in the output buffer in Little-Endian convention (the lowest byte comes
 260     first).
 261 */
 262
  /* Forward declaration: clear_last_error() and set_last_error() call mem_free() before it is defined. */
  263 static void mem_free (void **);
 264
 265 /*
 266     internal error messages
 267 */
  /* Message templates for internal errors. A '$' inside a template marks the place where */
  /* error_param is meant to be inserted. */
  268 static const byte *OUT_OF_MEMORY =          (byte *) "internal error 1001: out of physical memory";
  269 static const byte *UNRESOLVED_REFERENCE =   (byte *) "internal error 1002: unresolved reference '$'";
  270 static const byte *INVALID_GRAMMAR_ID =     (byte *) "internal error 1003: invalid grammar object";
  271 static const byte *INVALID_REGISTER_NAME =  (byte *) "internal error 1004: invalid register name: '$'";
  272 static const byte *DUPLICATE_IDENTIFIER =   (byte *) "internal error 1005: identifier '$' already defined";
  273 static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
  274
  /* Current error state; written by set_last_error() and reset by clear_last_error(). */
  275 static const byte *error_message = NULL;    /* points to one of the error messages above */
  276 static byte *error_param = NULL;        /* this is inserted into error_message in place of $ */
  277 static int error_position = -1;         /* -1 is the value clear_last_error() resets it to */
  278
  /* Static placeholder stored in error_param when no parameter was supplied; it is never freed. */
  279 static byte *unknown = (byte *) "???";
 280
 281 static void clear_last_error (void)
 282 {
 283     /* reset error message */
 284     error_message = NULL;
 285
 286     /* free error parameter - if error_param is a "???" don't free it - it's static */
 287     if (error_param != unknown)
 288         mem_free ((void **) (void *) &error_param);
 289     else
 290         error_param = NULL;
 291
 292     /* reset error position */
 293     error_position = -1;
 294 }
 295
 296 static void set_last_error (const byte *msg, byte *param, int pos)
 297 {
 298     /* error message can be set only once */
 299     if (error_message != NULL)
 300     {
 301         mem_free ((void **) (void *) &param);
 302         return;
 303     }
 304
 305     error_message = msg;
 306
 307     /* if param is NULL, set error_param to unknown ("???") */
 308     /* note: do not try to strdup the "???" - it may be that we are here because of */
 309     /* out of memory error so strdup can fail */
 310     if (param != NULL)
 311         error_param = param;
 312     else
 313         error_param = unknown;
 314
 315     error_position = pos;
 316 }
 317
 318 /*
 319     memory management routines
 320 */
 321 static void *mem_alloc (size_t size)
 322 {
 323     void *ptr = grammar_alloc_malloc (size);
 324     if (ptr == NULL)
 325         set_last_error (OUT_OF_MEMORY, NULL, -1);
 326     return ptr;
 327 }
 328
 329 static void *mem_copy (void *dst, const void *src, size_t size)
 330 {
 331     return grammar_memory_copy (dst, src, size);
 332 }
 333
 334 static void mem_free (void **ptr)
 335 {
 336     grammar_alloc_free (*ptr);
 337     *ptr = NULL;
 338 }
 339
 340 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
 341 {
 342     void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
 343     if (ptr2 == NULL)
 344         set_last_error (OUT_OF_MEMORY, NULL, -1);
 345     return ptr2;
 346 }
 347
 348 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
 349 {
 350     return grammar_string_copy_n (dst, src, max_len);
 351 }
 352
 353 static byte *str_duplicate (const byte *str)
 354 {
 355     byte *new_str = grammar_string_duplicate (str);
 356     if (new_str == NULL)
 357         set_last_error (OUT_OF_MEMORY, NULL, -1);
 358     return new_str;
 359 }
 360
 361 static int str_equal (const byte *str1, const byte *str2)
 362 {
 363     return grammar_string_compare (str1, str2) == 0;
 364 }
 365
 366 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
 367 {
 368     return grammar_string_compare_n (str1, str2, n) == 0;
 369 }
 370
 371 static unsigned int str_length (const byte *str)
 372 {
 373     return grammar_string_length (str);
 374 }
 375
 376 /*
 377     useful macros
 378 */
 379 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
 380     static void _Ty##_append (_Ty **x, _Ty *nx) {\
 381         while (*x) x = &(**x).next;\
 382         *x = nx;\
 383     }
 384
 385 /*
 386     string to byte map typedef
 387 */
 388 typedef struct map_byte_
 389 {
 390     byte *key;
 391     byte data;
 392     struct map_byte_ *next;
 393 } map_byte;
 394
 395 static void map_byte_create (map_byte **ma)
 396 {
 397     *ma = (map_byte *) mem_alloc (sizeof (map_byte));
 398     if (*ma)
 399     {
 400         (**ma).key = NULL;
 401         (**ma).data = '\0';
 402         (**ma).next = NULL;
 403     }
 404 }
 405
 406 static void map_byte_destroy (map_byte **ma)
 407 {
 408     if (*ma)
 409     {
 410         map_byte_destroy (&(**ma).next);
 411         mem_free ((void **) &(**ma).key);
 412         mem_free ((void **) ma);
 413     }
 414 }
 415
  /* Defines map_byte_append (map_byte **x, map_byte *nx), which links nx after the last entry of the list at *x. */
  416 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
 417
 418 /*
 419     searches the map for the specified key,
 420     returns pointer to the element with the specified key if it exists
 421     returns NULL otherwise
 422 */
 423 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
 424 {
 425     while (*ma)
 426     {
 427         if (str_equal ((**ma).key, key))
 428             return *ma;
 429
 430         ma = &(**ma).next;
 431     }
 432
 433     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
 434     return NULL;
 435 }
 436
 437 /*
 438     searches the map for specified key,
 439     if the key is matched, *data is filled with data associated with the key,
 440     returns 0 if the key is matched,
 441     returns 1 otherwise
 442 */
 443 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
 444 {
 445     map_byte *found = map_byte_locate (ma, key);
 446     if (found != NULL)
 447     {
 448         *data = found->data;
 449
 450         return 0;
 451     }
 452
 453     return 1;
 454 }
 455
 456 /*
 457     regbyte context typedef
 458
 459     Each regbyte consists of its name and a default value. These are static and created at
 460     grammar script compile-time, for example the following line:
 461         .regbyte vertex_blend      0x00
 462     adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
 463     When the script is executed, this regbyte can be accessed by name for read and write. When a
 464     particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
  465     stack. The new entry contains information about which regbyte it references and its new value.
 466     When a given regbyte is accessed for read, the stack is searched top-down to find an
 467     entry that references the regbyte. The first matching entry is used to return the current
 468     value it holds. If no entry is found, the default value is returned.
 469 */
 470 typedef struct regbyte_ctx_
 471 {
 472     map_byte *m_regbyte;
 473     byte m_current_value;
 474     struct regbyte_ctx_ *m_prev;
 475 } regbyte_ctx;
 476
 477 static void regbyte_ctx_create (regbyte_ctx **re)
 478 {
 479     *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
 480     if (*re)
 481     {
 482         (**re).m_regbyte = NULL;
 483         (**re).m_prev = NULL;
 484     }
 485 }
 486
 487 static void regbyte_ctx_destroy (regbyte_ctx **re)
 488 {
 489     if (*re)
 490     {
 491         mem_free ((void **) re);
 492     }
 493 }
 494
 495 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
 496 {
 497     /* first lookup in the register stack */
 498     while (*re != NULL)
 499     {
 500         if ((**re).m_regbyte == reg)
 501             return (**re).m_current_value;
 502
 503         re = &(**re).m_prev;
 504     }
 505
 506     /* if not found - return the default value */
 507     return reg->data;
 508 }
 509
 510 /*
 511     emit type typedef
 512 */
 513 typedef enum emit_type_
 514 {
 515     et_byte,            /* explicit number */
 516     et_stream,          /* eaten character */
 517     et_position         /* current position */
 518 } emit_type;
 519
 520 /*
 521     emit destination typedef
 522 */
 523 typedef enum emit_dest_
 524 {
 525     ed_output,          /* write to the output buffer */
 526     ed_regbyte          /* write a particular regbyte */
 527 } emit_dest;
 528
 529 /*
 530     emit typedef
 531 */
 532 typedef struct emit_
 533 {
 534     emit_dest m_emit_dest;
 535     emit_type m_emit_type;      /* ed_output */
 536     byte m_byte;                /* et_byte */
 537     map_byte *m_regbyte;        /* ed_regbyte */
 538     byte *m_regname;            /* ed_regbyte - temporary */
 539     struct emit_ *m_next;
 540 } emit;
 541
 542 static void emit_create (emit **em)
 543 {
 544     *em = (emit *) mem_alloc (sizeof (emit));
 545     if (*em)
 546     {
 547         (**em).m_emit_dest = ed_output;
 548         (**em).m_emit_type = et_byte;
 549         (**em).m_byte = '\0';
 550         (**em).m_regbyte = NULL;
 551         (**em).m_regname = NULL;
 552         (**em).m_next = NULL;
 553     }
 554 }
 555
 556 static void emit_destroy (emit **em)
 557 {
 558     if (*em)
 559     {
 560         emit_destroy (&(**em).m_next);
 561         mem_free ((void **) &(**em).m_regname);
 562         mem_free ((void **) em);
 563     }
 564 }
 565
 566 static unsigned int emit_size (emit *_E)
 567 {
 568     unsigned int _N = 0;
 569
 570     while (_E != NULL)
 571     {
 572         if (_E->m_emit_dest == ed_output)
 573         {
 574             if (_E->m_emit_type == et_position)
 575                 _N += 4;     /* position is a 32-bit unsigned integer */
 576             else
 577                 _N++;
 578         }
 579         _E = _E->m_next;
 580     }
 581
 582     return _N;
 583 }
 584
 585 static int emit_push (emit *_E, byte *_P, byte _C, unsigned int _Pos, regbyte_ctx **_Ctx)
 586 {
 587     while (_E != NULL)
 588     {
 589         if (_E->m_emit_dest == ed_output)
 590         {
 591             if (_E->m_emit_type == et_byte)
 592                 *_P++ = _E->m_byte;
 593             else if (_E->m_emit_type == et_stream)
 594                 *_P++ = _C;
 595             else /* _Em->type == et_position */
 596             {
 597                 *_P++ = (byte) (_Pos);
 598                 *_P++ = (byte) (_Pos >> 8);
 599                 *_P++ = (byte) (_Pos >> 16);
 600                 *_P++ = (byte) (_Pos >> 24);
 601             }
 602         }
 603         else
 604         {
 605             regbyte_ctx *new_rbc;
 606             regbyte_ctx_create (&new_rbc);
 607             if (new_rbc == NULL)
 608                 return 1;
 609
 610             new_rbc->m_prev = *_Ctx;
 611             new_rbc->m_regbyte = _E->m_regbyte;
 612             *_Ctx = new_rbc;
 613
 614             if (_E->m_emit_type == et_byte)
 615                 new_rbc->m_current_value = _E->m_byte;
 616             else if (_E->m_emit_type == et_stream)
 617                 new_rbc->m_current_value = _C;
 618         }
 619
 620         _E = _E->m_next;
 621     }
 622
 623     return 0;
 624 }
 625
 626 /*
 627     error typedef
 628 */
 629 typedef struct error_
 630 {
 631     byte *m_text;
 632     byte *m_token_name;
 633     struct rule_ *m_token;
 634 } error;
 635
 636 static void error_create (error **er)
 637 {
 638     *er = (error *) mem_alloc (sizeof (error));
 639     if (*er)
 640     {
 641         (**er).m_text = NULL;
 642         (**er).m_token_name = NULL;
 643         (**er).m_token = NULL;
 644     }
 645 }
 646
 647 static void error_destroy (error **er)
 648 {
 649     if (*er)
 650     {
 651         mem_free ((void **) &(**er).m_text);
 652         mem_free ((void **) &(**er).m_token_name);
 653         mem_free ((void **) er);
 654     }
 655 }
 656
  /* Forward declarations: struct dict_ is defined further down in this file; */
  /* error_get_token() is only declared here - its definition is not in this part of the file. */
  657 struct dict_;
  658 static byte *error_get_token (error *, struct dict_ *, const byte *, unsigned int);
 659
 660 /*
 661     condition operand type typedef
 662 */
 663 typedef enum cond_oper_type_
 664 {
 665     cot_byte,               /* constant 8-bit unsigned integer */
 666     cot_regbyte             /* pointer to byte register containing the current value */
 667 } cond_oper_type;
 668
 669 /*
 670     condition operand typedef
 671 */
 672 typedef struct cond_oper_
 673 {
 674     cond_oper_type m_type;
 675     byte m_byte;            /* cot_byte */
 676     map_byte *m_regbyte;    /* cot_regbyte */
 677     byte *m_regname;        /* cot_regbyte - temporary */
 678 } cond_oper;
 679
 680 /*
 681     condition type typedef
 682 */
 683 typedef enum cond_type_
 684 {
 685     ct_equal,
 686     ct_not_equal
 687 } cond_type;
 688
 689 /*
 690     condition typedef
 691 */
 692 typedef struct cond_
 693 {
 694     cond_type m_type;
 695     cond_oper m_operands[2];
 696 } cond;
 697
 698 static void cond_create (cond **co)
 699 {
 700     *co = (cond *) mem_alloc (sizeof (cond));
 701     if (*co)
 702     {
 703         (**co).m_operands[0].m_regname = NULL;
 704         (**co).m_operands[1].m_regname = NULL;
 705     }
 706 }
 707
 708 static void cond_destroy (cond **co)
 709 {
 710     if (*co)
 711     {
 712         mem_free ((void **) &(**co).m_operands[0].m_regname);
 713         mem_free ((void **) &(**co).m_operands[1].m_regname);
 714         mem_free ((void **) co);
 715     }
 716 }
 717
 718 /*
 719     specifier type typedef
 720 */
 721 typedef enum spec_type_
 722 {
 723     st_false,
 724     st_true,
 725     st_byte,
 726     st_byte_range,
 727     st_string,
 728     st_identifier,
 729     st_identifier_loop,
 730     st_debug
 731 } spec_type;
 732
 733 /*
 734     specifier typedef
 735 */
 736 typedef struct spec_
 737 {
 738     spec_type m_spec_type;
 739     byte m_byte[2];                 /* st_byte, st_byte_range */
 740     byte *m_string;                 /* st_string */
 741     struct rule_ *m_rule;           /* st_identifier, st_identifier_loop */
 742     emit *m_emits;
 743     error *m_errtext;
 744     cond *m_cond;
 745     struct spec_ *next;
 746 } spec;
 747
 748 static void spec_create (spec **sp)
 749 {
 750     *sp = (spec *) mem_alloc (sizeof (spec));
 751     if (*sp)
 752     {
 753         (**sp).m_spec_type = st_false;
 754         (**sp).m_byte[0] = '\0';
 755         (**sp).m_byte[1] = '\0';
 756         (**sp).m_string = NULL;
 757         (**sp).m_rule = NULL;
 758         (**sp).m_emits = NULL;
 759         (**sp).m_errtext = NULL;
 760         (**sp).m_cond = NULL;
 761         (**sp).next = NULL;
 762     }
 763 }
 764
 765 static void spec_destroy (spec **sp)
 766 {
 767     if (*sp)
 768     {
 769         spec_destroy (&(**sp).next);
 770         emit_destroy (&(**sp).m_emits);
 771         error_destroy (&(**sp).m_errtext);
 772         mem_free ((void **) &(**sp).m_string);
 773         cond_destroy (&(**sp).m_cond);
 774         mem_free ((void **) sp);
 775     }
 776 }
 777
  /* Defines spec_append (spec **x, spec *nx), which links nx after the last specifier of the list at *x. */
  778 GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
 779
 780 /*
 781     operator typedef
 782 */
 783 typedef enum oper_
 784 {
 785     op_none,
 786     op_and,
 787     op_or
 788 } oper;
 789
 790 /*
 791     rule typedef
 792 */
 793 typedef struct rule_
 794 {
 795     oper m_oper;
 796     spec *m_specs;
 797     struct rule_ *next;
 798     int m_referenced;
 799 } rule;
 800
 801 static void rule_create (rule **ru)
 802 {
 803     *ru = (rule *) mem_alloc (sizeof (rule));
 804     if (*ru)
 805     {
 806         (**ru).m_oper = op_none;
 807         (**ru).m_specs = NULL;
 808         (**ru).next = NULL;
 809         (**ru).m_referenced = 0;
 810     }
 811 }
 812
 813 static void rule_destroy (rule **ru)
 814 {
 815     if (*ru)
 816     {
 817         rule_destroy (&(**ru).next);
 818         spec_destroy (&(**ru).m_specs);
 819         mem_free ((void **) ru);
 820     }
 821 }
 822
  /* Defines rule_append (rule **x, rule *nx), which links nx after the last rule of the list at *x. */
  823 GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
 824
 825 /*
 826     returns unique grammar id
 827 */
 828 static grammar next_valid_grammar_id (void)
 829 {
 830     static grammar id = 0;
 831
 832     return ++id;
 833 }
 834
 835 /*
 836     dictionary typedef
 837 */
 838 typedef struct dict_
 839 {
 840     rule *m_rulez;
 841     rule *m_syntax;
 842     rule *m_string;
 843     map_byte *m_regbytes;
 844     grammar m_id;
 845     struct dict_ *next;
 846 } dict;
 847
 848 static void dict_create (dict **di)
 849 {
 850     *di = (dict *) mem_alloc (sizeof (dict));
 851     if (*di)
 852     {
 853         (**di).m_rulez = NULL;
 854         (**di).m_syntax = NULL;
 855         (**di).m_string = NULL;
 856         (**di).m_regbytes = NULL;
 857         (**di).m_id = next_valid_grammar_id ();
 858         (**di).next = NULL;
 859     }
 860 }
 861
 862 static void dict_destroy (dict **di)
 863 {
 864     if (*di)
 865     {
 866         rule_destroy (&(**di).m_rulez);
 867         map_byte_destroy (&(**di).m_regbytes);
 868         mem_free ((void **) di);
 869     }
 870 }
 871
  /* Defines dict_append (dict **x, dict *nx), which links nx after the last dictionary of the list at *x. */
  872 GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
 873
 874 static void dict_find (dict **di, grammar key, dict **data)
 875 {
 876     while (*di)
 877     {
 878         if ((**di).m_id == key)
 879         {
 880             *data = *di;
 881             return;
 882         }
 883
 884         di = &(**di).next;
 885     }
 886
 887     *data = NULL;
 888 }
 889
  /* File-scope dict pointer, initially NULL; presumably the head of the dictionary list that */
  /* dict_append() and dict_find() operate on - confirm against the callers. */
  890 static dict *g_dicts = NULL;
 891
 892 /*
 893     byte array typedef
 894 */
 895 typedef struct barray_
 896 {
 897     byte *data;
 898     unsigned int len;
 899 } barray;
 900
 901 static void barray_create (barray **ba)
 902 {
 903     *ba = (barray *) mem_alloc (sizeof (barray));
 904     if (*ba)
 905     {
 906         (**ba).data = NULL;
 907         (**ba).len = 0;
 908     }
 909 }
 910
 911 static void barray_destroy (barray **ba)
 912 {
 913     if (*ba)
 914     {
 915         mem_free ((void **) &(**ba).data);
 916         mem_free ((void **) ba);
 917     }
 918 }
 919
 920 /*
 921     reallocates byte array to requested size,
 922     returns 0 on success,
 923     returns 1 otherwise
 924 */
 925 static int barray_resize (barray **ba, unsigned int nlen)
 926 {
 927     byte *new_pointer;
 928
 929     if (nlen == 0)
 930     {
 931         mem_free ((void **) &(**ba).data);
 932         (**ba).data = NULL;
 933         (**ba).len = 0;
 934
 935         return 0;
 936     }
 937     else
 938     {
 939         new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
 940             nlen * sizeof (byte));
 941         if (new_pointer)
 942         {
 943             (**ba).data = new_pointer;
 944             (**ba).len = nlen;
 945
 946             return 0;
 947         }
 948     }
 949
 950     return 1;
 951 }
 952
 953 /*
 954     adds byte array pointed by *nb to the end of array pointed by *ba,
 955     returns 0 on success,
 956     returns 1 otherwise
 957 */
 958 static int barray_append (barray **ba, barray **nb)
 959 {
 960     const unsigned int len = (**ba).len;
 961
 962     if (barray_resize (ba, (**ba).len + (**nb).len))
 963         return 1;
 964
 965     mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
 966
 967     return 0;
 968 }
 969
 970 /*
 971     adds emit chain pointed by em to the end of array pointed by *ba,
 972     returns 0 on success,
 973     returns 1 otherwise
 974 */
 975 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
 976 {
 977     unsigned int count = emit_size (em);
 978
 979     if (barray_resize (ba, (**ba).len + count))
 980         return 1;
 981
 982     return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
 983 }
 984
 985 /*
 986     byte pool typedef
 987 */
 988 typedef struct bytepool_
 989 {
 990     byte *_F;
 991     unsigned int _Siz;
 992 } bytepool;
 993
 994 static void bytepool_destroy (bytepool **by)
 995 {
 996     if (*by != NULL)
 997     {
 998         mem_free ((void **) &(**by)._F);
 999         mem_free ((void **) by);
1000     }
1001 }
1002
1003 static void bytepool_create (bytepool **by, int len)
1004 {
1005     *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1006     if (*by != NULL)
1007     {
1008         (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1009         (**by)._Siz = len;
1010
1011         if ((**by)._F == NULL)
1012             bytepool_destroy (by);
1013     }
1014 }
1015
1016 static int bytepool_reserve (bytepool *by, unsigned int _N)
1017 {
1018     byte *_P;
1019
1020     if (_N <= by->_Siz)
1021         return 0;
1022
1023     /* byte pool can only grow and at least by doubling its size */
1024     _N = _N >= by->_Siz * 2 ? _N : by->_Siz * 2;
1025
1026     /* reallocate the memory and adjust pointers to the new memory location */
1027     _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * _N));
1028     if (_P != NULL)
1029     {
1030         by->_F = _P;
1031         by->_Siz = _N;
1032         return 0;
1033     }
1034
1035     return 1;
1036 }
1037
1038 /*
1039     string to string map typedef
1040 */
1041 typedef struct map_str_
1042 {
1043     byte *key;
1044     byte *data;
1045     struct map_str_ *next;
1046 } map_str;
1047
1048 static void map_str_create (map_str **ma)
1049 {
1050     *ma = (map_str *) mem_alloc (sizeof (map_str));
1051     if (*ma)
1052     {
1053         (**ma).key = NULL;
1054         (**ma).data = NULL;
1055         (**ma).next = NULL;
1056     }
1057 }
1058
1059 static void map_str_destroy (map_str **ma)
1060 {
1061     if (*ma)
1062     {
1063         map_str_destroy (&(**ma).next);
1064         mem_free ((void **) &(**ma).key);
1065         mem_free ((void **) &(**ma).data);
1066         mem_free ((void **) ma);
1067     }
1068 }
1069
1070 GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1071
1072 /*
1073     searches the map for specified key,
1074     if the key is matched, *data is filled with data associated with the key,
1075     returns 0 if the key is matched,
1076     returns 1 otherwise
1077 */
1078 static int map_str_find (map_str **ma, const byte *key, byte **data)
1079 {
1080     while (*ma)
1081     {
1082         if (str_equal ((**ma).key, key))
1083         {
1084             *data = str_duplicate ((**ma).data);
1085             if (*data == NULL)
1086                 return 1;
1087
1088             return 0;
1089         }
1090
1091         ma = &(**ma).next;
1092     }
1093
1094     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1095     return 1;
1096 }
1097
1098 /*
1099     string to rule map typedef
1100 */
1101 typedef struct map_rule_
1102 {
1103     byte *key;
1104     rule *data;
1105     struct map_rule_ *next;
1106 } map_rule;
1107
1108 static void map_rule_create (map_rule **ma)
1109 {
1110     *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1111     if (*ma)
1112     {
1113         (**ma).key = NULL;
1114         (**ma).data = NULL;
1115         (**ma).next = NULL;
1116     }
1117 }
1118
1119 static void map_rule_destroy (map_rule **ma)
1120 {
1121     if (*ma)
1122     {
1123         map_rule_destroy (&(**ma).next);
1124         mem_free ((void **) &(**ma).key);
1125         mem_free ((void **) ma);
1126     }
1127 }
1128
1129 GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1130
1131 /*
1132     searches the map for specified key,
1133     if the key is matched, *data is filled with data associated with the key,
1134     returns 0 if the is matched,
1135     returns 1 otherwise
1136 */
1137 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1138 {
1139     while (*ma)
1140     {
1141         if (str_equal ((**ma).key, key))
1142         {
1143             *data = (**ma).data;
1144
1145             return 0;
1146         }
1147
1148         ma = &(**ma).next;
1149     }
1150
1151     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1152     return 1;
1153 }
1154
1155 /*
1156     returns 1 if given character is a white space,
1157     returns 0 otherwise
1158 */
1159 static int is_space (byte c)
1160 {
1161     return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1162 }
1163
1164 /*
1165     advances text pointer by 1 if character pointed by *text is a space,
1166     returns 1 if a space has been eaten,
1167     returns 0 otherwise
1168 */
1169 static int eat_space (const byte **text)
1170 {
1171     if (is_space (**text))
1172     {
1173         (*text)++;
1174
1175         return 1;
1176     }
1177
1178     return 0;
1179 }
1180
1181 /*
1182     returns 1 if text points to C-style comment start string,
1183     returns 0 otherwise
1184 */
1185 static int is_comment_start (const byte *text)
1186 {
1187     return text[0] == '/' && text[1] == '*';
1188 }
1189
1190 /*
1191     advances text pointer to first character after C-style comment block - if any,
1192     returns 1 if C-style comment block has been encountered and eaten,
1193     returns 0 otherwise
1194 */
1195 static int eat_comment (const byte **text)
1196 {
1197     if (is_comment_start (*text))
1198     {
1199         /* *text points to comment block - skip two characters to enter comment body */
1200         *text += 2;
1201         /* skip any character except consecutive '*' and '/' */
1202         while (!((*text)[0] == '*' && (*text)[1] == '/'))
1203             (*text)++;
1204         /* skip those two terminating characters */
1205         *text += 2;
1206
1207         return 1;
1208     }
1209
1210     return 0;
1211 }
1212
1213 /*
1214     advances text pointer to first character that is neither space nor C-style comment block
1215 */
1216 static void eat_spaces (const byte **text)
1217 {
1218     while (eat_space (text) || eat_comment (text))
1219         ;
1220 }
1221
1222 /*
1223     resizes string pointed by *ptr to successfully add character c to the end of the string,
1224     returns 0 on success,
1225     returns 1 otherwise
1226 */
1227 static int string_grow (byte **ptr, unsigned int *len, byte c)
1228 {
1229     /* reallocate the string in 16-byte increments */
1230     if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1231     {
1232         byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1233             ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1234         if (tmp == NULL)
1235             return 1;
1236
1237         *ptr = tmp;
1238     }
1239
1240     if (c)
1241     {
1242         /* append given character */
1243         (*ptr)[*len] = c;
1244         (*len)++;
1245     }
1246     (*ptr)[*len] = '\0';
1247
1248     return 0;
1249 }
1250
1251 /*
1252     returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1253     returns 0 otherwise
1254 */
1255 static int is_identifier (byte c)
1256 {
1257     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1258 }
1259
1260 /*
1261     copies characters from *text to *id until non-identifier character is encountered,
1262     assumes that *id points to NULL object - caller is responsible for later freeing the string,
1263     text pointer is advanced to point past the copied identifier,
1264     returns 0 if identifier was successfully copied,
1265     returns 1 otherwise
1266 */
1267 static int get_identifier (const byte **text, byte **id)
1268 {
1269     const byte *t = *text;
1270     byte *p = NULL;
1271     unsigned int len = 0;
1272
1273     if (string_grow (&p, &len, '\0'))
1274         return 1;
1275
1276     /* loop while next character in buffer is valid for identifiers */
1277     while (is_identifier (*t))
1278     {
1279         if (string_grow (&p, &len, *t++))
1280         {
1281             mem_free ((void **) (void *) &p);
1282             return 1;
1283         }
1284     }
1285
1286     *text = t;
1287     *id = p;
1288
1289     return 0;
1290 }
1291
1292 /*
1293     converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1294     advances text pointer past the converted sequence,
1295     returns the converted value
1296 */
1297 static unsigned int dec_convert (const byte **text)
1298 {
1299     unsigned int value = 0;
1300
1301     while (**text >= '0' && **text <= '9')
1302     {
1303         value = value * 10 + **text - '0';
1304         (*text)++;
1305     }
1306
1307     return value;
1308 }
1309
1310 /*
1311     returns 1 if given character is HEX digit 0-9, A-F or a-f,
1312     returns 0 otherwise
1313 */
1314 static int is_hex (byte c)
1315 {
1316     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1317 }
1318
1319 /*
1320     returns value of passed character as if it was HEX digit
1321 */
1322 static unsigned int hex2dec (byte c)
1323 {
1324     if (c >= '0' && c <= '9')
1325         return c - '0';
1326     if (c >= 'A' && c <= 'F')
1327         return c - 'A' + 10;
1328     return c - 'a' + 10;
1329 }
1330
1331 /*
1332     converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1333     advances text pointer past the converted sequence,
1334     returns the converted value
1335 */
1336 static unsigned int hex_convert (const byte **text)
1337 {
1338     unsigned int value = 0;
1339
1340     while (is_hex (**text))
1341     {
1342         value = value * 0x10 + hex2dec (**text);
1343         (*text)++;
1344     }
1345
1346     return value;
1347 }
1348
1349 /*
1350     returns 1 if given character is OCT digit 0-7,
1351     returns 0 otherwise
1352 */
1353 static int is_oct (byte c)
1354 {
1355     return c >= '0' && c <= '7';
1356 }
1357
1358 /*
1359     returns value of passed character as if it was OCT digit
1360 */
1361 static int oct2dec (byte c)
1362 {
1363     return c - '0';
1364 }
1365
1366 static byte get_escape_sequence (const byte **text)
1367 {
1368     int value = 0;
1369
1370     /* skip '\' character */
1371     (*text)++;
1372
1373     switch (*(*text)++)
1374     {
1375     case '\'':
1376         return '\'';
1377     case '"':
1378         return '\"';
1379     case '?':
1380         return '\?';
1381     case '\\':
1382         return '\\';
1383     case 'a':
1384         return '\a';
1385     case 'b':
1386         return '\b';
1387     case 'f':
1388         return '\f';
1389     case 'n':
1390         return '\n';
1391     case 'r':
1392         return '\r';
1393     case 't':
1394         return '\t';
1395     case 'v':
1396         return '\v';
1397     case 'x':
1398         return (byte) hex_convert (text);
1399     }
1400
1401     (*text)--;
1402     if (is_oct (**text))
1403     {
1404         value = oct2dec (*(*text)++);
1405         if (is_oct (**text))
1406         {
1407             value = value * 010 + oct2dec (*(*text)++);
1408             if (is_oct (**text))
1409                 value = value * 010 + oct2dec (*(*text)++);
1410         }
1411     }
1412
1413     return (byte) value;
1414 }
1415
1416 /*
1417     copies characters from *text to *str until " or ' character is encountered,
1418     assumes that *str points to NULL object - caller is responsible for later freeing the string,
1419     assumes that *text points to " or ' character that starts the string,
1420     text pointer is advanced to point past the " or ' character,
1421     returns 0 if string was successfully copied,
1422     returns 1 otherwise
1423 */
1424 static int get_string (const byte **text, byte **str)
1425 {
1426     const byte *t = *text;
1427     byte *p = NULL;
1428     unsigned int len = 0;
1429     byte term_char;
1430
1431     if (string_grow (&p, &len, '\0'))
1432         return 1;
1433
1434     /* read " or ' character that starts the string */
1435     term_char = *t++;
1436     /* while next character is not the terminating character */
1437     while (*t && *t != term_char)
1438     {
1439         byte c;
1440
1441         if (*t == '\\')
1442             c = get_escape_sequence (&t);
1443         else
1444             c = *t++;
1445
1446         if (string_grow (&p, &len, c))
1447         {
1448             mem_free ((void **) (void *) &p);
1449             return 1;
1450         }
1451     }
1452     /* skip " or ' character that ends the string */
1453     t++;
1454
1455     *text = t;
1456     *str = p;
1457     return 0;
1458 }
1459
1460 /*
1461     gets emit code, the syntax is:
1462     ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1463     assumes that *text already points to <symbol>,
1464     returns 0 if emit code is successfully read,
1465     returns 1 otherwise
1466 */
1467 static int get_emtcode (const byte **text, map_byte **ma)
1468 {
1469     const byte *t = *text;
1470     map_byte *m = NULL;
1471
1472     map_byte_create (&m);
1473     if (m == NULL)
1474         return 1;
1475
1476     if (get_identifier (&t, &m->key))
1477     {
1478         map_byte_destroy (&m);
1479         return 1;
1480     }
1481     eat_spaces (&t);
1482
1483     if (*t == '\'')
1484     {
1485         byte *c;
1486
1487         if (get_string (&t, &c))
1488         {
1489             map_byte_destroy (&m);
1490             return 1;
1491         }
1492
1493         m->data = (byte) c[0];
1494         mem_free ((void **) (void *) &c);
1495     }
1496     else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1497     {
1498         /* skip HEX "0x" or "0X" prefix */
1499         t += 2;
1500         m->data = (byte) hex_convert (&t);
1501     }
1502     else
1503     {
1504         m->data = (byte) dec_convert (&t);
1505     }
1506
1507     eat_spaces (&t);
1508
1509     *text = t;
1510     *ma = m;
1511     return 0;
1512 }
1513
1514 /*
1515     gets regbyte declaration, the syntax is:
1516     ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1517     assumes that *text already points to <symbol>,
1518     returns 0 if regbyte is successfully read,
1519     returns 1 otherwise
1520 */
1521 static int get_regbyte (const byte **text, map_byte **ma)
1522 {
1523     /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1524     return get_emtcode (text, ma);
1525 }
1526
1527 /*
1528     returns 0 on success,
1529     returns 1 otherwise
1530 */
1531 static int get_errtext (const byte **text, map_str **ma)
1532 {
1533     const byte *t = *text;
1534     map_str *m = NULL;
1535
1536     map_str_create (&m);
1537     if (m == NULL)
1538         return 1;
1539
1540     if (get_identifier (&t, &m->key))
1541     {
1542         map_str_destroy (&m);
1543         return 1;
1544     }
1545     eat_spaces (&t);
1546
1547     if (get_string (&t, &m->data))
1548     {
1549         map_str_destroy (&m);
1550         return 1;
1551     }
1552     eat_spaces (&t);
1553
1554     *text = t;
1555     *ma = m;
1556     return 0;
1557 }
1558
1559 /*
1560     returns 0 on success,
1561     returns 1 otherwise,
1562 */
1563 static int get_error (const byte **text, error **er, map_str *maps)
1564 {
1565     const byte *t = *text;
1566     byte *temp = NULL;
1567
1568     if (*t != '.')
1569         return 0;
1570
1571     t++;
1572     if (get_identifier (&t, &temp))
1573         return 1;
1574     eat_spaces (&t);
1575
1576     if (!str_equal ((byte *) "error", temp))
1577     {
1578         mem_free ((void **) (void *) &temp);
1579         return 0;
1580     }
1581
1582     mem_free ((void **) (void *) &temp);
1583
1584     error_create (er);
1585     if (*er == NULL)
1586         return 1;
1587
1588     if (*t == '\"')
1589     {
1590         if (get_string (&t, &(**er).m_text))
1591         {
1592             error_destroy (er);
1593             return 1;
1594         }
1595         eat_spaces (&t);
1596     }
1597     else
1598     {
1599         if (get_identifier (&t, &temp))
1600         {
1601             error_destroy (er);
1602             return 1;
1603         }
1604         eat_spaces (&t);
1605
1606         if (map_str_find (&maps, temp, &(**er).m_text))
1607         {
1608             mem_free ((void **) (void *) &temp);
1609             error_destroy (er);
1610             return 1;
1611         }
1612
1613         mem_free ((void **) (void *) &temp);
1614     }
1615
1616     /* try to extract "token" from "...$token$..." */
1617     {
1618         byte *processed = NULL;
1619         unsigned int len = 0, i = 0;
1620
1621         if (string_grow (&processed, &len, '\0'))
1622         {
1623             error_destroy (er);
1624             return 1;
1625         }
1626
1627         while (i < str_length ((**er).m_text))
1628         {
1629             /* check if the dollar sign is repeated - if so skip it */
1630             if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1631             {
1632                 if (string_grow (&processed, &len, '$'))
1633                 {
1634                     mem_free ((void **) (void *) &processed);
1635                     error_destroy (er);
1636                     return 1;
1637                 }
1638
1639                 i += 2;
1640             }
1641             else if ((**er).m_text[i] != '$')
1642             {
1643                 if (string_grow (&processed, &len, (**er).m_text[i]))
1644                 {
1645                     mem_free ((void **) (void *) &processed);
1646                     error_destroy (er);
1647                     return 1;
1648                 }
1649
1650                 i++;
1651             }
1652             else
1653             {
1654                 if (string_grow (&processed, &len, '$'))
1655                 {
1656                     mem_free ((void **) (void *) &processed);
1657                     error_destroy (er);
1658                     return 1;
1659                 }
1660
1661                 {
1662                     /* length of token being extracted */
1663                     unsigned int tlen = 0;
1664
1665                     if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1666                     {
1667                         mem_free ((void **) (void *) &processed);
1668                         error_destroy (er);
1669                         return 1;
1670                     }
1671
1672                     /* skip the dollar sign */
1673                     i++;
1674
1675                     while ((**er).m_text[i] != '$')
1676                     {
1677                         if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1678                         {
1679                             mem_free ((void **) (void *) &processed);
1680                             error_destroy (er);
1681                             return 1;
1682                         }
1683
1684                         i++;
1685                     }
1686
1687                     /* skip the dollar sign */
1688                     i++;
1689                 }
1690             }
1691         }
1692
1693         mem_free ((void **) &(**er).m_text);
1694         (**er).m_text = processed;
1695     }
1696
1697     *text = t;
1698     return 0;
1699 }
1700
1701 /*
1702     returns 0 on success,
1703     returns 1 otherwise,
1704 */
1705 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1706 {
1707     const byte *t = *text;
1708     byte *temp = NULL;
1709     emit *e = NULL;
1710     emit_dest dest;
1711
1712     if (*t != '.')
1713         return 0;
1714
1715     t++;
1716     if (get_identifier (&t, &temp))
1717         return 1;
1718     eat_spaces (&t);
1719
1720     /* .emit */
1721     if (str_equal ((byte *) "emit", temp))
1722         dest = ed_output;
1723     /* .load */
1724     else if (str_equal ((byte *) "load", temp))
1725         dest = ed_regbyte;
1726     else
1727     {
1728         mem_free ((void **) (void *) &temp);
1729         return 0;
1730     }
1731
1732     mem_free ((void **) (void *) &temp);
1733
1734     emit_create (&e);
1735     if (e == NULL)
1736         return 1;
1737
1738     e->m_emit_dest = dest;
1739
1740     if (dest == ed_regbyte)
1741     {
1742         if (get_identifier (&t, &e->m_regname))
1743         {
1744             emit_destroy (&e);
1745             return 1;
1746         }
1747         eat_spaces (&t);
1748     }
1749
1750     /* 0xNN */
1751     if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1752     {
1753         t += 2;
1754         e->m_byte = (byte) hex_convert (&t);
1755
1756         e->m_emit_type = et_byte;
1757     }
1758     /* NNN */
1759     else if (*t >= '0' && *t <= '9')
1760     {
1761         e->m_byte = (byte) dec_convert (&t);
1762
1763         e->m_emit_type = et_byte;
1764     }
1765     /* * */
1766     else if (*t == '*')
1767     {
1768         t++;
1769
1770         e->m_emit_type = et_stream;
1771     }
1772     /* $ */
1773     else if (*t == '$')
1774     {
1775         t++;
1776
1777         e->m_emit_type = et_position;
1778     }
1779     /* 'c' */
1780     else if (*t == '\'')
1781     {
1782         if (get_string (&t, &temp))
1783         {
1784             emit_destroy (&e);
1785             return 1;
1786         }
1787         e->m_byte = (byte) temp[0];
1788
1789         mem_free ((void **) (void *) &temp);
1790
1791         e->m_emit_type = et_byte;
1792     }
1793     else
1794     {
1795         if (get_identifier (&t, &temp))
1796         {
1797             emit_destroy (&e);
1798             return 1;
1799         }
1800
1801         if (map_byte_find (&mapb, temp, &e->m_byte))
1802         {
1803             mem_free ((void **) (void *) &temp);
1804             emit_destroy (&e);
1805             return 1;
1806         }
1807
1808         mem_free ((void **) (void *) &temp);
1809
1810         e->m_emit_type = et_byte;
1811     }
1812
1813     eat_spaces (&t);
1814
1815     if (get_emits (&t, &e->m_next, mapb))
1816     {
1817         emit_destroy (&e);
1818         return 1;
1819     }
1820
1821     *text = t;
1822     *em = e;
1823     return 0;
1824 }
1825
1826 /*
1827     returns 0 on success,
1828     returns 1 otherwise,
1829 */
1830 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1831 {
1832     const byte *t = *text;
1833     spec *s = NULL;
1834
1835     spec_create (&s);
1836     if (s == NULL)
1837         return 1;
1838
1839     /* first - read optional .if statement */
1840     if (*t == '.')
1841     {
1842         const byte *u = t;
1843         byte *keyword = NULL;
1844
1845         /* skip the dot */
1846         u++;
1847
1848         if (get_identifier (&u, &keyword))
1849         {
1850             spec_destroy (&s);
1851             return 1;
1852         }
1853
1854         /* .if */
1855         if (str_equal ((byte *) "if", keyword))
1856         {
1857             cond_create (&s->m_cond);
1858             if (s->m_cond == NULL)
1859             {
1860                 spec_destroy (&s);
1861                 return 1;
1862             }
1863
1864             /* skip the left paren */
1865             eat_spaces (&u);
1866             u++;
1867
1868             /* get the left operand */
1869             eat_spaces (&u);
1870             if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1871             {
1872                 spec_destroy (&s);
1873                 return 1;
1874             }
1875             s->m_cond->m_operands[0].m_type = cot_regbyte;
1876
1877             /* get the operator (!= or ==) */
1878             eat_spaces (&u);
1879             if (*u == '!')
1880                 s->m_cond->m_type = ct_not_equal;
1881             else
1882                 s->m_cond->m_type = ct_equal;
1883             u += 2;
1884             eat_spaces (&u);
1885
1886             if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1887             {
1888                 /* skip the 0x prefix */
1889                 u += 2;
1890
1891                 /* get the right operand */
1892                 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1893                 s->m_cond->m_operands[1].m_type = cot_byte;
1894             }
1895             else /*if (*u >= '0' && *u <= '9')*/
1896             {
1897                 /* get the right operand */
1898                 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1899                 s->m_cond->m_operands[1].m_type = cot_byte;
1900             }
1901
1902             /* skip the right paren */
1903             eat_spaces (&u);
1904             u++;
1905
1906             eat_spaces (&u);
1907
1908             t = u;
1909         }
1910
1911         mem_free ((void **) (void *) &keyword);
1912     }
1913
1914     if (*t == '\'')
1915     {
1916         byte *temp = NULL;
1917
1918         if (get_string (&t, &temp))
1919         {
1920             spec_destroy (&s);
1921             return 1;
1922         }
1923         eat_spaces (&t);
1924
1925         if (*t == '-')
1926         {
1927             byte *temp2 = NULL;
1928
1929             /* skip the '-' character */
1930             t++;
1931             eat_spaces (&t);
1932
1933             if (get_string (&t, &temp2))
1934             {
1935                 mem_free ((void **) (void *) &temp);
1936                 spec_destroy (&s);
1937                 return 1;
1938             }
1939             eat_spaces (&t);
1940
1941             s->m_spec_type = st_byte_range;
1942             s->m_byte[0] = *temp;
1943             s->m_byte[1] = *temp2;
1944
1945             mem_free ((void **) (void *) &temp2);
1946         }
1947         else
1948         {
1949             s->m_spec_type = st_byte;
1950             *s->m_byte = *temp;
1951         }
1952
1953         mem_free ((void **) (void *) &temp);
1954     }
1955     else if (*t == '"')
1956     {
1957         if (get_string (&t, &s->m_string))
1958         {
1959             spec_destroy (&s);
1960             return 1;
1961         }
1962         eat_spaces (&t);
1963
1964         s->m_spec_type = st_string;
1965     }
1966     else if (*t == '.')
1967     {
1968         byte *keyword = NULL;
1969
1970         /* skip the dot */
1971         t++;
1972
1973         if (get_identifier (&t, &keyword))
1974         {
1975             spec_destroy (&s);
1976             return 1;
1977         }
1978         eat_spaces (&t);
1979
1980         /* .true */
1981         if (str_equal ((byte *) "true", keyword))
1982         {
1983             s->m_spec_type = st_true;
1984         }
1985         /* .false */
1986         else if (str_equal ((byte *) "false", keyword))
1987         {
1988             s->m_spec_type = st_false;
1989         }
1990         /* .debug */
1991         else if (str_equal ((byte *) "debug", keyword))
1992         {
1993             s->m_spec_type = st_debug;
1994         }
1995         /* .loop */
1996         else if (str_equal ((byte *) "loop", keyword))
1997         {
1998             if (get_identifier (&t, &s->m_string))
1999             {
2000                 mem_free ((void **) (void *) &keyword);
2001                 spec_destroy (&s);
2002                 return 1;
2003             }
2004             eat_spaces (&t);
2005
2006             s->m_spec_type = st_identifier_loop;
2007         }
2008         mem_free ((void **) (void *) &keyword);
2009     }
2010     else
2011     {
2012         if (get_identifier (&t, &s->m_string))
2013         {
2014             spec_destroy (&s);
2015             return 1;
2016         }
2017         eat_spaces (&t);
2018
2019         s->m_spec_type = st_identifier;
2020     }
2021
2022     if (get_error (&t, &s->m_errtext, maps))
2023     {
2024         spec_destroy (&s);
2025         return 1;
2026     }
2027
2028     if (get_emits (&t, &s->m_emits, mapb))
2029     {
2030         spec_destroy (&s);
2031         return 1;
2032     }
2033
2034     *text = t;
2035     *sp = s;
2036     return 0;
2037 }
2038
2039 /*
2040     returns 0 on success,
2041     returns 1 otherwise,
2042 */
2043 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2044 {
2045     const byte *t = *text;
2046     rule *r = NULL;
2047
2048     rule_create (&r);
2049     if (r == NULL)
2050         return 1;
2051
2052     if (get_spec (&t, &r->m_specs, maps, mapb))
2053     {
2054         rule_destroy (&r);
2055         return 1;
2056     }
2057
2058     while (*t != ';')
2059     {
2060         byte *op = NULL;
2061         spec *sp = NULL;
2062
2063         /* skip the dot that precedes "and" or "or" */
2064         t++;
2065
2066         /* read "and" or "or" keyword */
2067         if (get_identifier (&t, &op))
2068         {
2069             rule_destroy (&r);
2070             return 1;
2071         }
2072         eat_spaces (&t);
2073
2074         if (r->m_oper == op_none)
2075         {
2076             /* .and */
2077             if (str_equal ((byte *) "and", op))
2078                 r->m_oper = op_and;
2079             /* .or */
2080             else
2081                 r->m_oper = op_or;
2082         }
2083
2084         mem_free ((void **) (void *) &op);
2085
2086         if (get_spec (&t, &sp, maps, mapb))
2087         {
2088             rule_destroy (&r);
2089             return 1;
2090         }
2091
2092         spec_append (&r->m_specs, sp);
2093     }
2094
2095     /* skip the semicolon */
2096     t++;
2097     eat_spaces (&t);
2098
2099     *text = t;
2100     *ru = r;
2101     return 0;
2102 }
2103
2104 /*
2105     returns 0 on success,
2106     returns 1 otherwise,
2107 */
2108 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2109 {
2110     if (map_rule_find (&mapr, symbol, ru))
2111         return 1;
2112
2113     (**ru).m_referenced = 1;
2114
2115     return 0;
2116 }
2117
2118 /*
2119     returns 0 on success,
2120     returns 1 otherwise,
2121 */
2122 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2123     byte **string_symbol, map_byte *regbytes)
2124 {
2125     rule *rulez = di->m_rulez;
2126
2127     /* update dependecies for the root and lexer symbols */
2128     if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2129         (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2130         return 1;
2131
2132     mem_free ((void **) syntax_symbol);
2133     mem_free ((void **) string_symbol);
2134
2135     /* update dependecies for the rest of the rules */
2136     while (rulez)
2137     {
2138         spec *sp = rulez->m_specs;
2139
2140         /* iterate through all the specifiers */
2141         while (sp)
2142         {
2143             /* update dependency for identifier */
2144             if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2145             {
2146                 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2147                     return 1;
2148
2149                 mem_free ((void **) &sp->m_string);
2150             }
2151
2152             /* some errtexts reference to a rule */
2153             if (sp->m_errtext && sp->m_errtext->m_token_name)
2154             {
2155                 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2156                     return 1;
2157
2158                 mem_free ((void **) &sp->m_errtext->m_token_name);
2159             }
2160
2161             /* update dependency for condition */
2162             if (sp->m_cond)
2163             {
2164                 int i;
2165                 for (i = 0; i < 2; i++)
2166                     if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2167                     {
2168                         sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2169                             sp->m_cond->m_operands[i].m_regname);
2170
2171                         if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2172                             return 1;
2173
2174                         mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2175                     }
2176             }
2177
2178             /* update dependency for all .load instructions */
2179             if (sp->m_emits)
2180             {
2181                 emit *em = sp->m_emits;
2182                 while (em != NULL)
2183                 {
2184                     if (em->m_emit_dest == ed_regbyte)
2185                     {
2186                         em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2187
2188                         if (em->m_regbyte == NULL)
2189                             return 1;
2190
2191                         mem_free ((void **) &em->m_regname);
2192                     }
2193
2194                     em = em->m_next;
2195                 }
2196             }
2197
2198             sp = sp->next;
2199         }
2200
2201         rulez = rulez->next;
2202     }
2203
2204     /* check for unreferenced symbols */
2205     rulez = di->m_rulez;
2206     while (rulez != NULL)
2207     {
2208         if (!rulez->m_referenced)
2209         {
2210             map_rule *ma = mapr;
2211             while (ma)
2212             {
2213                 if (ma->data == rulez)
2214                 {
2215                     set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2216                     return 1;
2217                 }
2218                 ma = ma->next;
2219             }
2220         }
2221         rulez = rulez->next;
2222     }
2223
2224     return 0;
2225 }
2226
2227 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2228 {
2229     byte values[2];
2230     int i;
2231
2232     if (co == NULL)
2233         return 1;
2234
2235     for (i = 0; i < 2; i++)
2236         switch (co->m_operands[i].m_type)
2237         {
2238         case cot_byte:
2239             values[i] = co->m_operands[i].m_byte;
2240             break;
2241         case cot_regbyte:
2242             values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2243             break;
2244         }
2245
2246     switch (co->m_type)
2247     {
2248     case ct_equal:
2249         return values[0] == values[1];
2250     case ct_not_equal:
2251         return values[0] != values[1];
2252     }
2253
2254     return 0;
2255 }
2256
2257 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2258 {
2259     while (top != limit)
2260     {
2261         regbyte_ctx *rbc = top->m_prev;
2262         regbyte_ctx_destroy (&top);
2263         top = rbc;
2264     }
2265 }
2266
/*
    Result of a single match()/fast_match() call.
*/
enum match_result_
{
    /* the examined string does not match */
    mr_not_matched,

    /* the examined string matches */
    mr_matched,

    /* mr_not_matched + error has been raised */
    mr_error_raised,

    /* used by identifier loops only */
    mr_dont_emit,

    /* an internal error has occurred such as out of memory */
    mr_internal_error
};

typedef enum match_result_ match_result;
2275
2276 /*
2277     This function does the main job. It parses the text and generates output data.
2278 */
2279 static match_result match (dict *di, const byte *text, unsigned int *index, rule *ru, barray **ba,
2280     int filtering_string, regbyte_ctx **rbc)
2281 {
2282     unsigned int ind = *index;
2283     match_result status = mr_not_matched;
2284     spec *sp = ru->m_specs;
2285     regbyte_ctx *ctx = *rbc;
2286
2287     /* for every specifier in the rule */
2288     while (sp)
2289     {
2290         unsigned int i, len, save_ind = ind;
2291         barray *array = NULL;
2292
2293         if (satisfies_condition (sp->m_cond, ctx))
2294         {
2295             switch (sp->m_spec_type)
2296             {
2297             case st_identifier:
2298                 barray_create (&array);
2299                 if (array == NULL)
2300                 {
2301                     free_regbyte_ctx_stack (ctx, *rbc);
2302                     return mr_internal_error;
2303                 }
2304
2305                 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2306
2307                 if (status == mr_internal_error)
2308                 {
2309                     free_regbyte_ctx_stack (ctx, *rbc);
2310                     barray_destroy (&array);
2311                     return mr_internal_error;
2312                 }
2313                 break;
2314             case st_string:
2315                 len = str_length (sp->m_string);
2316
2317                 /* prefilter the stream */
2318                 if (!filtering_string && di->m_string)
2319                 {
2320                     barray *ba;
2321                     unsigned int filter_index = 0;
2322                     match_result result;
2323                     regbyte_ctx *null_ctx = NULL;
2324
2325                     barray_create (&ba);
2326                     if (ba == NULL)
2327                     {
2328                         free_regbyte_ctx_stack (ctx, *rbc);
2329                         return mr_internal_error;
2330                     }
2331
2332                     result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2333
2334                     if (result == mr_internal_error)
2335                     {
2336                         free_regbyte_ctx_stack (ctx, *rbc);
2337                         barray_destroy (&ba);
2338                         return mr_internal_error;
2339                     }
2340
2341                     if (result != mr_matched)
2342                     {
2343                         barray_destroy (&ba);
2344                         status = mr_not_matched;
2345                         break;
2346                     }
2347
2348                     barray_destroy (&ba);
2349
2350                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2351                     {
2352                         status = mr_not_matched;
2353                         break;
2354                     }
2355
2356                     status = mr_matched;
2357                     ind += len;
2358                 }
2359                 else
2360                 {
2361                     status = mr_matched;
2362                     for (i = 0; status == mr_matched && i < len; i++)
2363                         if (text[ind + i] != sp->m_string[i])
2364                             status = mr_not_matched;
2365
2366                     if (status == mr_matched)
2367                         ind += len;
2368                 }
2369                 break;
2370             case st_byte:
2371                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2372                 if (status == mr_matched)
2373                     ind++;
2374                 break;
2375             case st_byte_range:
2376                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2377                     mr_matched : mr_not_matched;
2378                 if (status == mr_matched)
2379                     ind++;
2380                 break;
2381             case st_true:
2382                 status = mr_matched;
2383                 break;
2384             case st_false:
2385                 status = mr_not_matched;
2386                 break;
2387             case st_debug:
2388                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2389                 break;
2390             case st_identifier_loop:
2391                 barray_create (&array);
2392                 if (array == NULL)
2393                 {
2394                     free_regbyte_ctx_stack (ctx, *rbc);
2395                     return mr_internal_error;
2396                 }
2397
2398                 status = mr_dont_emit;
2399                 for (;;)
2400                 {
2401                     match_result result;
2402
2403                     save_ind = ind;
2404                     result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2405
2406                     if (result == mr_error_raised)
2407                     {
2408                         status = result;
2409                         break;
2410                     }
2411                     else if (result == mr_matched)
2412                     {
2413                         if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2414                             barray_append (ba, &array))
2415                         {
2416                             free_regbyte_ctx_stack (ctx, *rbc);
2417                             barray_destroy (&array);
2418                             return mr_internal_error;
2419                         }
2420                         barray_destroy (&array);
2421                         barray_create (&array);
2422                         if (array == NULL)
2423                         {
2424                             free_regbyte_ctx_stack (ctx, *rbc);
2425                             return mr_internal_error;
2426                         }
2427                     }
2428                     else if (result == mr_internal_error)
2429                     {
2430                         free_regbyte_ctx_stack (ctx, *rbc);
2431                         barray_destroy (&array);
2432                         return mr_internal_error;
2433                     }
2434                     else
2435                         break;
2436                 }
2437                 break;
2438             }
2439         }
2440         else
2441         {
2442             status = mr_not_matched;
2443         }
2444
2445         if (status == mr_error_raised)
2446         {
2447             free_regbyte_ctx_stack (ctx, *rbc);
2448             barray_destroy (&array);
2449
2450             return mr_error_raised;
2451         }
2452
2453         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2454         {
2455             free_regbyte_ctx_stack (ctx, *rbc);
2456             barray_destroy (&array);
2457
2458             if (sp->m_errtext)
2459             {
2460                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2461                     ind), ind);
2462
2463                 return mr_error_raised;
2464             }
2465
2466             return mr_not_matched;
2467         }
2468
2469         if (status == mr_matched)
2470         {
2471             if (sp->m_emits)
2472                 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2473                 {
2474                     free_regbyte_ctx_stack (ctx, *rbc);
2475                     barray_destroy (&array);
2476                     return mr_internal_error;
2477                 }
2478
2479             if (array)
2480                 if (barray_append (ba, &array))
2481                 {
2482                     free_regbyte_ctx_stack (ctx, *rbc);
2483                     barray_destroy (&array);
2484                     return mr_internal_error;
2485                 }
2486         }
2487
2488         barray_destroy (&array);
2489
2490         /* if the rule operator is a logical or, we pick up the first matching specifier */
2491         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2492         {
2493             *index = ind;
2494             *rbc = ctx;
2495             return mr_matched;
2496         }
2497
2498         sp = sp->next;
2499     }
2500
2501     /* everything went fine - all specifiers match up */
2502     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2503     {
2504         *index = ind;
2505         *rbc = ctx;
2506         return mr_matched;
2507     }
2508
2509     free_regbyte_ctx_stack (ctx, *rbc);
2510     return mr_not_matched;
2511 }
2512
2513 static match_result fast_match (dict *di, const byte *text, unsigned int *index, rule *ru, int *_PP, bytepool *_BP,
2514     int filtering_string, regbyte_ctx **rbc)
2515 {
2516     unsigned int ind = *index;
2517     int _P = filtering_string ? 0 : *_PP;
2518     int _P2;
2519     match_result status = mr_not_matched;
2520     spec *sp = ru->m_specs;
2521     regbyte_ctx *ctx = *rbc;
2522
2523     /* for every specifier in the rule */
2524     while (sp)
2525     {
2526         unsigned int i, len, save_ind = ind;
2527
2528         _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2529         if (bytepool_reserve (_BP, _P2))
2530         {
2531             free_regbyte_ctx_stack (ctx, *rbc);
2532             return mr_internal_error;
2533         }
2534
2535         if (satisfies_condition (sp->m_cond, ctx))
2536         {
2537             switch (sp->m_spec_type)
2538             {
2539             case st_identifier:
2540                 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2541
2542                 if (status == mr_internal_error)
2543                 {
2544                     free_regbyte_ctx_stack (ctx, *rbc);
2545                     return mr_internal_error;
2546                 }
2547                 break;
2548             case st_string:
2549                 len = str_length (sp->m_string);
2550
2551                 /* prefilter the stream */
2552                 if (!filtering_string && di->m_string)
2553                 {
2554                     unsigned int filter_index = 0;
2555                     match_result result;
2556                     regbyte_ctx *null_ctx = NULL;
2557
2558                     result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2559
2560                     if (result == mr_internal_error)
2561                     {
2562                         free_regbyte_ctx_stack (ctx, *rbc);
2563                         return mr_internal_error;
2564                     }
2565
2566                     if (result != mr_matched)
2567                     {
2568                         status = mr_not_matched;
2569                         break;
2570                     }
2571
2572                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2573                     {
2574                         status = mr_not_matched;
2575                         break;
2576                     }
2577
2578                     status = mr_matched;
2579                     ind += len;
2580                 }
2581                 else
2582                 {
2583                     status = mr_matched;
2584                     for (i = 0; status == mr_matched && i < len; i++)
2585                         if (text[ind + i] != sp->m_string[i])
2586                             status = mr_not_matched;
2587
2588                     if (status == mr_matched)
2589                         ind += len;
2590                 }
2591                 break;
2592             case st_byte:
2593                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2594                 if (status == mr_matched)
2595                     ind++;
2596                 break;
2597             case st_byte_range:
2598                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2599                     mr_matched : mr_not_matched;
2600                 if (status == mr_matched)
2601                     ind++;
2602                 break;
2603             case st_true:
2604                 status = mr_matched;
2605                 break;
2606             case st_false:
2607                 status = mr_not_matched;
2608                 break;
2609             case st_debug:
2610                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2611                 break;
2612             case st_identifier_loop:
2613                 status = mr_dont_emit;
2614                 for (;;)
2615                 {
2616                     match_result result;
2617
2618                     save_ind = ind;
2619                     result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2620
2621                     if (result == mr_error_raised)
2622                     {
2623                         status = result;
2624                         break;
2625                     }
2626                     else if (result == mr_matched)
2627                     {
2628                         if (!filtering_string)
2629                         {
2630                             if (sp->m_emits != NULL)
2631                             {
2632                                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2633                                 {
2634                                     free_regbyte_ctx_stack (ctx, *rbc);
2635                                     return mr_internal_error;
2636                                 }
2637                             }
2638
2639                             _P = _P2;
2640                             _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2641                             if (bytepool_reserve (_BP, _P2))
2642                             {
2643                                 free_regbyte_ctx_stack (ctx, *rbc);
2644                                 return mr_internal_error;
2645                             }
2646                         }
2647                     }
2648                     else if (result == mr_internal_error)
2649                     {
2650                         free_regbyte_ctx_stack (ctx, *rbc);
2651                         return mr_internal_error;
2652                     }
2653                     else
2654                         break;
2655                 }
2656                 break;
2657             }
2658         }
2659         else
2660         {
2661             status = mr_not_matched;
2662         }
2663
2664         if (status == mr_error_raised)
2665         {
2666             free_regbyte_ctx_stack (ctx, *rbc);
2667
2668             return mr_error_raised;
2669         }
2670
2671         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2672         {
2673             free_regbyte_ctx_stack (ctx, *rbc);
2674
2675             if (sp->m_errtext)
2676             {
2677                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2678                     ind), ind);
2679
2680                 return mr_error_raised;
2681             }
2682
2683             return mr_not_matched;
2684         }
2685
2686         if (status == mr_matched)
2687         {
2688             if (sp->m_emits != NULL)
2689                 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2690                 {
2691                     free_regbyte_ctx_stack (ctx, *rbc);
2692                     return mr_internal_error;
2693                 }
2694
2695             _P = _P2;
2696         }
2697
2698         /* if the rule operator is a logical or, we pick up the first matching specifier */
2699         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2700         {
2701             *index = ind;
2702             *rbc = ctx;
2703             if (!filtering_string)
2704                 *_PP = _P;
2705             return mr_matched;
2706         }
2707
2708         sp = sp->next;
2709     }
2710
2711     /* everything went fine - all specifiers match up */
2712     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2713     {
2714         *index = ind;
2715         *rbc = ctx;
2716         if (!filtering_string)
2717             *_PP = _P;
2718         return mr_matched;
2719     }
2720
2721     free_regbyte_ctx_stack (ctx, *rbc);
2722     return mr_not_matched;
2723 }
2724
2725 static byte *error_get_token (error *er, dict *di, const byte *text, unsigned int ind)
2726 {
2727     byte *str = NULL;
2728
2729     if (er->m_token)
2730     {
2731         barray *ba;
2732         unsigned int filter_index = 0;
2733         regbyte_ctx *ctx = NULL;
2734
2735         barray_create (&ba);
2736         if (ba != NULL)
2737         {
2738             if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2739                 filter_index)
2740             {
2741                 str = (byte *) mem_alloc (filter_index + 1);
2742                 if (str != NULL)
2743                 {
2744                     str_copy_n (str, text + ind, filter_index);
2745                     str[filter_index] = '\0';
2746                 }
2747             }
2748             barray_destroy (&ba);
2749         }
2750     }
2751
2752     return str;
2753 }
2754
2755 typedef struct grammar_load_state_
2756 {
2757     dict *di;
2758     byte *syntax_symbol;
2759     byte *string_symbol;
2760     map_str *maps;
2761     map_byte *mapb;
2762     map_rule *mapr;
2763 } grammar_load_state;
2764
2765 static void grammar_load_state_create (grammar_load_state **gr)
2766 {
2767     *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2768     if (*gr)
2769     {
2770         (**gr).di = NULL;
2771         (**gr).syntax_symbol = NULL;
2772         (**gr).string_symbol = NULL;
2773         (**gr).maps = NULL;
2774         (**gr).mapb = NULL;
2775         (**gr).mapr = NULL;
2776     }
2777 }
2778
2779 static void grammar_load_state_destroy (grammar_load_state **gr)
2780 {
2781     if (*gr)
2782     {
2783         dict_destroy (&(**gr).di);
2784         mem_free ((void **) &(**gr).syntax_symbol);
2785         mem_free ((void **) &(**gr).string_symbol);
2786         map_str_destroy (&(**gr).maps);
2787         map_byte_destroy (&(**gr).mapb);
2788         map_rule_destroy (&(**gr).mapr);
2789         mem_free ((void **) gr);
2790     }
2791 }
2792
2793 /*
2794     the API
2795 */
2796
2797 grammar grammar_load_from_text (const byte *text)
2798 {
2799     grammar_load_state *g = NULL;
2800     grammar id = 0;
2801
2802     clear_last_error ();
2803
2804     grammar_load_state_create (&g);
2805     if (g == NULL)
2806         return 0;
2807
2808     dict_create (&g->di);
2809     if (g->di == NULL)
2810     {
2811         grammar_load_state_destroy (&g);
2812         return 0;
2813     }
2814
2815     eat_spaces (&text);
2816
2817     /* skip ".syntax" keyword */
2818     text += 7;
2819     eat_spaces (&text);
2820
2821     /* retrieve root symbol */
2822     if (get_identifier (&text, &g->syntax_symbol))
2823     {
2824         grammar_load_state_destroy (&g);
2825         return 0;
2826     }
2827     eat_spaces (&text);
2828
2829     /* skip semicolon */
2830     text++;
2831     eat_spaces (&text);
2832
2833     while (*text)
2834     {
2835         byte *symbol = NULL;
2836         int is_dot = *text == '.';
2837
2838         if (is_dot)
2839             text++;
2840
2841         if (get_identifier (&text, &symbol))
2842         {
2843             grammar_load_state_destroy (&g);
2844             return 0;
2845         }
2846         eat_spaces (&text);
2847
2848         /* .emtcode */
2849         if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2850         {
2851             map_byte *ma = NULL;
2852
2853             mem_free ((void **) (void *) &symbol);
2854
2855             if (get_emtcode (&text, &ma))
2856             {
2857                 grammar_load_state_destroy (&g);
2858                 return 0;
2859             }
2860
2861             map_byte_append (&g->mapb, ma);
2862         }
2863         /* .regbyte */
2864         else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2865         {
2866             map_byte *ma = NULL;
2867
2868             mem_free ((void **) (void *) &symbol);
2869
2870             if (get_regbyte (&text, &ma))
2871             {
2872                 grammar_load_state_destroy (&g);
2873                 return 0;
2874             }
2875
2876             map_byte_append (&g->di->m_regbytes, ma);
2877         }
2878         /* .errtext */
2879         else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2880         {
2881             map_str *ma = NULL;
2882
2883             mem_free ((void **) (void *) &symbol);
2884
2885             if (get_errtext (&text, &ma))
2886             {
2887                 grammar_load_state_destroy (&g);
2888                 return 0;
2889             }
2890
2891             map_str_append (&g->maps, ma);
2892         }
2893         /* .string */
2894         else if (is_dot && str_equal (symbol, (byte *) "string"))
2895         {
2896             mem_free ((void **) (void *) &symbol);
2897
2898             if (g->di->m_string != NULL)
2899             {
2900                 grammar_load_state_destroy (&g);
2901                 return 0;
2902             }
2903
2904             if (get_identifier (&text, &g->string_symbol))
2905             {
2906                 grammar_load_state_destroy (&g);
2907                 return 0;
2908             }
2909
2910             /* skip semicolon */
2911             eat_spaces (&text);
2912             text++;
2913             eat_spaces (&text);
2914         }
2915         else
2916         {
2917             rule *ru = NULL;
2918             map_rule *ma = NULL;
2919
2920             if (get_rule (&text, &ru, g->maps, g->mapb))
2921             {
2922                 grammar_load_state_destroy (&g);
2923                 return 0;
2924             }
2925
2926             rule_append (&g->di->m_rulez, ru);
2927
2928             /* if a rule consist of only one specifier, give it an ".and" operator */
2929             if (ru->m_oper == op_none)
2930                 ru->m_oper = op_and;
2931
2932             map_rule_create (&ma);
2933             if (ma == NULL)
2934             {
2935                 grammar_load_state_destroy (&g);
2936                 return 0;
2937             }
2938
2939             ma->key = symbol;
2940             ma->data = ru;
2941             map_rule_append (&g->mapr, ma);
2942         }
2943     }
2944
2945     if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2946         g->di->m_regbytes))
2947     {
2948         grammar_load_state_destroy (&g);
2949         return 0;
2950     }
2951
2952     dict_append (&g_dicts, g->di);
2953     id = g->di->m_id;
2954     g->di = NULL;
2955
2956     grammar_load_state_destroy (&g);
2957
2958     return id;
2959 }
2960
2961 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2962 {
2963     dict *di = NULL;
2964     map_byte *reg = NULL;
2965
2966     clear_last_error ();
2967
2968     dict_find (&g_dicts, id, &di);
2969     if (di == NULL)
2970     {
2971         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2972         return 0;
2973     }
2974
2975     reg = map_byte_locate (&di->m_regbytes, name);
2976     if (reg == NULL)
2977     {
2978         set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2979         return 0;
2980     }
2981
2982     reg->data = value;
2983     return 1;
2984 }
2985
2986 /*
2987     internal checking function used by both grammar_check and grammar_fast_check functions
2988 */
2989 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
2990     unsigned int estimate_prod_size, int use_fast_path)
2991 {
2992     dict *di = NULL;
2993     unsigned int index = 0;
2994
2995     clear_last_error ();
2996
2997     dict_find (&g_dicts, id, &di);
2998     if (di == NULL)
2999     {
3000         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3001         return 0;
3002     }
3003
3004     *prod = NULL;
3005     *size = 0;
3006
3007     if (use_fast_path)
3008     {
3009         regbyte_ctx *rbc = NULL;
3010         bytepool *bp = NULL;
3011         int _P = 0;
3012
3013         bytepool_create (&bp, estimate_prod_size);
3014         if (bp == NULL)
3015             return 0;
3016
3017         if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3018         {
3019             bytepool_destroy (&bp);
3020             free_regbyte_ctx_stack (rbc, NULL);
3021             return 0;
3022         }
3023
3024         free_regbyte_ctx_stack (rbc, NULL);
3025
3026         *prod = bp->_F;
3027         *size = _P;
3028         bp->_F = NULL;
3029         bytepool_destroy (&bp);
3030     }
3031     else
3032     {
3033         regbyte_ctx *rbc = NULL;
3034         barray *ba = NULL;
3035
3036         barray_create (&ba);
3037         if (ba == NULL)
3038             return 0;
3039
3040         if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3041         {
3042             barray_destroy (&ba);
3043             free_regbyte_ctx_stack (rbc, NULL);
3044             return 0;
3045         }
3046
3047         free_regbyte_ctx_stack (rbc, NULL);
3048
3049         *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3050         if (*prod == NULL)
3051         {
3052             barray_destroy (&ba);
3053             return 0;
3054         }
3055
3056         mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3057         *size = ba->len;
3058         barray_destroy (&ba);
3059     }
3060
3061     return 1;
3062 }
3063
3064 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3065 {
3066     return _grammar_check (id, text, prod, size, 0, 0);
3067 }
3068
3069 int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3070     unsigned int estimate_prod_size)
3071 {
3072     return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3073 }
3074
3075 int grammar_destroy (grammar id)
3076 {
3077     dict **di = &g_dicts;
3078
3079     clear_last_error ();
3080
3081     while (*di != NULL)
3082     {
3083         if ((**di).m_id == id)
3084         {
3085             dict *tmp = *di;
3086             *di = (**di).next;
3087             dict_destroy (&tmp);
3088             return 1;
3089         }
3090
3091         di = &(**di).next;
3092     }
3093
3094     set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3095     return 0;
3096 }
3097
3098 static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3099 {
3100     if (*dots_made == 0)
3101     {
3102         if (*len < size - 1)
3103         {
3104             text[(*len)++] = x;
3105             text[*len] = '\0';
3106         }
3107         else
3108         {
3109             int i;
3110             for (i = 0; i < 3; i++)
3111                 if (--(*len) >= 0)
3112                     text[*len] = '.';
3113             *dots_made = 1;
3114         }
3115     }
3116 }
3117
3118 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3119 {
3120     int len = 0, dots_made = 0;
3121     const byte *p = error_message;
3122
3123     *text = '\0';
3124
3125     if (p)
3126     {
3127         while (*p)
3128         {
3129             if (*p == '$')
3130             {
3131                 const byte *r = error_param;
3132
3133                 while (*r)
3134                 {
3135                     append_character (*r++, text, &dots_made, &len, (int) size);
3136                 }
3137
3138                 p++;
3139             }
3140             else
3141             {
3142                 append_character (*p++, text, &dots_made, &len, size);
3143             }
3144         }
3145     }
3146
3147     *pos = error_position;
3148 }