src/mesa/shader/grammar.c

   1 /*
   2  * Mesa 3-D graphics library
   3  * Version:  6.1
   4  *
   5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file grammar.c
  27  * syntax parsing engine
  28  * \author Michal Krol
  29  */
  30
  31 #ifndef GRAMMAR_PORT_BUILD
  32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
  33 #endif
  34
  35 /*
  36     Last Modified: 2004-II-8
  37 */
  38
  39 /*
  40     INTRODUCTION
  41     ------------
  42
  43     The task is to check the syntax of an input string. Input string is a stream of ASCII
  44     characters terminated with a null-character ('\0'). Checking it using C language is
  45     difficult and hard to implement without bugs. It is hard to maintain and make changes when
  46     the syntax changes.
  47
  48     This is because of a high redundancy of the C code. Large blocks of code are duplicated with
  49     only small changes. Even use of macros does not solve the problem because macros cannot
  50     erase the complexity of the problem.
  51
  52     The resolution is to create a new language that will be highly oriented to our task. Once
  53     we describe a particular syntax, we are done. We can then focus on the code that implements
   54     the language. Its size and complexity are relatively small compared to the code that directly
  55     checks the syntax.
  56
  57     First, we must implement our new language. Here, the language is implemented in C, but it
  58     could also be implemented in any other language. The code is listed below. We must take
  59     a good care that it is bug free. This is simple because the code is simple and clean.
  60
  61     Next, we must describe the syntax of our new language in itself. Once created and checked
   62     manually that it is correct, we can use it to check other scripts.
  63
  64     Note that our new language loading code does not have to check the syntax. It is because we
  65     assume that the script describing itself is correct, and other scripts can be syntactically
  66     checked by the former script. The loading code must only do semantic checking which leads us to
  67     simple resolving references.
  68
  69     THE LANGUAGE
  70     ------------
  71
  72     Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
  73     sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
  74     which is an identifier, and its definition. A definition is in turn a sequence of specifiers
  75     connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
  76     definition. Specifier can be a symbol, string, character, character range or a special
  77     keyword ".true" or ".false".
  78
  79     On the very beginning of the script there is a declaration of a root symbol and is in the form:
  80         .syntax <root_symbol>;
   81     The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
  82     the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
  83     the symbol evaluates to true. Definition evaluation depends on the operator used to connect
  84     specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
   85     only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
  86     true if any of the specifiers evaluates to true. If definition contains only one specifier,
  87     it is evaluated as if it was connected with ".true" keyword by ".and" operator.
  88
  89     If specifier is a ".true" keyword, it always evaluates to true.
  90
  91     If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
  92     when it does not evaluate to true.
  93
  94     Character range specifier is in the form:
  95         '<first_character>' - '<second_character>'
  96     If specifier is a character range, it evaluates to true if character in the stream is greater
  97     or equal to <first_character> and less or equal to <second_character>. In that situation
  98     the stream pointer is advanced to point to next character in the stream. All C-style escape
   99     sequences are supported although trigraph sequences are not. The comparisons are performed
 100     on 8-bit unsigned integers.
 101
 102     Character specifier is in the form:
 103         '<single_character>'
 104     It evaluates to true if the following character range specifier evaluates to true:
 105         '<single_character>' - '<single_character>'
 106
 107     String specifier is in the form:
 108         "<string>"
 109     Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
 110     <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
 111     the following character specifier evaluates to true:
 112         '<string>[i]'
 113     If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
 114
 115     Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
 116         .loop <symbol>                  (1)
 117     where <symbol> is defined as follows:
 118         <symbol> <definition>;          (2)
 119     Construction (1) is replaced by the following code:
 120         <symbol$1>
 121     and declaration (2) is replaced by the following:
 122         <symbol$1> <symbol$2> .or .true;
 123         <symbol$2> <symbol> .and <symbol$1>;
 124         <symbol> <definition>;
 125
  126     Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
 127     registers that can be accessed in the syn body. Each reg has its name and a default value.
 128     The register is one byte wide. The C code can change the default value by calling
 129     grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
 130     a sequence of specifiers joined with .and or .or operator. And now each specifier can be
 131     prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
 132     where <operator> can be == or !=. If the condition evaluates to false, the specifier
  133     evaluates to .false. Otherwise it evaluates to the specifier.
 134
 135     ESCAPE SEQUENCES
 136     ----------------
 137
 138     Synek supports all escape sequences in character specifiers. The mapping table is listed below.
  139     All occurrences of the characters in the first column are replaced with the corresponding
 140     character in the second column.
 141
 142         Escape sequence         Represents
 143     ------------------------------------------------------------------------------------------------
 144         \a                      Bell (alert)
 145         \b                      Backspace
 146         \f                      Formfeed
 147         \n                      New line
 148         \r                      Carriage return
 149         \t                      Horizontal tab
 150         \v                      Vertical tab
 151         \'                      Single quotation mark
 152         \"                      Double quotation mark
 153         \\                      Backslash
 154         \?                      Literal question mark
 155         \ooo                    ASCII character in octal notation
 156         \xhhh                   ASCII character in hexadecimal notation
 157     ------------------------------------------------------------------------------------------------
 158
 159     RAISING ERRORS
 160     --------------
 161
 162     Any specifier can be followed by a special construction that is executed when the specifier
 163     evaluates to false. The construction is in the form:
 164         .error <ERROR_TEXT>
 165     <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
 166     in the form:
 167         .errtext <ERROR_TEXT> "<error_desc>"
 168     When specifier evaluates to false and this construction is present, parsing is stopped
 169     immediately and <error_desc> is returned as a result of parsing. The error position is also
  170     returned and it is meant as an offset from the beginning of the stream to the character that
 171     was valid so far. Example:
 172
 173         (**** syntax script ****)
 174
 175         .syntax program;
 176         .errtext MISSING_SEMICOLON      "missing ';'"
 177         program         declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
 178                         .loop space .and '\0';
 179         declaration     "declare" .and .loop space .and identifier;
 180         space           ' ';
 181
 182         (**** sample code ****)
 183
 184         declare foo ,
 185
 186     In the example above checking the sample code will result in error message "missing ';'" and
 187     error position 12. The sample code is not correct. Note the presence of '\0' specifier to
 188     assure that there is no code after semicolon - only spaces.
 189     <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
 190     the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
  191     the identifier name. The starting position is the error position. The length of the resulting
 192     string is the position after invoking the symbol.
 193
 194     PRODUCTION
 195     ----------
 196
 197     Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
 198     that evaluate to true. That is, every specifier and optional error construction can be followed
 199     by a number of emit constructions that are in the form:
 200         .emit <parameter>
  201     <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
 202     0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
 203     in the form:
 204         .emtcode <identifier> <hex_number>
 205
 206     When given specifier evaluates to true, all emits associated with the specifier are output
 207     in order they were declared. A star means that last-read character should be output instead
 208     of constant value. Example:
 209
 210         (**** syntax script ****)
 211
 212         .syntax foobar;
 213         .emtcode WORD_FOO       0x01
 214         .emtcode WORD_BAR       0x02
 215         foobar      FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
 216         FOO         "foo" .and SPACE;
 217         BAR         "bar" .and SPACE;
 218         SPACE       ' ' .or '\0';
 219
 220         (**** sample text 1 ****)
 221
 222         foo
 223
 224         (**** sample text 2 ****)
 225
 226         foobar
 227
 228     For both samples the result will be one-element array. For first sample text it will be
 229     value 1, for second - 0. Note that every text will be accepted because of presence of
 230     .true as an alternative.
 231
 232     Another example:
 233
 234         (**** syntax script ****)
 235
 236         .syntax declaration;
 237         .emtcode VARIABLE       0x01
 238         declaration     "declare" .and .loop space .and
 239                         identifier .emit VARIABLE .and          (1)
 240                         .true .emit 0x00 .and                   (2)
 241                         .loop space .and ';';
 242         space           ' ' .or '\t';
 243         identifier      .loop id_char .emit *;                  (3)
 244         id_char         'a'-'z' .or 'A'-'Z' .or '_';
 245
 246         (**** sample code ****)
 247
 248         declare    fubar;
 249
 250     In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
 251     true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
 252     to terminate the string with null to signal when the string ends. Specifier (3) outputs
 253     all characters that make declared identifier. The result of sample code will be the
 254     following array:
 255         { 1, 'f', 'u', 'b', 'a', 'r', 0 }
 256
 257     If .emit is followed by dollar $, it means that current position should be output. Current
 258     position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
 259     first character consumed by the specifier associated with the .emit instruction. Current
 260     position is stored in the output buffer in Little-Endian convention (the lowest byte comes
 261     first).
 262 */
 263
 264 static void mem_free (void **);
 265
 266 /*
 267     internal error messages
 268 */
 269 static const byte *OUT_OF_MEMORY =          (byte *) "internal error 1001: out of physical memory";
 270 static const byte *UNRESOLVED_REFERENCE =   (byte *) "internal error 1002: unresolved reference '$'";
 271 static const byte *INVALID_GRAMMAR_ID =     (byte *) "internal error 1003: invalid grammar object";
 272 static const byte *INVALID_REGISTER_NAME =  (byte *) "internal error 1004: invalid register name: '$'";
 273
 274 static const byte *error_message = NULL;
 275 static byte *error_param = NULL;        /* this is inserted into error_message in place of $ */
 276 static int error_position = -1;
 277
 278 static byte *unknown = (byte *) "???";
 279
 280 static void clear_last_error ()
 281 {
 282     /* reset error message */
 283     error_message = NULL;
 284
 285     /* free error parameter - if error_param is a "???" don't free it - it's static */
 286     if (error_param != unknown)
 287         mem_free ((void **) &error_param);
 288     else
 289         error_param = NULL;
 290
 291     /* reset error position */
 292     error_position = -1;
 293 }
 294
 295 static void set_last_error (const byte *msg, byte *param, int pos)
 296 {
 297     /* error message can only be set only once */
 298     if (error_message != NULL)
 299     {
 300         mem_free (&param);
 301         return;
 302     }
 303
 304     error_message = msg;
 305
 306     if (param != NULL)
 307         error_param = param;
 308     else
 309         error_param = unknown;
 310
 311     error_position = pos;
 312 }
 313
 314 /*
 315     memory management routines
 316 */
 317 static void *mem_alloc (size_t size)
 318 {
 319     void *ptr = grammar_alloc_malloc (size);
 320     if (ptr == NULL)
 321         set_last_error (OUT_OF_MEMORY, NULL, -1);
 322     return ptr;
 323 }
 324
 325 static void *mem_copy (void *dst, const void *src, size_t size)
 326 {
 327     return grammar_memory_copy (dst, src, size);
 328 }
 329
 330 static void mem_free (void **ptr)
 331 {
 332     grammar_alloc_free (*ptr);
 333     *ptr = NULL;
 334 }
 335
 336 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
 337 {
 338     void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
 339     if (ptr2 == NULL)
 340         set_last_error (OUT_OF_MEMORY, NULL, -1);
 341     return ptr2;
 342 }
 343
 344 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
 345 {
 346     return grammar_string_copy_n (dst, src, max_len);
 347 }
 348
 349 static byte *str_duplicate (const byte *str)
 350 {
 351     byte *new_str = grammar_string_duplicate (str);
 352     if (new_str == NULL)
 353         set_last_error (OUT_OF_MEMORY, NULL, -1);
 354     return new_str;
 355 }
 356
 357 static int str_equal (const byte *str1, const byte *str2)
 358 {
 359     return grammar_string_compare (str1, str2) == 0;
 360 }
 361
 362 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
 363 {
 364     return grammar_string_compare_n (str1, str2, n) == 0;
 365 }
 366
 367 static unsigned int str_length (const byte *str)
 368 {
 369     return grammar_string_length (str);
 370 }
 371
 372 /*
 373     string to byte map typedef
 374 */
 375 typedef struct map_byte_
 376 {
 377     byte *key;
 378     byte data;
 379     struct map_byte_ *next;
 380 } map_byte;
 381
 382 static void map_byte_create (map_byte **ma)
 383 {
 384     *ma = mem_alloc (sizeof (map_byte));
 385     if (*ma)
 386     {
 387         (**ma).key = NULL;
 388         (**ma).data = '\0';
 389         (**ma).next = NULL;
 390     }
 391 }
 392
 393 /* XXX unfold the recursion */
 394 static void map_byte_destroy (map_byte **ma)
 395 {
 396     if (*ma)
 397     {
 398         map_byte_destroy (&(**ma).next);
 399         mem_free ((void **) &(**ma).key);
 400         mem_free ((void **) ma);
 401     }
 402 }
 403
 404 static void map_byte_append (map_byte **ma, map_byte **nm)
 405 {
 406     while (*ma)
 407         ma = &(**ma).next;
 408     *ma = *nm;
 409 }
 410
 411 /*
 412     searches the map for the specified key,
 413     returns pointer to the element with the specified key if it exists
 414     returns NULL otherwise
 415 */
 416 map_byte *map_byte_locate (map_byte **ma, const byte *key)
 417 {
 418     while (*ma)
 419     {
 420         if (str_equal ((**ma).key, key))
 421             return *ma;
 422
 423         ma = &(**ma).next;
 424     }
 425
 426     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
 427     return NULL;
 428 }
 429
 430 /*
 431     searches the map for specified key,
 432     if the key is matched, *data is filled with data associated with the key,
 433     returns 0 if the key is matched,
 434     returns 1 otherwise
 435 */
 436 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
 437 {
 438     map_byte *found = map_byte_locate (ma, key);
 439     if (found != NULL)
 440     {
 441         *data = found->data;
 442
 443         return 0;
 444     }
 445
 446     return 1;
 447 }
 448
 449 /*
 450     regbyte context typedef
 451
 452     Each regbyte consists of its name and a default value. These are static and created at
 453     grammar script compile-time, for example the following line:
 454         .regbyte vertex_blend      0x00
 455     adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
 456     When the script is executed, this regbyte can be accessed by name for read and write. When a
 457     particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
  458     stack. The new entry contains information about which regbyte it references and its new value.
 459     When a given regbyte is accessed for read, the stack is searched top-down to find an
 460     entry that references the regbyte. The first matching entry is used to return the current
 461     value it holds. If no entry is found, the default value is returned.
 462 */
 463 typedef struct regbyte_ctx_
 464 {
 465     map_byte *m_regbyte;
 466     byte m_current_value;
 467     struct regbyte_ctx_ *m_prev;
 468 } regbyte_ctx;
 469
 470 static void regbyte_ctx_create (regbyte_ctx **re)
 471 {
 472     *re = mem_alloc (sizeof (regbyte_ctx));
 473     if (*re)
 474     {
 475         (**re).m_regbyte = NULL;
 476         (**re).m_prev = NULL;
 477     }
 478 }
 479
 480 static void regbyte_ctx_destroy (regbyte_ctx **re)
 481 {
 482     if (*re)
 483     {
 484         mem_free ((void **) re);
 485     }
 486 }
 487
 488 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
 489 {
 490     /* first lookup in the register stack */
 491     while (*re != NULL)
 492     {
 493         if ((**re).m_regbyte == reg)
 494             return (**re).m_current_value;
 495
 496         re = &(**re).m_prev;
 497     }
 498
 499     /* if not found - return the default value */
 500     return reg->data;
 501 }
 502
 503 /*
 504     emit type typedef
 505 */
 506 typedef enum emit_type_
 507 {
 508     et_byte,            /* explicit number */
 509     et_stream,          /* eaten character */
 510     et_position         /* current position */
 511 } emit_type;
 512
 513 /*
 514     emit destination typedef
 515 */
 516 typedef enum emit_dest_
 517 {
 518     ed_output,          /* write to the output buffer */
 519     ed_regbyte          /* write a particular regbyte */
 520 } emit_dest;
 521
 522 /*
 523     emit typedef
 524 */
 525 typedef struct emit_
 526 {
 527     emit_dest m_emit_dest;
 528     emit_type m_emit_type;      /* ed_output */
 529     byte m_byte;                /* et_byte */
 530     map_byte *m_regbyte;        /* ed_regbyte */
 531     byte *m_regname;            /* ed_regbyte - temporary */
 532     struct emit_ *m_next;
 533 } emit;
 534
 535 static void emit_create (emit **em)
 536 {
 537     *em = mem_alloc (sizeof (emit));
 538     if (*em)
 539     {
 540         (**em).m_emit_dest = ed_output;
 541         (**em).m_emit_type = et_byte;
 542         (**em).m_byte = '\0';
 543         (**em).m_regbyte = NULL;
 544         (**em).m_regname = NULL;
 545         (**em).m_next = NULL;
 546     }
 547 }
 548
 549 static void emit_destroy (emit **em)
 550 {
 551     if (*em)
 552     {
 553         emit_destroy (&(**em).m_next);
 554         mem_free ((void **) &(**em).m_regname);
 555         mem_free ((void **) em);
 556     }
 557 }
 558
 559 /*
 560     error typedef
 561 */
 562 typedef struct error_
 563 {
 564     byte *m_text;
 565     byte *m_token_name;
 566     struct rule_ *m_token;
 567 } error;
 568
 569 static void error_create (error **er)
 570 {
 571     *er = mem_alloc (sizeof (error));
 572     if (*er)
 573     {
 574         (**er).m_text = NULL;
 575         (**er).m_token_name = NULL;
 576         (**er).m_token = NULL;
 577     }
 578 }
 579
 580 static void error_destroy (error **er)
 581 {
 582     if (*er)
 583     {
 584         mem_free ((void **) &(**er).m_text);
 585         mem_free ((void **) &(**er).m_token_name);
 586         mem_free ((void **) er);
 587     }
 588 }
 589
 590 struct dict_;
 591 static byte *error_get_token (error *, struct dict_ *, const byte *, unsigned int);
 592
 593 /*
 594     condition operand type typedef
 595 */
 596 typedef enum cond_oper_type_
 597 {
 598     cot_byte,               /* constant 8-bit unsigned integer */
 599     cot_regbyte             /* pointer to byte register containing the current value */
 600 } cond_oper_type;
 601
 602 /*
 603     condition operand typedef
 604 */
 605 typedef struct cond_oper_
 606 {
 607     cond_oper_type m_type;
 608     byte m_byte;            /* cot_byte */
 609     map_byte *m_regbyte;    /* cot_regbyte */
 610     byte *m_regname;        /* cot_regbyte - temporary */
 611 } cond_oper;
 612
 613 /*
 614     condition type typedef
 615 */
 616 typedef enum cond_type_
 617 {
 618     ct_equal,
 619     ct_not_equal
 620 } cond_type;
 621
 622 /*
 623     condition typedef
 624 */
 625 typedef struct cond_
 626 {
 627     cond_type m_type;
 628     cond_oper m_operands[2];
 629 } cond;
 630
 631 static void cond_create (cond **co)
 632 {
 633     *co = mem_alloc (sizeof (cond));
 634     if (*co)
 635     {
 636         (**co).m_operands[0].m_regname = NULL;
 637         (**co).m_operands[1].m_regname = NULL;
 638     }
 639 }
 640
 641 static void cond_destroy (cond **co)
 642 {
 643     if (*co)
 644     {
 645         mem_free ((void **) &(**co).m_operands[0].m_regname);
 646         mem_free ((void **) &(**co).m_operands[1].m_regname);
 647         mem_free ((void **) co);
 648     }
 649 }
 650
 651 /*
 652     specifier type typedef
 653 */
 654 typedef enum spec_type_
 655 {
 656     st_false,
 657     st_true,
 658     st_byte,
 659     st_byte_range,
 660     st_string,
 661     st_identifier,
 662     st_identifier_loop,
 663     st_debug
 664 } spec_type;
 665
 666 /*
 667     specifier typedef
 668 */
 669 typedef struct spec_
 670 {
 671     spec_type m_spec_type;
 672     byte m_byte[2];                 /* st_byte, st_byte_range */
 673     byte *m_string;                 /* st_string */
 674     struct rule_ *m_rule;           /* st_identifier, st_identifier_loop */
 675     emit *m_emits;
 676     error *m_errtext;
 677     cond *m_cond;
 678     struct spec_ *m_next;
 679 } spec;
 680
 681 static void spec_create (spec **sp)
 682 {
 683     *sp = mem_alloc (sizeof (spec));
 684     if (*sp)
 685     {
 686         (**sp).m_spec_type = st_false;
 687         (**sp).m_byte[0] = '\0';
 688         (**sp).m_byte[1] = '\0';
 689         (**sp).m_string = NULL;
 690         (**sp).m_rule = NULL;
 691         (**sp).m_emits = NULL;
 692         (**sp).m_errtext = NULL;
 693         (**sp).m_cond = NULL;
 694         (**sp).m_next = NULL;
 695     }
 696 }
 697
 698 static void spec_destroy (spec **sp)
 699 {
 700     if (*sp)
 701     {
 702         spec_destroy (&(**sp).m_next);
 703         emit_destroy (&(**sp).m_emits);
 704         error_destroy (&(**sp).m_errtext);
 705         mem_free ((void **) &(**sp).m_string);
 706         cond_destroy (&(**sp).m_cond);
 707         mem_free ((void **) sp);
 708     }
 709 }
 710
 711 static void spec_append (spec **sp, spec **ns)
 712 {
 713     while (*sp)
 714         sp = &(**sp).m_next;
 715     *sp = *ns;
 716 }
 717
 718 /*
 719     operator typedef
 720 */
 721 typedef enum oper_
 722 {
 723     op_none,
 724     op_and,
 725     op_or
 726 } oper;
 727
 728 /*
 729     rule typedef
 730 */
 731 typedef struct rule_
 732 {
 733     oper m_oper;
 734     spec *m_specs;
 735     struct rule_ *m_next;
 736 /*  int m_referenced; */            /* for debugging purposes */
 737 } rule;
 738
 739 static void rule_create (rule **ru)
 740 {
 741     *ru = mem_alloc (sizeof (rule));
 742     if (*ru)
 743     {
 744         (**ru).m_oper = op_none;
 745         (**ru).m_specs = NULL;
 746         (**ru).m_next = NULL;
 747 /*      (**ru).m_referenced = 0; */
 748     }
 749 }
 750
 751 static void rule_destroy (rule **ru)
 752 {
 753     if (*ru)
 754     {
 755         rule_destroy (&(**ru).m_next);
 756         spec_destroy (&(**ru).m_specs);
 757         mem_free ((void **) ru);
 758     }
 759 }
 760
 761 static void rule_append (rule **ru, rule **nr)
 762 {
 763     while (*ru)
 764         ru = &(**ru).m_next;
 765     *ru = *nr;
 766 }
 767
 768 /*
 769     returns unique grammar id
 770 */
 771 static grammar next_valid_grammar_id ()
 772 {
 773     static grammar id = 0;
 774
 775     return ++id;
 776 }
 777
 778 /*
 779     dictionary typedef
 780 */
 781 typedef struct dict_
 782 {
 783     rule *m_rulez;
 784     rule *m_syntax;
 785     rule *m_string;
 786     map_byte *m_regbytes;
 787     grammar m_id;
 788     struct dict_ *m_next;
 789 } dict;
 790
 791 static void dict_create (dict **di)
 792 {
 793     *di = mem_alloc (sizeof (dict));
 794     if (*di)
 795     {
 796         (**di).m_rulez = NULL;
 797         (**di).m_syntax = NULL;
 798         (**di).m_string = NULL;
 799         (**di).m_regbytes = NULL;
 800         (**di).m_id = next_valid_grammar_id ();
 801         (**di).m_next = NULL;
 802     }
 803 }
 804
 805 static void dict_destroy (dict **di)
 806 {
 807     if (*di)
 808     {
 809         rule_destroy (&(**di).m_rulez);
 810         map_byte_destroy (&(**di).m_regbytes);
 811         mem_free ((void **) di);
 812     }
 813 }
 814
 815 static void dict_append (dict **di, dict **nd)
 816 {
 817     while (*di)
 818         di = &(**di).m_next;
 819     *di = *nd;
 820 }
 821
 822 static void dict_find (dict **di, grammar key, dict **data)
 823 {
 824     while (*di)
 825     {
 826         if ((**di).m_id == key)
 827         {
 828             *data = *di;
 829             return;
 830         }
 831
 832         di = &(**di).m_next;
 833     }
 834
 835     *data = NULL;
 836 }
 837
 838 static dict *g_dicts = NULL;
 839
 840 /*
 841     byte array typedef
 842
 843     XXX this class is going to be replaced by a faster one, soon
 844 */
 845 typedef struct barray_
 846 {
 847     byte *data;
 848     unsigned int len;
 849 } barray;
 850
 851 static void barray_create (barray **ba)
 852 {
 853     *ba = mem_alloc (sizeof (barray));
 854     if (*ba)
 855     {
 856         (**ba).data = NULL;
 857         (**ba).len = 0;
 858     }
 859 }
 860
 861 static void barray_destroy (barray **ba)
 862 {
 863     if (*ba)
 864     {
 865         mem_free ((void **) &(**ba).data);
 866         mem_free ((void **) ba);
 867     }
 868 }
 869
 870 /*
 871     reallocates byte array to requested size,
 872     returns 0 on success,
 873     returns 1 otherwise
 874 */
 875 static int barray_resize (barray **ba, unsigned int nlen)
 876 {
 877     byte *new_pointer;
 878
 879     if (nlen == 0)
 880     {
 881         mem_free ((void **) &(**ba).data);
 882         (**ba).data = NULL;
 883         (**ba).len = 0;
 884
 885         return 0;
 886     }
 887     else
 888     {
 889         new_pointer = mem_realloc ((**ba).data, (**ba).len * sizeof (byte), nlen * sizeof (byte));
 890         if (new_pointer)
 891         {
 892             (**ba).data = new_pointer;
 893             (**ba).len = nlen;
 894
 895             return 0;
 896         }
 897     }
 898
 899     return 1;
 900 }
 901
 902 /*
 903     adds byte array pointed by *nb to the end of array pointed by *ba,
 904     returns 0 on success,
 905     returns 1 otherwise
 906 */
 907 static int barray_append (barray **ba, barray **nb)
 908 {
 909     const unsigned int len = (**ba).len;
 910
 911     if (barray_resize (ba, (**ba).len + (**nb).len))
 912         return 1;
 913
 914     mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
 915
 916     return 0;
 917 }
 918
 919 /*
 920     adds emit chain pointed by em to the end of array pointed by *ba,
 921     returns 0 on success,
 922     returns 1 otherwise
 923 */
 924 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
 925 {
 926     emit *temp = em;
 927     unsigned int count = 0;
 928
 929     while (temp)
 930     {
 931         if (temp->m_emit_dest == ed_output)
 932             if (temp->m_emit_type == et_position)
 933                 count += 4;     /* position is a 32-bit unsigned integer */
 934             else
 935                 count++;
 936
 937         temp = temp->m_next;
 938     }
 939
 940     if (barray_resize (ba, (**ba).len + count))
 941         return 1;
 942
 943     while (em)
 944     {
 945         if (em->m_emit_dest == ed_output)
 946         {
 947             if (em->m_emit_type == et_byte)
 948                 (**ba).data[(**ba).len - count--] = em->m_byte;
 949             else if (em->m_emit_type == et_stream)
 950                 (**ba).data[(**ba).len - count--] = c;
 951             else // em->type == et_position
 952                 (**ba).data[(**ba).len - count--] = (byte) pos,
 953                 (**ba).data[(**ba).len - count--] = (byte) (pos >> 8),
 954                 (**ba).data[(**ba).len - count--] = (byte) (pos >> 16),
 955                 (**ba).data[(**ba).len - count--] = (byte) (pos >> 24);
 956         }
 957         else
 958         {
 959             regbyte_ctx *new_rbc;
 960             regbyte_ctx_create (&new_rbc);
 961             if (new_rbc == NULL)
 962                 return 1;
 963
 964             new_rbc->m_prev = *rbc;
 965             new_rbc->m_regbyte = em->m_regbyte;
 966             *rbc = new_rbc;
 967
 968             if (em->m_emit_type == et_byte)
 969                 new_rbc->m_current_value = em->m_byte;
 970             else if (em->m_emit_type == et_stream)
 971                 new_rbc->m_current_value = c;
 972         }
 973
 974         em = em->m_next;
 975     }
 976
 977     return 0;
 978 }
 979
 980 /*
 981     string to string map typedef
 982 */
 983 typedef struct map_str_
 984 {
 985     byte *key;
 986     byte *data;
 987     struct map_str_ *next;
 988 } map_str;
 989
 990 static void map_str_create (map_str **ma)
 991 {
 992     *ma = mem_alloc (sizeof (map_str));
 993     if (*ma)
 994     {
 995         (**ma).key = NULL;
 996         (**ma).data = NULL;
 997         (**ma).next = NULL;
 998     }
 999 }
1000
1001 static void map_str_destroy (map_str **ma)
1002 {
1003     if (*ma)
1004     {
1005         map_str_destroy (&(**ma).next);
1006         mem_free ((void **) &(**ma).key);
1007         mem_free ((void **) &(**ma).data);
1008         mem_free ((void **) ma);
1009     }
1010 }
1011
1012 static void map_str_append (map_str **ma, map_str **nm)
1013 {
1014     while (*ma)
1015         ma = &(**ma).next;
1016     *ma = *nm;
1017 }
1018
1019 /*
1020     searches the map for specified key,
1021     if the key is matched, *data is filled with data associated with the key,
1022     returns 0 if the key is matched,
1023     returns 1 otherwise
1024 */
1025 static int map_str_find (map_str **ma, const byte *key, byte **data)
1026 {
1027     while (*ma)
1028     {
1029         if (str_equal ((**ma).key, key))
1030         {
1031             *data = str_duplicate ((**ma).data);
1032             if (*data == NULL)
1033                 return 1;
1034
1035             return 0;
1036         }
1037
1038         ma = &(**ma).next;
1039     }
1040
1041     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1042     return 1;
1043 }
1044
1045 /*
1046     string to rule map typedef
1047 */
1048 typedef struct map_rule_
1049 {
1050     byte *key;
1051     rule *data;
1052     struct map_rule_ *next;
1053 } map_rule;
1054
1055 static void map_rule_create (map_rule **ma)
1056 {
1057     *ma = mem_alloc (sizeof (map_rule));
1058     if (*ma)
1059     {
1060         (**ma).key = NULL;
1061         (**ma).data = NULL;
1062         (**ma).next = NULL;
1063     }
1064 }
1065
1066 static void map_rule_destroy (map_rule **ma)
1067 {
1068     if (*ma)
1069     {
1070         map_rule_destroy (&(**ma).next);
1071         mem_free ((void **) &(**ma).key);
1072         mem_free ((void **) ma);
1073     }
1074 }
1075
1076 static void map_rule_append (map_rule **ma, map_rule **nm)
1077 {
1078     while (*ma)
1079         ma = &(**ma).next;
1080     *ma = *nm;
1081 }
1082
1083 /*
1084     searches the map for specified key,
1085     if the key is matched, *data is filled with data associated with the key,
1086     returns 0 if the is matched,
1087     returns 1 otherwise
1088 */
1089 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1090 {
1091     while (*ma)
1092     {
1093         if (str_equal ((**ma).key, key))
1094         {
1095             *data = (**ma).data;
1096
1097             return 0;
1098         }
1099
1100         ma = &(**ma).next;
1101     }
1102
1103     set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1104     return 1;
1105 }
1106
1107 /*
1108     returns 1 if given character is a white space,
1109     returns 0 otherwise
1110 */
1111 static int is_space (byte c)
1112 {
1113     return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1114 }
1115
1116 /*
1117     advances text pointer by 1 if character pointed by *text is a space,
1118     returns 1 if a space has been eaten,
1119     returns 0 otherwise
1120 */
1121 static int eat_space (const byte **text)
1122 {
1123     if (is_space (**text))
1124     {
1125         (*text)++;
1126
1127         return 1;
1128     }
1129
1130     return 0;
1131 }
1132
1133 /*
1134     returns 1 if text points to C-style comment start string "/*",
1135     returns 0 otherwise
1136 */
1137 static int is_comment_start (const byte *text)
1138 {
1139     return text[0] == '/' && text[1] == '*';
1140 }
1141
1142 /*
1143     advances text pointer to first character after C-style comment block - if any,
1144     returns 1 if C-style comment block has been encountered and eaten,
1145     returns 0 otherwise
1146 */
1147 static int eat_comment (const byte **text)
1148 {
1149     if (is_comment_start (*text))
1150     {
1151         /* *text points to comment block - skip two characters to enter comment body */
1152         *text += 2;
1153         /* skip any character except consecutive '*' and '/' */
1154         while (!((*text)[0] == '*' && (*text)[1] == '/'))
1155             (*text)++;
1156         /* skip those two terminating characters */
1157         *text += 2;
1158
1159         return 1;
1160     }
1161
1162     return 0;
1163 }
1164
1165 /*
1166     advances text pointer to first character that is neither space nor C-style comment block
1167 */
1168 static void eat_spaces (const byte **text)
1169 {
1170     while (eat_space (text) || eat_comment (text))
1171         ;
1172 }
1173
1174 /*
1175     resizes string pointed by *ptr to successfully add character c to the end of the string,
1176     returns 0 on success,
1177     returns 1 otherwise
1178 */
1179 static int string_grow (byte **ptr, unsigned int *len, byte c)
1180 {
1181     /* reallocate the string in 16-byte increments */
1182     if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1183     {
1184         byte *tmp = mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1185             ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1186         if (tmp == NULL)
1187             return 1;
1188
1189         *ptr = tmp;
1190     }
1191
1192     if (c)
1193     {
1194         /* append given character */
1195         (*ptr)[*len] = c;
1196         (*len)++;
1197     }
1198     (*ptr)[*len] = '\0';
1199
1200     return 0;
1201 }
1202
1203 /*
1204     returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1205     returns 0 otherwise
1206 */
1207 static int is_identifier (byte c)
1208 {
1209     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1210 }
1211
1212 /*
1213     copies characters from *text to *id until non-identifier character is encountered,
1214     assumes that *id points to NULL object - caller is responsible for later freeing the string,
1215     text pointer is advanced to point past the copied identifier,
1216     returns 0 if identifier was successfully copied,
1217     returns 1 otherwise
1218 */
1219 static int get_identifier (const byte **text, byte **id)
1220 {
1221     const byte *t = *text;
1222     byte *p = NULL;
1223     unsigned int len = 0;
1224
1225     if (string_grow (&p, &len, '\0'))
1226         return 1;
1227
1228     /* loop while next character in buffer is valid for identifiers */
1229     while (is_identifier (*t))
1230     {
1231         if (string_grow (&p, &len, *t++))
1232         {
1233             mem_free ((void **) &p);
1234             return 1;
1235         }
1236     }
1237
1238     *text = t;
1239     *id = p;
1240
1241     return 0;
1242 }
1243
1244 /*
1245     returns 1 if given character is HEX digit 0-9, A-F or a-f,
1246     returns 0 otherwise
1247 */
1248 static int is_hex (byte c)
1249 {
1250     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1251 }
1252
1253 /*
1254     returns value of passed character as if it was HEX digit
1255 */
1256 static unsigned int hex2dec (byte c)
1257 {
1258     if (c >= '0' && c <= '9')
1259         return c - '0';
1260     if (c >= 'A' && c <= 'F')
1261         return c - 'A' + 10;
1262     return c - 'a' + 10;
1263 }
1264
1265 /*
1266     converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1267     advances text pointer past the converted sequence,
1268     returns the converted value
1269 */
1270 static unsigned int hex_convert (const byte **text)
1271 {
1272     unsigned int value = 0;
1273
1274     while (is_hex (**text))
1275     {
1276         value = value * 0x10 + hex2dec (**text);
1277         (*text)++;
1278     }
1279
1280     return value;
1281 }
1282
1283 /*
1284     returns 1 if given character is OCT digit 0-7,
1285     returns 0 otherwise
1286 */
1287 static int is_oct (byte c)
1288 {
1289     return c >= '0' && c <= '7';
1290 }
1291
1292 /*
1293     returns value of passed character as if it was OCT digit
1294 */
1295 static int oct2dec (byte c)
1296 {
1297     return c - '0';
1298 }
1299
1300 static byte get_escape_sequence (const byte **text)
1301 {
1302     int value = 0;
1303
1304     /* skip '\' character */
1305     (*text)++;
1306
1307     switch (*(*text)++)
1308     {
1309     case '\'':
1310         return '\'';
1311     case '"':
1312         return '\"';
1313     case '?':
1314         return '\?';
1315     case '\\':
1316         return '\\';
1317     case 'a':
1318         return '\a';
1319     case 'b':
1320         return '\b';
1321     case 'f':
1322         return '\f';
1323     case 'n':
1324         return '\n';
1325     case 'r':
1326         return '\r';
1327     case 't':
1328         return '\t';
1329     case 'v':
1330         return '\v';
1331     case 'x':
1332         return (byte) hex_convert (text);
1333     }
1334
1335     (*text)--;
1336     if (is_oct (**text))
1337     {
1338         value = oct2dec (*(*text)++);
1339         if (is_oct (**text))
1340         {
1341             value = value * 010 + oct2dec (*(*text)++);
1342             if (is_oct (**text))
1343                 value = value * 010 + oct2dec (*(*text)++);
1344         }
1345     }
1346
1347     return (byte) value;
1348 }
1349
1350 /*
1351     copies characters from *text to *str until " or ' character is encountered,
1352     assumes that *str points to NULL object - caller is responsible for later freeing the string,
1353     assumes that *text points to " or ' character that starts the string,
1354     text pointer is advanced to point past the " or ' character,
1355     returns 0 if string was successfully copied,
1356     returns 1 otherwise
1357 */
1358 static int get_string (const byte **text, byte **str)
1359 {
1360     const byte *t = *text;
1361     byte *p = NULL;
1362     unsigned int len = 0;
1363     byte term_char;
1364
1365     if (string_grow (&p, &len, '\0'))
1366         return 1;
1367
1368     /* read " or ' character that starts the string */
1369     term_char = *t++;
1370     /* while next character is not the terminating character */
1371     while (*t && *t != term_char)
1372     {
1373         byte c;
1374
1375         if (*t == '\\')
1376             c = get_escape_sequence (&t);
1377         else
1378             c = *t++;
1379
1380         if (string_grow (&p, &len, c))
1381         {
1382             mem_free ((void **) &p);
1383             return 1;
1384         }
1385     }
1386     /* skip " or ' character that ends the string */
1387     t++;
1388
1389     *text = t;
1390     *str = p;
1391     return 0;
1392 }
1393
1394 /*
1395     gets emit code, the syntax is: ".emtcode" " " <symbol> " " ("0x" | "0X") <hex_value>
1396     assumes that *text already points to <symbol>,
1397     returns 0 if emit code is successfully read,
1398     returns 1 otherwise
1399 */
1400 static int get_emtcode (const byte **text, map_byte **ma)
1401 {
1402     const byte *t = *text;
1403     map_byte *m = NULL;
1404
1405     map_byte_create (&m);
1406     if (m == NULL)
1407         return 1;
1408
1409     if (get_identifier (&t, &m->key))
1410     {
1411         map_byte_destroy (&m);
1412         return 1;
1413     }
1414     eat_spaces (&t);
1415
1416     if (*t == '\'')
1417     {
1418         byte *c;
1419
1420         if (get_string (&t, &c))
1421         {
1422             map_byte_destroy (&m);
1423             return 1;
1424         }
1425
1426         m->data = (byte) c[0];
1427         mem_free ((void **) &c);
1428     }
1429     else
1430     {
1431         /* skip HEX "0x" or "0X" prefix */
1432         t += 2;
1433         m->data = (byte) hex_convert (&t);
1434     }
1435
1436     eat_spaces (&t);
1437
1438     *text = t;
1439     *ma = m;
1440     return 0;
1441 }
1442
1443 /*
1444     gets regbyte declaration, the syntax is: ".regbyte" " " <symbol> " " ("0x" | "0X") <hex_value>
1445     assumes that *text already points to <symbol>,
1446     returns 0 if regbyte is successfully read,
1447     returns 1 otherwise
1448 */
1449 static int get_regbyte (const byte **text, map_byte **ma)
1450 {
1451     return get_emtcode (text, ma);
1452 }
1453
1454 /*
1455     returns 0 on success,
1456     returns 1 otherwise
1457 */
1458 static int get_errtext (const byte **text, map_str **ma)
1459 {
1460     const byte *t = *text;
1461     map_str *m = NULL;
1462
1463     map_str_create (&m);
1464     if (m == NULL)
1465         return 1;
1466
1467     if (get_identifier (&t, &m->key))
1468     {
1469         map_str_destroy (&m);
1470         return 1;
1471     }
1472     eat_spaces (&t);
1473
1474     if (get_string (&t, &m->data))
1475     {
1476         map_str_destroy (&m);
1477         return 1;
1478     }
1479     eat_spaces (&t);
1480
1481     *text = t;
1482     *ma = m;
1483     return 0;
1484 }
1485
1486 /*
1487     returns 0 on success,
1488     returns 1 otherwise,
1489 */
1490 static int get_error (const byte **text, error **er, map_str *maps)
1491 {
1492     const byte *t = *text;
1493     byte *temp = NULL;
1494
1495     if (*t != '.')
1496         return 0;
1497
1498     t++;
1499     if (get_identifier (&t, &temp))
1500         return 1;
1501     eat_spaces (&t);
1502
1503     if (!str_equal ((byte *) "error", temp))
1504     {
1505         mem_free ((void **) &temp);
1506         return 0;
1507     }
1508
1509     mem_free ((void **) &temp);
1510
1511     error_create (er);
1512     if (*er == NULL)
1513         return 1;
1514
1515     if (*t == '\"')
1516     {
1517         if (get_string (&t, &(**er).m_text))
1518         {
1519             error_destroy (er);
1520             return 1;
1521         }
1522         eat_spaces (&t);
1523     }
1524     else
1525     {
1526         if (get_identifier (&t, &temp))
1527         {
1528             error_destroy (er);
1529             return 1;
1530         }
1531         eat_spaces (&t);
1532
1533         if (map_str_find (&maps, temp, &(**er).m_text))
1534         {
1535             mem_free ((void **) &temp);
1536             error_destroy (er);
1537             return 1;
1538         }
1539
1540         mem_free ((void **) &temp);
1541     }
1542
1543     /* try to extract "token" from "...$token$..." */
1544     {
1545         byte *processed = NULL;
1546         unsigned int len = 0, i = 0;
1547
1548         if (string_grow (&processed, &len, '\0'))
1549         {
1550             error_destroy (er);
1551             return 1;
1552         }
1553
1554         while (i < str_length ((**er).m_text))
1555         {
1556             /* check if the dollar sign is repeated - if so skip it */
1557             if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1558             {
1559                 if (string_grow (&processed, &len, '$'))
1560                 {
1561                     mem_free ((void **) &processed);
1562                     error_destroy (er);
1563                     return 1;
1564                 }
1565
1566                 i += 2;
1567             }
1568             else if ((**er).m_text[i] != '$')
1569             {
1570                 if (string_grow (&processed, &len, (**er).m_text[i]))
1571                 {
1572                     mem_free ((void **) &processed);
1573                     error_destroy (er);
1574                     return 1;
1575                 }
1576
1577                 i++;
1578             }
1579             else
1580             {
1581                 if (string_grow (&processed, &len, '$'))
1582                 {
1583                     mem_free ((void **) &processed);
1584                     error_destroy (er);
1585                     return 1;
1586                 }
1587
1588                 {
1589                     /* length of token being extracted */
1590                     unsigned int tlen = 0;
1591
1592                     if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1593                     {
1594                         mem_free ((void **) &processed);
1595                         error_destroy (er);
1596                         return 1;
1597                     }
1598
1599                     /* skip the dollar sign */
1600                     i++;
1601
1602                     while ((**er).m_text[i] != '$')
1603                     {
1604                         if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1605                         {
1606                             mem_free ((void **) &processed);
1607                             error_destroy (er);
1608                             return 1;
1609                         }
1610
1611                         i++;
1612                     }
1613
1614                     /* skip the dollar sign */
1615                     i++;
1616                 }
1617             }
1618         }
1619
1620         mem_free ((void **) &(**er).m_text);
1621         (**er).m_text = processed;
1622     }
1623
1624     *text = t;
1625     return 0;
1626 }
1627
1628 /*
1629     returns 0 on success,
1630     returns 1 otherwise,
1631 */
1632 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1633 {
1634     const byte *t = *text;
1635     byte *temp = NULL;
1636     emit *e = NULL;
1637     emit_dest dest;
1638
1639     if (*t != '.')
1640         return 0;
1641
1642     t++;
1643     if (get_identifier (&t, &temp))
1644         return 1;
1645     eat_spaces (&t);
1646
1647     /* .emit */
1648     if (str_equal ((byte *) "emit", temp))
1649         dest = ed_output;
1650     /* .load */
1651     else if (str_equal ((byte *) "load", temp))
1652         dest = ed_regbyte;
1653     else
1654     {
1655         mem_free ((void **) &temp);
1656         return 0;
1657     }
1658
1659     mem_free ((void **) &temp);
1660
1661     emit_create (&e);
1662     if (e == NULL)
1663         return 1;
1664
1665     e->m_emit_dest = dest;
1666
1667     if (dest == ed_regbyte)
1668     {
1669         if (get_identifier (&t, &e->m_regname))
1670         {
1671             emit_destroy (&e);
1672             return 1;
1673         }
1674         eat_spaces (&t);
1675     }
1676
1677     /* 0xNN */
1678     if (*t == '0')
1679     {
1680         t += 2;
1681         e->m_byte = (byte) hex_convert (&t);
1682
1683         e->m_emit_type = et_byte;
1684     }
1685     /* * */
1686     else if (*t == '*')
1687     {
1688         t++;
1689
1690         e->m_emit_type = et_stream;
1691     }
1692     /* $ */
1693     else if (*t == '$')
1694     {
1695         t++;
1696
1697         e->m_emit_type = et_position;
1698     }
1699     /* 'c' */
1700     else if (*t == '\'')
1701     {
1702         if (get_string (&t, &temp))
1703         {
1704             emit_destroy (&e);
1705             return 1;
1706         }
1707         e->m_byte = (byte) temp[0];
1708
1709         mem_free ((void **) &temp);
1710
1711         e->m_emit_type = et_byte;
1712     }
1713     else
1714     {
1715         if (get_identifier (&t, &temp))
1716         {
1717             emit_destroy (&e);
1718             return 1;
1719         }
1720
1721         if (map_byte_find (&mapb, temp, &e->m_byte))
1722         {
1723             mem_free ((void **) &temp);
1724             emit_destroy (&e);
1725             return 1;
1726         }
1727
1728         mem_free ((void **) &temp);
1729
1730         e->m_emit_type = et_byte;
1731     }
1732
1733     eat_spaces (&t);
1734
1735     if (get_emits (&t, &e->m_next, mapb))
1736     {
1737         emit_destroy (&e);
1738         return 1;
1739     }
1740
1741     *text = t;
1742     *em = e;
1743     return 0;
1744 }
1745
1746 /*
1747     returns 0 on success,
1748     returns 1 otherwise,
1749 */
1750 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1751 {
1752     const byte *t = *text;
1753     spec *s = NULL;
1754
1755     spec_create (&s);
1756     if (s == NULL)
1757         return 1;
1758
1759     /* first - read optional .if statement */
1760     if (*t == '.')
1761     {
1762         const byte *u = t;
1763         byte *keyword = NULL;
1764
1765         /* skip the dot */
1766         u++;
1767
1768         if (get_identifier (&u, &keyword))
1769         {
1770             spec_destroy (&s);
1771             return 1;
1772         }
1773
1774         /* .if */
1775         if (str_equal ((byte *) "if", keyword))
1776         {
1777             cond_create (&s->m_cond);
1778             if (s->m_cond == NULL)
1779             {
1780                 spec_destroy (&s);
1781                 return 1;
1782             }
1783
1784             /* skip the left paren */
1785             eat_spaces (&u);
1786             u++;
1787
1788             /* get the left operand */
1789             eat_spaces (&u);
1790             if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1791             {
1792                 spec_destroy (&s);
1793                 return 1;
1794             }
1795             s->m_cond->m_operands[0].m_type = cot_regbyte;
1796
1797             /* get the operator (!= or ==) */
1798             eat_spaces (&u);
1799             if (*u == '!')
1800                 s->m_cond->m_type = ct_not_equal;
1801             else
1802                 s->m_cond->m_type = ct_equal;
1803             u += 2;
1804
1805             /* skip the 0x prefix */
1806             eat_spaces (&u);
1807             u += 2;
1808
1809             /* get the right operand */
1810             s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1811             s->m_cond->m_operands[1].m_type = cot_byte;
1812
1813             /* skip the right paren */
1814             eat_spaces (&u);
1815             u++;
1816
1817             eat_spaces (&u);
1818
1819             t = u;
1820         }
1821
1822         mem_free ((void **) &keyword);
1823     }
1824
1825     if (*t == '\'')
1826     {
1827         byte *temp = NULL;
1828
1829         if (get_string (&t, &temp))
1830         {
1831             spec_destroy (&s);
1832             return 1;
1833         }
1834         eat_spaces (&t);
1835
1836         if (*t == '-')
1837         {
1838             byte *temp2 = NULL;
1839
1840             /* skip the '-' character */
1841             t++;
1842             eat_spaces (&t);
1843
1844             if (get_string (&t, &temp2))
1845             {
1846                 mem_free ((void **) &temp);
1847                 spec_destroy (&s);
1848                 return 1;
1849             }
1850             eat_spaces (&t);
1851
1852             s->m_spec_type = st_byte_range;
1853             s->m_byte[0] = *temp;
1854             s->m_byte[1] = *temp2;
1855
1856             mem_free ((void **) &temp2);
1857         }
1858         else
1859         {
1860             s->m_spec_type = st_byte;
1861             *s->m_byte = *temp;
1862         }
1863
1864         mem_free ((void **) &temp);
1865     }
1866     else if (*t == '"')
1867     {
1868         if (get_string (&t, &s->m_string))
1869         {
1870             spec_destroy (&s);
1871             return 1;
1872         }
1873         eat_spaces (&t);
1874
1875         s->m_spec_type = st_string;
1876     }
1877     else if (*t == '.')
1878     {
1879         byte *keyword = NULL;
1880
1881         /* skip the dot */
1882         t++;
1883
1884         if (get_identifier (&t, &keyword))
1885         {
1886             spec_destroy (&s);
1887             return 1;
1888         }
1889         eat_spaces (&t);
1890
1891         /* .true */
1892         if (str_equal ((byte *) "true", keyword))
1893         {
1894             s->m_spec_type = st_true;
1895         }
1896         /* .false */
1897         else if (str_equal ((byte *) "false", keyword))
1898         {
1899             s->m_spec_type = st_false;
1900         }
1901         /* .debug */
1902         else if (str_equal ((byte *) "debug", keyword))
1903         {
1904             s->m_spec_type = st_debug;
1905         }
1906         /* .loop */
1907         else if (str_equal ((byte *) "loop", keyword))
1908         {
1909             if (get_identifier (&t, &s->m_string))
1910             {
1911                 mem_free ((void **) &keyword);
1912                 spec_destroy (&s);
1913                 return 1;
1914             }
1915             eat_spaces (&t);
1916
1917             s->m_spec_type = st_identifier_loop;
1918         }
1919
1920         mem_free ((void **) &keyword);
1921     }
1922     else
1923     {
1924         if (get_identifier (&t, &s->m_string))
1925         {
1926             spec_destroy (&s);
1927             return 1;
1928         }
1929         eat_spaces (&t);
1930
1931         s->m_spec_type = st_identifier;
1932     }
1933
1934     if (get_error (&t, &s->m_errtext, maps))
1935     {
1936         spec_destroy (&s);
1937         return 1;
1938     }
1939
1940     if (get_emits (&t, &s->m_emits, mapb))
1941     {
1942         spec_destroy (&s);
1943         return 1;
1944     }
1945
1946     *text = t;
1947     *sp = s;
1948     return 0;
1949 }
1950
1951 /*
1952     returns 0 on success,
1953     returns 1 otherwise,
1954 */
1955 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
1956 {
1957     const byte *t = *text;
1958     rule *r = NULL;
1959
1960     rule_create (&r);
1961     if (r == NULL)
1962         return 1;
1963
1964     if (get_spec (&t, &r->m_specs, maps, mapb))
1965     {
1966         rule_destroy (&r);
1967         return 1;
1968     }
1969
1970     while (*t != ';')
1971     {
1972         byte *op = NULL;
1973         spec *sp = NULL;
1974
1975         /* skip the dot that precedes "and" or "or" */
1976         t++;
1977
1978         /* read "and" or "or" keyword */
1979         if (get_identifier (&t, &op))
1980         {
1981             rule_destroy (&r);
1982             return 1;
1983         }
1984         eat_spaces (&t);
1985
1986         if (r->m_oper == op_none)
1987         {
1988             /* .and */
1989             if (str_equal ((byte *) "and", op))
1990                 r->m_oper = op_and;
1991             /* .or */
1992             else
1993                 r->m_oper = op_or;
1994         }
1995
1996         mem_free ((void **) &op);
1997
1998         if (get_spec (&t, &sp, maps, mapb))
1999         {
2000             rule_destroy (&r);
2001             return 1;
2002         }
2003
2004         spec_append (&r->m_specs, &sp);
2005     }
2006
2007     /* skip the semicolon */
2008     t++;
2009     eat_spaces (&t);
2010
2011     *text = t;
2012     *ru = r;
2013     return 0;
2014 }
2015
2016 /*
2017     returns 0 on success,
2018     returns 1 otherwise,
2019 */
2020 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2021 {
2022     if (map_rule_find (&mapr, symbol, ru))
2023         return 1;
2024
2025 /*  (**ru).m_referenced = 1; */
2026
2027     return 0;
2028 }
2029
2030 /*
2031     returns 0 on success,
2032     returns 1 otherwise,
2033 */
2034 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2035     byte **string_symbol, map_byte *regbytes)
2036 {
2037     rule *rulez = di->m_rulez;
2038
2039     /* update dependecies for the root and lexer symbols */
2040     if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2041         (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2042         return 1;
2043
2044     mem_free ((void **) syntax_symbol);
2045     mem_free ((void **) string_symbol);
2046
2047     /* update dependecies for the rest of the rules */
2048     while (rulez)
2049     {
2050         spec *sp = rulez->m_specs;
2051
2052         /* iterate through all the specifiers */
2053         while (sp)
2054         {
2055             /* update dependency for identifier */
2056             if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2057             {
2058                 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2059                     return 1;
2060
2061                 mem_free ((void **) &sp->m_string);
2062             }
2063
2064             /* some errtexts reference to a rule */
2065             if (sp->m_errtext && sp->m_errtext->m_token_name)
2066             {
2067                 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2068                     return 1;
2069
2070                 mem_free ((void **) &sp->m_errtext->m_token_name);
2071             }
2072
2073             /* update dependency for condition */
2074             if (sp->m_cond)
2075             {
2076                 int i;
2077                 for (i = 0; i < 2; i++)
2078                     if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2079                     {
2080                         sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2081                             sp->m_cond->m_operands[i].m_regname);
2082
2083                         if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2084                             return 1;
2085
2086                         mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2087                     }
2088             }
2089
2090             /* update dependency for all .load instructions */
2091             if (sp->m_emits)
2092             {
2093                 emit *em = sp->m_emits;
2094                 while (em != NULL)
2095                 {
2096                     if (em->m_emit_dest == ed_regbyte)
2097                     {
2098                         em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2099
2100                         if (em->m_regbyte == NULL)
2101                             return 1;
2102
2103                         mem_free ((void **) &em->m_regname);
2104                     }
2105
2106                     em = em->m_next;
2107                 }
2108             }
2109
2110             sp = sp->m_next;
2111         }
2112
2113         rulez = rulez->m_next;
2114     }
2115
2116 /* check for unreferenced symbols */
2117 /*  de = di->m_defntns;
2118     while (de)
2119     {
2120         if (!de->m_referenced)
2121         {
2122             map_def *ma = mapd;
2123             while (ma)
2124             {
2125                 if (ma->data == de)
2126                 {
2127                     assert (0);
2128                     break;
2129                 }
2130                 ma = ma->next;
2131             }
2132         }
2133         de = de->m_next;
2134     }
2135 */
2136     return 0;
2137 }
2138
2139 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2140 {
2141     byte values[2];
2142     int i;
2143
2144     if (co == NULL)
2145         return 1;
2146
2147     for (i = 0; i < 2; i++)
2148         switch (co->m_operands[i].m_type)
2149         {
2150         case cot_byte:
2151             values[i] = co->m_operands[i].m_byte;
2152             break;
2153         case cot_regbyte:
2154             values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2155             break;
2156         }
2157
2158     switch (co->m_type)
2159     {
2160     case ct_equal:
2161         return values[0] == values[1];
2162     case ct_not_equal:
2163         return values[0] != values[1];
2164     }
2165
2166     return 0;
2167 }
2168
2169 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2170 {
2171     while (top != limit)
2172     {
2173         regbyte_ctx *rbc = top->m_prev;
2174         regbyte_ctx_destroy (&top);
2175         top = rbc;
2176     }
2177 }
2178
/*
    Outcome of a single match() call.
*/
typedef enum match_result_
{
    /* the examined string does not match */
    mr_not_matched = 0,
    /* the examined string matches */
    mr_matched = 1,
    /* mr_not_matched + error has been raised */
    mr_error_raised = 2,
    /* used by identifier loops only */
    mr_dont_emit = 3,
    /* an internal error has occurred such as out of memory */
    mr_internal_error = 4
} match_result;
2187
2188 /*
2189     This function does the main job. It parses the text and generates output data.
2190
2191     XXX optimize it - the barray seems to be the bottleneck
2192 */
2193 static match_result match (dict *di, const byte *text, unsigned int *index, rule *ru, barray **ba,
2194     int filtering_string, regbyte_ctx **rbc)
2195 {
2196     unsigned int ind = *index;
2197     match_result status = mr_not_matched;
2198     spec *sp = ru->m_specs;
2199     regbyte_ctx *ctx = *rbc;
2200
2201     /* for every specifier in the rule */
2202     while (sp)
2203     {
2204         unsigned int i, len, save_ind = ind;
2205         barray *array = NULL;
2206
2207         if (satisfies_condition (sp->m_cond, ctx))
2208         {
2209             switch (sp->m_spec_type)
2210             {
2211             case st_identifier:
2212                 barray_create (&array);
2213                 if (array == NULL)
2214                 {
2215                     free_regbyte_ctx_stack (ctx, *rbc);
2216                     return mr_internal_error;
2217                 }
2218
2219                 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2220                 if (status == mr_internal_error)
2221                 {
2222                     free_regbyte_ctx_stack (ctx, *rbc);
2223                     barray_destroy (&array);
2224                     return mr_internal_error;
2225                 }
2226                 break;
2227             case st_string:
2228                 len = str_length (sp->m_string);
2229
2230                 /* prefilter the stream */
2231                 if (!filtering_string && di->m_string)
2232                 {
2233                     barray *ba;
2234                     unsigned int filter_index = 0;
2235                     match_result result;
2236                     regbyte_ctx *null_ctx = NULL;
2237
2238                     barray_create (&ba);
2239                     if (ba == NULL)
2240                     {
2241                         free_regbyte_ctx_stack (ctx, *rbc);
2242                         return mr_internal_error;
2243                     }
2244
2245                     result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2246
2247                     if (result == mr_internal_error)
2248                     {
2249                         free_regbyte_ctx_stack (ctx, *rbc);
2250                         barray_destroy (&ba);
2251                         return mr_internal_error;
2252                     }
2253
2254                     if (result != mr_matched)
2255                     {
2256                         barray_destroy (&ba);
2257                         status = mr_not_matched;
2258                         break;
2259                     }
2260
2261                     barray_destroy (&ba);
2262
2263                     if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2264                     {
2265                         status = mr_not_matched;
2266                         break;
2267                     }
2268
2269                     status = mr_matched;
2270                     ind += len;
2271                 }
2272                 else
2273                 {
2274                     status = mr_matched;
2275                     for (i = 0; status == mr_matched && i < len; i++)
2276                         if (text[ind + i] != sp->m_string[i])
2277                             status = mr_not_matched;
2278                     if (status == mr_matched)
2279                         ind += len;
2280                 }
2281                 break;
2282             case st_byte:
2283                 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2284                 if (status == mr_matched)
2285                     ind++;
2286                 break;
2287             case st_byte_range:
2288                 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2289                     mr_matched : mr_not_matched;
2290                 if (status == mr_matched)
2291                     ind++;
2292                 break;
2293             case st_true:
2294                 status = mr_matched;
2295                 break;
2296             case st_false:
2297                 status = mr_not_matched;
2298                 break;
2299             case st_debug:
2300                 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2301                 break;
2302             case st_identifier_loop:
2303                 barray_create (&array);
2304                 if (array == NULL)
2305                 {
2306                     free_regbyte_ctx_stack (ctx, *rbc);
2307                     return mr_internal_error;
2308                 }
2309
2310                 status = mr_dont_emit;
2311                 for (;;)
2312                 {
2313                     match_result result;
2314
2315                     save_ind = ind;
2316                     result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2317
2318                     if (result == mr_error_raised)
2319                     {
2320                         status = result;
2321                         break;
2322                     }
2323                     else if (result == mr_matched)
2324                     {
2325                         if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2326                             barray_append (ba, &array))
2327                         {
2328                             free_regbyte_ctx_stack (ctx, *rbc);
2329                             barray_destroy (&array);
2330                             return mr_internal_error;
2331                         }
2332                         barray_destroy (&array);
2333                         barray_create (&array);
2334                         if (array == NULL)
2335                         {
2336                             free_regbyte_ctx_stack (ctx, *rbc);
2337                             return mr_internal_error;
2338                         }
2339                     }
2340                     else if (result == mr_internal_error)
2341                     {
2342                         free_regbyte_ctx_stack (ctx, *rbc);
2343                         barray_destroy (&array);
2344                         return mr_internal_error;
2345                     }
2346                     else
2347                         break;
2348                 }
2349                 break;
2350             }
2351         }
2352         else
2353         {
2354             status = mr_not_matched;
2355         }
2356
2357         if (status == mr_error_raised)
2358         {
2359             free_regbyte_ctx_stack (ctx, *rbc);
2360             barray_destroy (&array);
2361
2362             return mr_error_raised;
2363         }
2364
2365         if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2366         {
2367             free_regbyte_ctx_stack (ctx, *rbc);
2368             barray_destroy (&array);
2369
2370             if (sp->m_errtext)
2371             {
2372                 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2373                     ind), ind);
2374
2375                 return mr_error_raised;
2376             }
2377
2378             return mr_not_matched;
2379         }
2380
2381         if (status == mr_matched)
2382         {
2383             if (sp->m_emits)
2384                 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2385                 {
2386                     free_regbyte_ctx_stack (ctx, *rbc);
2387                     barray_destroy (&array);
2388                     return mr_internal_error;
2389                 }
2390
2391             if (array)
2392                 if (barray_append (ba, &array))
2393                 {
2394                     free_regbyte_ctx_stack (ctx, *rbc);
2395                     barray_destroy (&array);
2396                     return mr_internal_error;
2397                 }
2398         }
2399
2400         barray_destroy (&array);
2401
2402         /* if the rule operator is a logical or, we pick up the first matching specifier */
2403         if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2404         {
2405             *index = ind;
2406             *rbc = ctx;
2407             return mr_matched;
2408         }
2409
2410         sp = sp->m_next;
2411     }
2412
2413     /* everything went fine - all specifiers match up */
2414     if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2415     {
2416         *index = ind;
2417         *rbc = ctx;
2418         return mr_matched;
2419     }
2420
2421     free_regbyte_ctx_stack (ctx, *rbc);
2422     return mr_not_matched;
2423 }
2424
2425 static byte *error_get_token (error *er, dict *di, const byte *text, unsigned int ind)
2426 {
2427     byte *str = NULL;
2428
2429     if (er->m_token)
2430     {
2431         barray *ba;
2432         unsigned int filter_index = 0;
2433         regbyte_ctx *ctx = NULL;
2434
2435         barray_create (&ba);
2436         if (ba != NULL)
2437         {
2438             if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2439                 filter_index)
2440             {
2441                 str = mem_alloc (filter_index + 1);
2442                 if (str != NULL)
2443                 {
2444                     str_copy_n (str, text + ind, filter_index);
2445                     str[filter_index] = '\0';
2446                 }
2447             }
2448             barray_destroy (&ba);
2449         }
2450     }
2451
2452     return str;
2453 }
2454
2455 typedef struct grammar_load_state_
2456 {
2457     dict *di;
2458     byte *syntax_symbol;
2459     byte *string_symbol;
2460     map_str *maps;
2461     map_byte *mapb;
2462     map_rule *mapr;
2463 } grammar_load_state;
2464
2465 static void grammar_load_state_create (grammar_load_state **gr)
2466 {
2467     *gr = mem_alloc (sizeof (grammar_load_state));
2468     if (*gr)
2469     {
2470         (**gr).di = NULL;
2471         (**gr).syntax_symbol = NULL;
2472         (**gr).string_symbol = NULL;
2473         (**gr).maps = NULL;
2474         (**gr).mapb = NULL;
2475         (**gr).mapr = NULL;
2476     }
2477 }
2478
2479 static void grammar_load_state_destroy (grammar_load_state **gr)
2480 {
2481     if (*gr)
2482     {
2483         dict_destroy (&(**gr).di);
2484         mem_free ((void **) &(**gr).syntax_symbol);
2485         mem_free ((void **) &(**gr).string_symbol);
2486         map_str_destroy (&(**gr).maps);
2487         map_byte_destroy (&(**gr).mapb);
2488         map_rule_destroy (&(**gr).mapr);
2489         mem_free ((void **) gr);
2490     }
2491 }
2492
2493 /*
2494     the API
2495 */
2496
2497 grammar grammar_load_from_text (const byte *text)
2498 {
2499     grammar_load_state *g = NULL;
2500     grammar id = 0;
2501
2502     clear_last_error ();
2503
2504     grammar_load_state_create (&g);
2505     if (g == NULL)
2506         return 0;
2507
2508     dict_create (&g->di);
2509     if (g->di == NULL)
2510     {
2511         grammar_load_state_destroy (&g);
2512         return 0;
2513     }
2514
2515     eat_spaces (&text);
2516
2517     /* skip ".syntax" keyword */
2518     text += 7;
2519     eat_spaces (&text);
2520
2521     /* retrieve root symbol */
2522     if (get_identifier (&text, &g->syntax_symbol))
2523     {
2524         grammar_load_state_destroy (&g);
2525         return 0;
2526     }
2527     eat_spaces (&text);
2528
2529     /* skip semicolon */
2530     text++;
2531     eat_spaces (&text);
2532
2533     while (*text)
2534     {
2535         byte *symbol = NULL;
2536         int is_dot = *text == '.';
2537
2538         if (is_dot)
2539             text++;
2540
2541         if (get_identifier (&text, &symbol))
2542         {
2543             grammar_load_state_destroy (&g);
2544             return 0;
2545         }
2546         eat_spaces (&text);
2547
2548         /* .emtcode */
2549         if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2550         {
2551             map_byte *ma = NULL;
2552
2553             mem_free ((void **) &symbol);
2554
2555             if (get_emtcode (&text, &ma))
2556             {
2557                 grammar_load_state_destroy (&g);
2558                 return 0;
2559             }
2560
2561             map_byte_append (&g->mapb, &ma);
2562         }
2563         /* .regbyte */
2564         else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2565         {
2566             map_byte *ma = NULL;
2567
2568             mem_free ((void **) &symbol);
2569
2570             if (get_regbyte (&text, &ma))
2571             {
2572                 grammar_load_state_destroy (&g);
2573                 return 0;
2574             }
2575
2576             map_byte_append (&g->di->m_regbytes, &ma);
2577         }
2578         /* .errtext */
2579         else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2580         {
2581             map_str *ma = NULL;
2582
2583             mem_free ((void **) &symbol);
2584
2585             if (get_errtext (&text, &ma))
2586             {
2587                 grammar_load_state_destroy (&g);
2588                 return 0;
2589             }
2590
2591             map_str_append (&g->maps, &ma);
2592         }
2593         /* .string */
2594         else if (is_dot && str_equal (symbol, (byte *) "string"))
2595         {
2596             mem_free ((void **) &symbol);
2597
2598             if (g->di->m_string != NULL)
2599             {
2600                 grammar_load_state_destroy (&g);
2601                 return 0;
2602             }
2603
2604             if (get_identifier (&text, &g->string_symbol))
2605             {
2606                 grammar_load_state_destroy (&g);
2607                 return 0;
2608             }
2609
2610             /* skip semicolon */
2611             eat_spaces (&text);
2612             text++;
2613             eat_spaces (&text);
2614         }
2615         else
2616         {
2617             rule *ru = NULL;
2618             map_rule *ma = NULL;
2619
2620             if (get_rule (&text, &ru, g->maps, g->mapb))
2621             {
2622                 grammar_load_state_destroy (&g);
2623                 return 0;
2624             }
2625
2626             rule_append (&g->di->m_rulez, &ru);
2627
2628             /* if a rule consist of only one specifier, give it an ".and" operator */
2629             if (ru->m_oper == op_none)
2630                 ru->m_oper = op_and;
2631
2632             map_rule_create (&ma);
2633             if (ma == NULL)
2634             {
2635                 grammar_load_state_destroy (&g);
2636                 return 0;
2637             }
2638
2639             ma->key = symbol;
2640             ma->data = ru;
2641             map_rule_append (&g->mapr, &ma);
2642         }
2643     }
2644
2645     if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2646         g->di->m_regbytes))
2647     {
2648         grammar_load_state_destroy (&g);
2649         return 0;
2650     }
2651
2652     dict_append (&g_dicts, &g->di);
2653     id = g->di->m_id;
2654     g->di = NULL;
2655
2656     grammar_load_state_destroy (&g);
2657
2658     return id;
2659 }
2660
2661 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2662 {
2663     dict *di = NULL;
2664     map_byte *reg = NULL;
2665
2666     clear_last_error ();
2667
2668     dict_find (&g_dicts, id, &di);
2669     if (di == NULL)
2670     {
2671         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2672         return 0;
2673     }
2674
2675     reg = map_byte_locate (&di->m_regbytes, name);
2676     if (reg == NULL)
2677     {
2678         set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2679         return 0;
2680     }
2681
2682     reg->data = value;
2683     return 1;
2684 }
2685
2686 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
2687 {
2688     dict *di = NULL;
2689     barray *ba = NULL;
2690     unsigned int index = 0;
2691     regbyte_ctx *rbc = NULL;
2692
2693     clear_last_error ();
2694
2695     dict_find (&g_dicts, id, &di);
2696     if (di == NULL)
2697     {
2698         set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2699         return 0;
2700     }
2701
2702     barray_create (&ba);
2703     if (ba == NULL)
2704         return 0;
2705
2706     *prod = NULL;
2707     *size = 0;
2708
2709     if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
2710     {
2711         barray_destroy (&ba);
2712         free_regbyte_ctx_stack (rbc, NULL);
2713         return 0;
2714     }
2715
2716     free_regbyte_ctx_stack (rbc, NULL);
2717
2718     *prod = mem_alloc (ba->len * sizeof (byte));
2719     if (*prod == NULL)
2720     {
2721         barray_destroy (&ba);
2722         return 0;
2723     }
2724
2725     mem_copy (*prod, ba->data, ba->len * sizeof (byte));
2726     *size = ba->len;
2727     barray_destroy (&ba);
2728
2729     return 1;
2730 }
2731
2732 int grammar_destroy (grammar id)
2733 {
2734     dict **di = &g_dicts;
2735
2736     clear_last_error ();
2737
2738     while (*di != NULL)
2739     {
2740         if ((**di).m_id == id)
2741         {
2742             dict *tmp = *di;
2743             *di = (**di).m_next;
2744             dict_destroy (&tmp);
2745             return 1;
2746         }
2747
2748         di = &(**di).m_next;
2749     }
2750
2751     set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2752     return 0;
2753 }
2754
2755 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
2756 {
2757     unsigned int len = 0, dots_made = 0;
2758     const byte *p = error_message;
2759
2760     *text = '\0';
2761
2762 #define APPEND_CHARACTER(x) if (dots_made == 0) {\
2763                                 if (len < size - 1) {\
2764                                     text[len++] = (x); text[len] = '\0';\
2765                                 } else {\
2766                                     int i;\
2767                                     for (i = 0; i < 3; i++)\
2768                                         if (--len >= 0)\
2769                                             text[len] = '.';\
2770                                     dots_made = 1;\
2771                                 }\
2772                             }
2773
2774     if (p)
2775         while (*p)
2776             if (*p == '$')
2777             {
2778                 const byte *r = error_param;
2779
2780                 while (*r)
2781                 {
2782                     APPEND_CHARACTER(*r)
2783                     r++;
2784                 }
2785
2786                 p++;
2787             }
2788             else
2789             {
2790                 APPEND_CHARACTER(*p)
2791                 p++;
2792             }
2793
2794     *pos = error_position;
2795
2796 #undef APPEND_CHARACTER
2797
2798 }
2799