Merge commit 'origin/master' into gallium-0.2
[mesa.git] / src / mesa / shader / grammar / grammar.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.6
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file grammar.c
27 * syntax parsing engine
28 * \author Michal Krol
29 */
30
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
33 #endif
34
35 /*
36 */
37
38 /*
39 INTRODUCTION
40 ------------
41
42 The task is to check the syntax of an input string. Input string is a stream of ASCII
43 characters terminated with a null-character ('\0'). Checking it using C language is
44 difficult and hard to implement without bugs. It is hard to maintain and make changes when
45 the syntax changes.
46
47 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
48 only small changes. Even use of macros does not solve the problem because macros cannot
49 erase the complexity of the problem.
50
51 The resolution is to create a new language that will be highly oriented to our task. Once
52 we describe a particular syntax, we are done. We can then focus on the code that implements
53 the language. Its size and complexity are relatively small compared to the code that directly
54 checks the syntax.
55
56 First, we must implement our new language. Here, the language is implemented in C, but it
57 could also be implemented in any other language. The code is listed below. We must take
58 a good care that it is bug free. This is simple because the code is simple and clean.
59
60 Next, we must describe the syntax of our new language in itself. Once created and checked
61 manually that it is correct, we can use it to check another scripts.
62
63 Note that our new language loading code does not have to check the syntax. It is because we
64 assume that the script describing itself is correct, and other scripts can be syntactically
65 checked by the former script. The loading code must only do semantic checking which leads us to
66 simple resolving references.
67
68 THE LANGUAGE
69 ------------
70
71 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
72 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
73 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
74 connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
75 definition. Specifier can be a symbol, string, character, character range or a special
76 keyword ".true" or ".false".
77
78 On the very beginning of the script there is a declaration of a root symbol and is in the form:
79 .syntax <root_symbol>;
80 The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
81 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
82 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
83 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
84 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
85 true if any of the specifiers evaluates to true. If definition contains only one specifier,
86 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
87
88 If specifier is a ".true" keyword, it always evaluates to true.
89
90 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
91 when it does not evaluate to true.
92
93 Character range specifier is in the form:
94 '<first_character>' - '<second_character>'
95 If specifier is a character range, it evaluates to true if character in the stream is greater
96 or equal to <first_character> and less or equal to <second_character>. In that situation
97 the stream pointer is advanced to point to next character in the stream. All C-style escape
98 sequences are supported although trigraph sequences are not. The comparisons are performed
99 on 8-bit unsigned integers.
100
101 Character specifier is in the form:
102 '<single_character>'
103 It evaluates to true if the following character range specifier evaluates to true:
104 '<single_character>' - '<single_character>'
105
106 String specifier is in the form:
107 "<string>"
108 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
109 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
110 the following character specifier evaluates to true:
111 '<string>[i]'
112 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
113
114 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
115 .loop <symbol> (1)
116 where <symbol> is defined as follows:
117 <symbol> <definition>; (2)
118 Construction (1) is replaced by the following code:
119 <symbol$1>
120 and declaration (2) is replaced by the following:
121 <symbol$1> <symbol$2> .or .true;
122 <symbol$2> <symbol> .and <symbol$1>;
123 <symbol> <definition>;
124
125 Synek also supports a register mechanism. The user can, in a SYN file, declare a number of
126 registers that can be accessed in the syn body. Each reg has its name and a default value.
127 The register is one byte wide. The C code can change the default value by calling
128 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
129 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
130 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
131 where <operator> can be == or !=. If the condition evaluates to false, the specifier
132 evaluates to .false. Otherwise it evaluates to the specifier.
133
134 ESCAPE SEQUENCES
135 ----------------
136
137 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
138 All occurrences of the characters in the first column are replaced with the corresponding
139 character in the second column.
140
141 Escape sequence Represents
142 ------------------------------------------------------------------------------------------------
143 \a Bell (alert)
144 \b Backspace
145 \f Formfeed
146 \n New line
147 \r Carriage return
148 \t Horizontal tab
149 \v Vertical tab
150 \' Single quotation mark
151 \" Double quotation mark
152 \\ Backslash
153 \? Literal question mark
154 \ooo ASCII character in octal notation
155 \xhhh ASCII character in hexadecimal notation
156 ------------------------------------------------------------------------------------------------
157
158 RAISING ERRORS
159 --------------
160
161 Any specifier can be followed by a special construction that is executed when the specifier
162 evaluates to false. The construction is in the form:
163 .error <ERROR_TEXT>
164 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
165 in the form:
166 .errtext <ERROR_TEXT> "<error_desc>"
167 When specifier evaluates to false and this construction is present, parsing is stopped
168 immediately and <error_desc> is returned as a result of parsing. The error position is also
169 returned and it is meant as an offset from the beginning of the stream to the character that
170 was valid so far. Example:
171
172 (**** syntax script ****)
173
174 .syntax program;
175 .errtext MISSING_SEMICOLON "missing ';'"
176 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
177 .loop space .and '\0';
178 declaration "declare" .and .loop space .and identifier;
179 space ' ';
180
181 (**** sample code ****)
182
183 declare foo ,
184
185 In the example above checking the sample code will result in error message "missing ';'" and
186 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
187 assure that there is no code after semicolon - only spaces.
188 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
189 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
190 the identifier name. The starting position is the error position. The length of the resulting
191 string is the position after invoking the symbol.
192
193 PRODUCTION
194 ----------
195
196 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
197 that evaluate to true. That is, every specifier and optional error construction can be followed
198 by a number of emit constructions that are in the form:
199 .emit <parameter>
200 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
201 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
202 in the form:
203 .emtcode <identifier> <hex_number>
204
205 When given specifier evaluates to true, all emits associated with the specifier are output
206 in order they were declared. A star means that last-read character should be output instead
207 of constant value. Example:
208
209 (**** syntax script ****)
210
211 .syntax foobar;
212 .emtcode WORD_FOO 0x01
213 .emtcode WORD_BAR 0x02
214 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
215 FOO "foo" .and SPACE;
216 BAR "bar" .and SPACE;
217 SPACE ' ' .or '\0';
218
219 (**** sample text 1 ****)
220
221 foo
222
223 (**** sample text 2 ****)
224
225 foobar
226
227 For both samples the result will be one-element array. For first sample text it will be
228 value 1, for second - 0. Note that every text will be accepted because of presence of
229 .true as an alternative.
230
231 Another example:
232
233 (**** syntax script ****)
234
235 .syntax declaration;
236 .emtcode VARIABLE 0x01
237 declaration "declare" .and .loop space .and
238 identifier .emit VARIABLE .and (1)
239 .true .emit 0x00 .and (2)
240 .loop space .and ';';
241 space ' ' .or '\t';
242 identifier .loop id_char .emit *; (3)
243 id_char 'a'-'z' .or 'A'-'Z' .or '_';
244
245 (**** sample code ****)
246
247 declare fubar;
248
249 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
250 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
251 to terminate the string with null to signal when the string ends. Specifier (3) outputs
252 all characters that make declared identifier. The result of sample code will be the
253 following array:
254 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
255
256 If .emit is followed by dollar $, it means that current position should be output. Current
257 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
258 first character consumed by the specifier associated with the .emit instruction. Current
259 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
260 first).
261 */
262
263 #include <stdio.h>
264
/* forward declaration: mem_free() is called by the error-state helpers before it is defined */
static void mem_free (void **);
266
/*
   internal error messages
   a '$' inside a message marks the place where error_param is inserted
*/
static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";
static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";
static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";
static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";
/*static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";*/
static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";

/* state of the most recent error - written by set_last_error(), reset by clear_last_error() */
static const byte *error_message = NULL; /* points to one of the error messages above */
static byte *error_param = NULL; /* this is inserted into error_message in place of $ */
static int error_position = -1; /* clear_last_error() resets this to -1 */

/* static placeholder stored in error_param when no parameter is given - never freed */
static byte *unknown = (byte *) "???";
282
283 static void clear_last_error (void)
284 {
285 /* reset error message */
286 error_message = NULL;
287
288 /* free error parameter - if error_param is a "???" don't free it - it's static */
289 if (error_param != unknown)
290 mem_free ((void **) (void *) &error_param);
291 else
292 error_param = NULL;
293
294 /* reset error position */
295 error_position = -1;
296 }
297
298 static void set_last_error (const byte *msg, byte *param, int pos)
299 {
300 /* error message can be set only once */
301 if (error_message != NULL)
302 {
303 mem_free ((void **) (void *) &param);
304 return;
305 }
306
307 error_message = msg;
308
309 /* if param is NULL, set error_param to unknown ("???") */
310 /* note: do not try to strdup the "???" - it may be that we are here because of */
311 /* out of memory error so strdup can fail */
312 if (param != NULL)
313 error_param = param;
314 else
315 error_param = unknown;
316
317 error_position = pos;
318 }
319
320 /*
321 memory management routines
322 */
323 static void *mem_alloc (size_t size)
324 {
325 void *ptr = grammar_alloc_malloc (size);
326 if (ptr == NULL)
327 set_last_error (OUT_OF_MEMORY, NULL, -1);
328 return ptr;
329 }
330
/*
   copies size bytes from src to dst by forwarding to grammar_memory_copy(),
   returns whatever grammar_memory_copy() returns
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
   void *result = grammar_memory_copy (dst, src, size);

   return result;
}
335
/*
   releases the block *ptr through grammar_alloc_free() and resets *ptr to NULL
*/
static void mem_free (void **ptr)
{
   void *block = *ptr;

   *ptr = NULL;
   grammar_alloc_free (block);
}
341
342 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
343 {
344 void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
345 if (ptr2 == NULL)
346 set_last_error (OUT_OF_MEMORY, NULL, -1);
347 return ptr2;
348 }
349
350 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
351 {
352 return grammar_string_copy_n (dst, src, max_len);
353 }
354
355 static byte *str_duplicate (const byte *str)
356 {
357 byte *new_str = grammar_string_duplicate (str);
358 if (new_str == NULL)
359 set_last_error (OUT_OF_MEMORY, NULL, -1);
360 return new_str;
361 }
362
363 static int str_equal (const byte *str1, const byte *str2)
364 {
365 return grammar_string_compare (str1, str2) == 0;
366 }
367
368 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
369 {
370 return grammar_string_compare_n (str1, str2, n) == 0;
371 }
372
373 static int
374 str_length (const byte *str)
375 {
376 return (int) (grammar_string_length (str));
377 }
378
379 /*
380 useful macros
381 */
/*
   defines <list_type>_append (x, nx), which walks the singly linked list starting at *x
   and stores nx in the first NULL "next" link (or in *x itself if the list is empty)
*/
#define GRAMMAR_IMPLEMENT_LIST_APPEND(list_type)\
   static void list_type##_append (list_type **x, list_type *nx)\
   {\
      list_type **link;\
      for (link = x; *link != NULL; link = &(*link)->next)\
         ;\
      *link = nx;\
   }
387
388 /*
389 string to byte map typedef
390 */
391 typedef struct map_byte_
392 {
393 byte *key;
394 byte data;
395 struct map_byte_ *next;
396 } map_byte;
397
398 static void map_byte_create (map_byte **ma)
399 {
400 *ma = (map_byte *) mem_alloc (sizeof (map_byte));
401 if (*ma)
402 {
403 (**ma).key = NULL;
404 (**ma).data = '\0';
405 (**ma).next = NULL;
406 }
407 }
408
409 static void map_byte_destroy (map_byte **ma)
410 {
411 if (*ma)
412 {
413 map_byte_destroy (&(**ma).next);
414 mem_free ((void **) &(**ma).key);
415 mem_free ((void **) ma);
416 }
417 }
418
/* instantiates map_byte_append(): appends a node to the end of a map_byte list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
420
421 /*
422 searches the map for the specified key,
423 returns pointer to the element with the specified key if it exists
424 returns NULL otherwise
425 */
426 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
427 {
428 while (*ma)
429 {
430 if (str_equal ((**ma).key, key))
431 return *ma;
432
433 ma = &(**ma).next;
434 }
435
436 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
437 return NULL;
438 }
439
440 /*
441 searches the map for specified key,
442 if the key is matched, *data is filled with data associated with the key,
443 returns 0 if the key is matched,
444 returns 1 otherwise
445 */
446 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
447 {
448 map_byte *found = map_byte_locate (ma, key);
449 if (found != NULL)
450 {
451 *data = found->data;
452
453 return 0;
454 }
455
456 return 1;
457 }
458
459 /*
460 regbyte context typedef
461
462 Each regbyte consists of its name and a default value. These are static and created at
463 grammar script compile-time, for example the following line:
464 .regbyte vertex_blend 0x00
465 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
466 When the script is executed, this regbyte can be accessed by name for read and write. When a
467 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
468 stack. The new entry contains information abot which regbyte it references and its new value.
469 When a given regbyte is accessed for read, the stack is searched top-down to find an
470 entry that references the regbyte. The first matching entry is used to return the current
471 value it holds. If no entry is found, the default value is returned.
472 */
473 typedef struct regbyte_ctx_
474 {
475 map_byte *m_regbyte;
476 byte m_current_value;
477 struct regbyte_ctx_ *m_prev;
478 } regbyte_ctx;
479
480 static void regbyte_ctx_create (regbyte_ctx **re)
481 {
482 *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
483 if (*re)
484 {
485 (**re).m_regbyte = NULL;
486 (**re).m_prev = NULL;
487 }
488 }
489
490 static void regbyte_ctx_destroy (regbyte_ctx **re)
491 {
492 if (*re)
493 {
494 mem_free ((void **) re);
495 }
496 }
497
498 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
499 {
500 /* first lookup in the register stack */
501 while (*re != NULL)
502 {
503 if ((**re).m_regbyte == reg)
504 return (**re).m_current_value;
505
506 re = &(**re).m_prev;
507 }
508
509 /* if not found - return the default value */
510 return reg->data;
511 }
512
/*
   emit type typedef - selects what value an emit produces
*/
enum emit_type_
{
   et_byte,       /* explicit number */
   et_stream,     /* eaten character */
   et_position    /* current position */
};

typedef enum emit_type_ emit_type;
522
/*
   emit destination typedef - selects where an emitted value goes
*/
enum emit_dest_
{
   ed_output,     /* write to the output buffer */
   ed_regbyte     /* write a particular regbyte */
};

typedef enum emit_dest_ emit_dest;
531
532 /*
533 emit typedef
534 */
535 typedef struct emit_
536 {
537 emit_dest m_emit_dest;
538 emit_type m_emit_type; /* ed_output */
539 byte m_byte; /* et_byte */
540 map_byte *m_regbyte; /* ed_regbyte */
541 byte *m_regname; /* ed_regbyte - temporary */
542 struct emit_ *m_next;
543 } emit;
544
545 static void emit_create (emit **em)
546 {
547 *em = (emit *) mem_alloc (sizeof (emit));
548 if (*em)
549 {
550 (**em).m_emit_dest = ed_output;
551 (**em).m_emit_type = et_byte;
552 (**em).m_byte = '\0';
553 (**em).m_regbyte = NULL;
554 (**em).m_regname = NULL;
555 (**em).m_next = NULL;
556 }
557 }
558
559 static void emit_destroy (emit **em)
560 {
561 if (*em)
562 {
563 emit_destroy (&(**em).m_next);
564 mem_free ((void **) &(**em).m_regname);
565 mem_free ((void **) em);
566 }
567 }
568
569 static unsigned int emit_size (emit *_E)
570 {
571 unsigned int n = 0;
572
573 while (_E != NULL)
574 {
575 if (_E->m_emit_dest == ed_output)
576 {
577 if (_E->m_emit_type == et_position)
578 n += 4; /* position is a 32-bit unsigned integer */
579 else
580 n++;
581 }
582 _E = _E->m_next;
583 }
584
585 return n;
586 }
587
588 static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx)
589 {
590 while (_E != NULL)
591 {
592 if (_E->m_emit_dest == ed_output)
593 {
594 if (_E->m_emit_type == et_byte)
595 *_P++ = _E->m_byte;
596 else if (_E->m_emit_type == et_stream)
597 *_P++ = c;
598 else /* _Em->type == et_position */
599 {
600 *_P++ = (byte) (_Pos);
601 *_P++ = (byte) (_Pos >> 8);
602 *_P++ = (byte) (_Pos >> 16);
603 *_P++ = (byte) (_Pos >> 24);
604 }
605 }
606 else
607 {
608 regbyte_ctx *new_rbc;
609 regbyte_ctx_create (&new_rbc);
610 if (new_rbc == NULL)
611 return 1;
612
613 new_rbc->m_prev = *_Ctx;
614 new_rbc->m_regbyte = _E->m_regbyte;
615 *_Ctx = new_rbc;
616
617 if (_E->m_emit_type == et_byte)
618 new_rbc->m_current_value = _E->m_byte;
619 else if (_E->m_emit_type == et_stream)
620 new_rbc->m_current_value = c;
621 }
622
623 _E = _E->m_next;
624 }
625
626 return 0;
627 }
628
629 /*
630 error typedef
631 */
632 typedef struct error_
633 {
634 byte *m_text;
635 byte *m_token_name;
636 struct rule_ *m_token;
637 } error;
638
639 static void error_create (error **er)
640 {
641 *er = (error *) mem_alloc (sizeof (error));
642 if (*er)
643 {
644 (**er).m_text = NULL;
645 (**er).m_token_name = NULL;
646 (**er).m_token = NULL;
647 }
648 }
649
650 static void error_destroy (error **er)
651 {
652 if (*er)
653 {
654 mem_free ((void **) &(**er).m_text);
655 mem_free ((void **) &(**er).m_token_name);
656 mem_free ((void **) er);
657 }
658 }
659
/* forward declaration - lets the prototype below mention struct dict_ before it is defined */
struct dict_;

/* prototype only - the definition is not part of this section of the file */
static byte *
error_get_token (error *, struct dict_ *, const byte *, int);
664
/*
   condition operand type typedef
*/
enum cond_oper_type_
{
   cot_byte,      /* constant 8-bit unsigned integer */
   cot_regbyte    /* pointer to byte register containing the current value */
};

typedef enum cond_oper_type_ cond_oper_type;
673
674 /*
675 condition operand typedef
676 */
677 typedef struct cond_oper_
678 {
679 cond_oper_type m_type;
680 byte m_byte; /* cot_byte */
681 map_byte *m_regbyte; /* cot_regbyte */
682 byte *m_regname; /* cot_regbyte - temporary */
683 } cond_oper;
684
/*
   condition type typedef - the comparison applied to the two operands
*/
enum cond_type_
{
   ct_equal,
   ct_not_equal
};

typedef enum cond_type_ cond_type;
693
694 /*
695 condition typedef
696 */
697 typedef struct cond_
698 {
699 cond_type m_type;
700 cond_oper m_operands[2];
701 } cond;
702
703 static void cond_create (cond **co)
704 {
705 *co = (cond *) mem_alloc (sizeof (cond));
706 if (*co)
707 {
708 (**co).m_operands[0].m_regname = NULL;
709 (**co).m_operands[1].m_regname = NULL;
710 }
711 }
712
713 static void cond_destroy (cond **co)
714 {
715 if (*co)
716 {
717 mem_free ((void **) &(**co).m_operands[0].m_regname);
718 mem_free ((void **) &(**co).m_operands[1].m_regname);
719 mem_free ((void **) co);
720 }
721 }
722
/*
   specifier type typedef
*/
enum spec_type_
{
   st_false,
   st_true,
   st_byte,
   st_byte_range,
   st_string,
   st_identifier,
   st_identifier_loop,
   st_debug
};

typedef enum spec_type_ spec_type;
737
738 /*
739 specifier typedef
740 */
741 typedef struct spec_
742 {
743 spec_type m_spec_type;
744 byte m_byte[2]; /* st_byte, st_byte_range */
745 byte *m_string; /* st_string */
746 struct rule_ *m_rule; /* st_identifier, st_identifier_loop */
747 emit *m_emits;
748 error *m_errtext;
749 cond *m_cond;
750 struct spec_ *next;
751 } spec;
752
753 static void spec_create (spec **sp)
754 {
755 *sp = (spec *) mem_alloc (sizeof (spec));
756 if (*sp)
757 {
758 (**sp).m_spec_type = st_false;
759 (**sp).m_byte[0] = '\0';
760 (**sp).m_byte[1] = '\0';
761 (**sp).m_string = NULL;
762 (**sp).m_rule = NULL;
763 (**sp).m_emits = NULL;
764 (**sp).m_errtext = NULL;
765 (**sp).m_cond = NULL;
766 (**sp).next = NULL;
767 }
768 }
769
770 static void spec_destroy (spec **sp)
771 {
772 if (*sp)
773 {
774 spec_destroy (&(**sp).next);
775 emit_destroy (&(**sp).m_emits);
776 error_destroy (&(**sp).m_errtext);
777 mem_free ((void **) &(**sp).m_string);
778 cond_destroy (&(**sp).m_cond);
779 mem_free ((void **) sp);
780 }
781 }
782
/* instantiates spec_append(): appends a specifier to the end of a spec list */
GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
784
/*
   operator typedef - how the specifiers of a rule are combined
*/
enum oper_
{
   op_none,
   op_and,
   op_or
};

typedef enum oper_ oper;
794
795 /*
796 rule typedef
797 */
798 typedef struct rule_
799 {
800 oper m_oper;
801 spec *m_specs;
802 struct rule_ *next;
803 int m_referenced;
804 } rule;
805
806 static void rule_create (rule **ru)
807 {
808 *ru = (rule *) mem_alloc (sizeof (rule));
809 if (*ru)
810 {
811 (**ru).m_oper = op_none;
812 (**ru).m_specs = NULL;
813 (**ru).next = NULL;
814 (**ru).m_referenced = 0;
815 }
816 }
817
818 static void rule_destroy (rule **ru)
819 {
820 if (*ru)
821 {
822 rule_destroy (&(**ru).next);
823 spec_destroy (&(**ru).m_specs);
824 mem_free ((void **) ru);
825 }
826 }
827
/* instantiates rule_append(): appends a rule to the end of a rule list */
GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
829
830 /*
831 returns unique grammar id
832 */
833 static grammar next_valid_grammar_id (void)
834 {
835 static grammar id = 0;
836
837 return ++id;
838 }
839
840 /*
841 dictionary typedef
842 */
843 typedef struct dict_
844 {
845 rule *m_rulez;
846 rule *m_syntax;
847 rule *m_string;
848 map_byte *m_regbytes;
849 grammar m_id;
850 struct dict_ *next;
851 } dict;
852
853 static void dict_create (dict **di)
854 {
855 *di = (dict *) mem_alloc (sizeof (dict));
856 if (*di)
857 {
858 (**di).m_rulez = NULL;
859 (**di).m_syntax = NULL;
860 (**di).m_string = NULL;
861 (**di).m_regbytes = NULL;
862 (**di).m_id = next_valid_grammar_id ();
863 (**di).next = NULL;
864 }
865 }
866
867 static void dict_destroy (dict **di)
868 {
869 if (*di)
870 {
871 rule_destroy (&(**di).m_rulez);
872 map_byte_destroy (&(**di).m_regbytes);
873 mem_free ((void **) di);
874 }
875 }
876
/* instantiates dict_append(): appends a dictionary to the end of a dict list */
GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
878
879 static void dict_find (dict **di, grammar key, dict **data)
880 {
881 while (*di)
882 {
883 if ((**di).m_id == key)
884 {
885 *data = *di;
886 return;
887 }
888
889 di = &(**di).next;
890 }
891
892 *data = NULL;
893 }
894
/* NOTE(review): presumably the head of the list of all loaded dictionaries - its users are */
/* outside this section of the file, confirm there */
static dict *g_dicts = NULL;
896
897 /*
898 byte array typedef
899 */
900 typedef struct barray_
901 {
902 byte *data;
903 unsigned int len;
904 } barray;
905
906 static void barray_create (barray **ba)
907 {
908 *ba = (barray *) mem_alloc (sizeof (barray));
909 if (*ba)
910 {
911 (**ba).data = NULL;
912 (**ba).len = 0;
913 }
914 }
915
916 static void barray_destroy (barray **ba)
917 {
918 if (*ba)
919 {
920 mem_free ((void **) &(**ba).data);
921 mem_free ((void **) ba);
922 }
923 }
924
925 /*
926 reallocates byte array to requested size,
927 returns 0 on success,
928 returns 1 otherwise
929 */
930 static int barray_resize (barray **ba, unsigned int nlen)
931 {
932 byte *new_pointer;
933
934 if (nlen == 0)
935 {
936 mem_free ((void **) &(**ba).data);
937 (**ba).data = NULL;
938 (**ba).len = 0;
939
940 return 0;
941 }
942 else
943 {
944 new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
945 nlen * sizeof (byte));
946 if (new_pointer)
947 {
948 (**ba).data = new_pointer;
949 (**ba).len = nlen;
950
951 return 0;
952 }
953 }
954
955 return 1;
956 }
957
958 /*
959 adds byte array pointed by *nb to the end of array pointed by *ba,
960 returns 0 on success,
961 returns 1 otherwise
962 */
963 static int barray_append (barray **ba, barray **nb)
964 {
965 const unsigned int len = (**ba).len;
966
967 if (barray_resize (ba, (**ba).len + (**nb).len))
968 return 1;
969
970 mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
971
972 return 0;
973 }
974
975 /*
976 adds emit chain pointed by em to the end of array pointed by *ba,
977 returns 0 on success,
978 returns 1 otherwise
979 */
980 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
981 {
982 unsigned int count = emit_size (em);
983
984 if (barray_resize (ba, (**ba).len + count))
985 return 1;
986
987 return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
988 }
989
990 /*
991 byte pool typedef
992 */
993 typedef struct bytepool_
994 {
995 byte *_F;
996 unsigned int _Siz;
997 } bytepool;
998
999 static void bytepool_destroy (bytepool **by)
1000 {
1001 if (*by != NULL)
1002 {
1003 mem_free ((void **) &(**by)._F);
1004 mem_free ((void **) by);
1005 }
1006 }
1007
1008 static void bytepool_create (bytepool **by, int len)
1009 {
1010 *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1011 if (*by != NULL)
1012 {
1013 (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1014 (**by)._Siz = len;
1015
1016 if ((**by)._F == NULL)
1017 bytepool_destroy (by);
1018 }
1019 }
1020
1021 static int bytepool_reserve (bytepool *by, unsigned int n)
1022 {
1023 byte *_P;
1024
1025 if (n <= by->_Siz)
1026 return 0;
1027
1028 /* byte pool can only grow and at least by doubling its size */
1029 n = n >= by->_Siz * 2 ? n : by->_Siz * 2;
1030
1031 /* reallocate the memory and adjust pointers to the new memory location */
1032 _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n));
1033 if (_P != NULL)
1034 {
1035 by->_F = _P;
1036 by->_Siz = n;
1037 return 0;
1038 }
1039
1040 return 1;
1041 }
1042
1043 /*
1044 string to string map typedef
1045 */
1046 typedef struct map_str_
1047 {
1048 byte *key;
1049 byte *data;
1050 struct map_str_ *next;
1051 } map_str;
1052
1053 static void map_str_create (map_str **ma)
1054 {
1055 *ma = (map_str *) mem_alloc (sizeof (map_str));
1056 if (*ma)
1057 {
1058 (**ma).key = NULL;
1059 (**ma).data = NULL;
1060 (**ma).next = NULL;
1061 }
1062 }
1063
1064 static void map_str_destroy (map_str **ma)
1065 {
1066 if (*ma)
1067 {
1068 map_str_destroy (&(**ma).next);
1069 mem_free ((void **) &(**ma).key);
1070 mem_free ((void **) &(**ma).data);
1071 mem_free ((void **) ma);
1072 }
1073 }
1074
/* instantiates map_str_append(): appends a node to the end of a map_str list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1076
1077 /*
1078 searches the map for specified key,
1079 if the key is matched, *data is filled with data associated with the key,
1080 returns 0 if the key is matched,
1081 returns 1 otherwise
1082 */
1083 static int map_str_find (map_str **ma, const byte *key, byte **data)
1084 {
1085 while (*ma)
1086 {
1087 if (str_equal ((**ma).key, key))
1088 {
1089 *data = str_duplicate ((**ma).data);
1090 if (*data == NULL)
1091 return 1;
1092
1093 return 0;
1094 }
1095
1096 ma = &(**ma).next;
1097 }
1098
1099 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1100 return 1;
1101 }
1102
1103 /*
1104 string to rule map typedef
1105 */
1106 typedef struct map_rule_
1107 {
1108 byte *key;
1109 rule *data;
1110 struct map_rule_ *next;
1111 } map_rule;
1112
1113 static void map_rule_create (map_rule **ma)
1114 {
1115 *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1116 if (*ma)
1117 {
1118 (**ma).key = NULL;
1119 (**ma).data = NULL;
1120 (**ma).next = NULL;
1121 }
1122 }
1123
1124 static void map_rule_destroy (map_rule **ma)
1125 {
1126 if (*ma)
1127 {
1128 map_rule_destroy (&(**ma).next);
1129 mem_free ((void **) &(**ma).key);
1130 mem_free ((void **) ma);
1131 }
1132 }
1133
/* instantiates map_rule_append(): appends a node to the end of a map_rule list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1135
1136 /*
1137 searches the map for specified key,
1138 if the key is matched, *data is filled with data associated with the key,
1139 returns 0 if the is matched,
1140 returns 1 otherwise
1141 */
1142 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1143 {
1144 while (*ma)
1145 {
1146 if (str_equal ((**ma).key, key))
1147 {
1148 *data = (**ma).data;
1149
1150 return 0;
1151 }
1152
1153 ma = &(**ma).next;
1154 }
1155
1156 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1157 return 1;
1158 }
1159
/*
    returns 1 if c is a blank, horizontal tab, line feed or carriage return,
    returns 0 otherwise
*/
static int is_space (byte c)
{
    switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
1168
1169 /*
1170 advances text pointer by 1 if character pointed by *text is a space,
1171 returns 1 if a space has been eaten,
1172 returns 0 otherwise
1173 */
1174 static int eat_space (const byte **text)
1175 {
1176 if (is_space (**text))
1177 {
1178 (*text)++;
1179
1180 return 1;
1181 }
1182
1183 return 0;
1184 }
1185
/*
    returns 1 if text begins with the two characters that open a C-style comment,
    returns 0 otherwise;
    the second character is inspected only when the first one is a slash
*/
static int is_comment_start (const byte *text)
{
    if (text[0] != '/')
        return 0;

    return text[1] == '*';
}
1194
1195 /*
1196 advances text pointer to first character after C-style comment block - if any,
1197 returns 1 if C-style comment block has been encountered and eaten,
1198 returns 0 otherwise
1199 */
1200 static int eat_comment (const byte **text)
1201 {
1202 if (is_comment_start (*text))
1203 {
1204 /* *text points to comment block - skip two characters to enter comment body */
1205 *text += 2;
1206 /* skip any character except consecutive '*' and '/' */
1207 while (!((*text)[0] == '*' && (*text)[1] == '/'))
1208 (*text)++;
1209 /* skip those two terminating characters */
1210 *text += 2;
1211
1212 return 1;
1213 }
1214
1215 return 0;
1216 }
1217
1218 /*
1219 advances text pointer to first character that is neither space nor C-style comment block
1220 */
1221 static void eat_spaces (const byte **text)
1222 {
1223 while (eat_space (text) || eat_comment (text))
1224 ;
1225 }
1226
1227 /*
1228 resizes string pointed by *ptr to successfully add character c to the end of the string,
1229 returns 0 on success,
1230 returns 1 otherwise
1231 */
1232 static int string_grow (byte **ptr, unsigned int *len, byte c)
1233 {
1234 /* reallocate the string in 16-byte increments */
1235 if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1236 {
1237 byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1238 ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1239 if (tmp == NULL)
1240 return 1;
1241
1242 *ptr = tmp;
1243 }
1244
1245 if (c)
1246 {
1247 /* append given character */
1248 (*ptr)[*len] = c;
1249 (*len)++;
1250 }
1251 (*ptr)[*len] = '\0';
1252
1253 return 0;
1254 }
1255
/*
    returns 1 if c may appear in an identifier, that is a-z, A-Z, 0-9 or _
    returns 0 otherwise
*/
static int is_identifier (byte c)
{
    if (c == '_')
        return 1;
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'z')
        return 1;

    return c >= 'A' && c <= 'Z';
}
1264
1265 /*
1266 copies characters from *text to *id until non-identifier character is encountered,
1267 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1268 text pointer is advanced to point past the copied identifier,
1269 returns 0 if identifier was successfully copied,
1270 returns 1 otherwise
1271 */
1272 static int get_identifier (const byte **text, byte **id)
1273 {
1274 const byte *t = *text;
1275 byte *p = NULL;
1276 unsigned int len = 0;
1277
1278 if (string_grow (&p, &len, '\0'))
1279 return 1;
1280
1281 /* loop while next character in buffer is valid for identifiers */
1282 while (is_identifier (*t))
1283 {
1284 if (string_grow (&p, &len, *t++))
1285 {
1286 mem_free ((void **) (void *) &p);
1287 return 1;
1288 }
1289 }
1290
1291 *text = t;
1292 *id = p;
1293
1294 return 0;
1295 }
1296
/*
    converts the run of decimal digits found at *text to a number,
    leaves the text pointer on the first character that is not a decimal digit,
    returns the converted value, or 0 if there was no digit at all
*/
static unsigned int dec_convert (const byte **text)
{
    const byte *p = *text;
    unsigned int result = 0;

    for (; *p >= '0' && *p <= '9'; p++)
        result = result * 10 + *p - '0';

    *text = p;
    return result;
}
1314
/*
    returns 1 if c is a hexadecimal digit: 0-9, A-F or a-f,
    returns 0 otherwise
*/
static int is_hex (byte c)
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;

    return c >= 'A' && c <= 'F';
}
1323
/*
    returns the numeric value of hexadecimal digit c;
    any character outside 0-9 and A-F is treated as a lower case digit
*/
static unsigned int hex2dec (byte c)
{
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    if (c >= '0' && c <= '9')
        return c - '0';

    return c - 'a' + 10;
}
1335
1336 /*
1337 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1338 advances text pointer past the converted sequence,
1339 returns the converted value
1340 */
1341 static unsigned int hex_convert (const byte **text)
1342 {
1343 unsigned int value = 0;
1344
1345 while (is_hex (**text))
1346 {
1347 value = value * 0x10 + hex2dec (**text);
1348 (*text)++;
1349 }
1350
1351 return value;
1352 }
1353
/*
    returns 1 if c is an octal digit 0-7,
    returns 0 otherwise
*/
static int is_oct (byte c)
{
    return !(c < '0' || c > '7');
}
1362
/*
    returns the numeric value of octal digit c;
    the character is not validated, the result is simply its distance from '0'
*/
static int oct2dec (byte c)
{
    const int digit = c;

    return digit - '0';
}
1370
1371 static byte get_escape_sequence (const byte **text)
1372 {
1373 int value = 0;
1374
1375 /* skip '\' character */
1376 (*text)++;
1377
1378 switch (*(*text)++)
1379 {
1380 case '\'':
1381 return '\'';
1382 case '"':
1383 return '\"';
1384 case '?':
1385 return '\?';
1386 case '\\':
1387 return '\\';
1388 case 'a':
1389 return '\a';
1390 case 'b':
1391 return '\b';
1392 case 'f':
1393 return '\f';
1394 case 'n':
1395 return '\n';
1396 case 'r':
1397 return '\r';
1398 case 't':
1399 return '\t';
1400 case 'v':
1401 return '\v';
1402 case 'x':
1403 return (byte) hex_convert (text);
1404 }
1405
1406 (*text)--;
1407 if (is_oct (**text))
1408 {
1409 value = oct2dec (*(*text)++);
1410 if (is_oct (**text))
1411 {
1412 value = value * 010 + oct2dec (*(*text)++);
1413 if (is_oct (**text))
1414 value = value * 010 + oct2dec (*(*text)++);
1415 }
1416 }
1417
1418 return (byte) value;
1419 }
1420
1421 /*
1422 copies characters from *text to *str until " or ' character is encountered,
1423 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1424 assumes that *text points to " or ' character that starts the string,
1425 text pointer is advanced to point past the " or ' character,
1426 returns 0 if string was successfully copied,
1427 returns 1 otherwise
1428 */
1429 static int get_string (const byte **text, byte **str)
1430 {
1431 const byte *t = *text;
1432 byte *p = NULL;
1433 unsigned int len = 0;
1434 byte term_char;
1435
1436 if (string_grow (&p, &len, '\0'))
1437 return 1;
1438
1439 /* read " or ' character that starts the string */
1440 term_char = *t++;
1441 /* while next character is not the terminating character */
1442 while (*t && *t != term_char)
1443 {
1444 byte c;
1445
1446 if (*t == '\\')
1447 c = get_escape_sequence (&t);
1448 else
1449 c = *t++;
1450
1451 if (string_grow (&p, &len, c))
1452 {
1453 mem_free ((void **) (void *) &p);
1454 return 1;
1455 }
1456 }
1457 /* skip " or ' character that ends the string */
1458 t++;
1459
1460 *text = t;
1461 *str = p;
1462 return 0;
1463 }
1464
1465 /*
1466 gets emit code, the syntax is:
1467 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1468 assumes that *text already points to <symbol>,
1469 returns 0 if emit code is successfully read,
1470 returns 1 otherwise
1471 */
1472 static int get_emtcode (const byte **text, map_byte **ma)
1473 {
1474 const byte *t = *text;
1475 map_byte *m = NULL;
1476
1477 map_byte_create (&m);
1478 if (m == NULL)
1479 return 1;
1480
1481 if (get_identifier (&t, &m->key))
1482 {
1483 map_byte_destroy (&m);
1484 return 1;
1485 }
1486 eat_spaces (&t);
1487
1488 if (*t == '\'')
1489 {
1490 byte *c;
1491
1492 if (get_string (&t, &c))
1493 {
1494 map_byte_destroy (&m);
1495 return 1;
1496 }
1497
1498 m->data = (byte) c[0];
1499 mem_free ((void **) (void *) &c);
1500 }
1501 else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1502 {
1503 /* skip HEX "0x" or "0X" prefix */
1504 t += 2;
1505 m->data = (byte) hex_convert (&t);
1506 }
1507 else
1508 {
1509 m->data = (byte) dec_convert (&t);
1510 }
1511
1512 eat_spaces (&t);
1513
1514 *text = t;
1515 *ma = m;
1516 return 0;
1517 }
1518
1519 /*
1520 gets regbyte declaration, the syntax is:
1521 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1522 assumes that *text already points to <symbol>,
1523 returns 0 if regbyte is successfully read,
1524 returns 1 otherwise
1525 */
1526 static int get_regbyte (const byte **text, map_byte **ma)
1527 {
1528 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1529 return get_emtcode (text, ma);
1530 }
1531
1532 /*
1533 returns 0 on success,
1534 returns 1 otherwise
1535 */
1536 static int get_errtext (const byte **text, map_str **ma)
1537 {
1538 const byte *t = *text;
1539 map_str *m = NULL;
1540
1541 map_str_create (&m);
1542 if (m == NULL)
1543 return 1;
1544
1545 if (get_identifier (&t, &m->key))
1546 {
1547 map_str_destroy (&m);
1548 return 1;
1549 }
1550 eat_spaces (&t);
1551
1552 if (get_string (&t, &m->data))
1553 {
1554 map_str_destroy (&m);
1555 return 1;
1556 }
1557 eat_spaces (&t);
1558
1559 *text = t;
1560 *ma = m;
1561 return 0;
1562 }
1563
1564 /*
1565 returns 0 on success,
1566 returns 1 otherwise,
1567 */
1568 static int get_error (const byte **text, error **er, map_str *maps)
1569 {
1570 const byte *t = *text;
1571 byte *temp = NULL;
1572
1573 if (*t != '.')
1574 return 0;
1575
1576 t++;
1577 if (get_identifier (&t, &temp))
1578 return 1;
1579 eat_spaces (&t);
1580
1581 if (!str_equal ((byte *) "error", temp))
1582 {
1583 mem_free ((void **) (void *) &temp);
1584 return 0;
1585 }
1586
1587 mem_free ((void **) (void *) &temp);
1588
1589 error_create (er);
1590 if (*er == NULL)
1591 return 1;
1592
1593 if (*t == '\"')
1594 {
1595 if (get_string (&t, &(**er).m_text))
1596 {
1597 error_destroy (er);
1598 return 1;
1599 }
1600 eat_spaces (&t);
1601 }
1602 else
1603 {
1604 if (get_identifier (&t, &temp))
1605 {
1606 error_destroy (er);
1607 return 1;
1608 }
1609 eat_spaces (&t);
1610
1611 if (map_str_find (&maps, temp, &(**er).m_text))
1612 {
1613 mem_free ((void **) (void *) &temp);
1614 error_destroy (er);
1615 return 1;
1616 }
1617
1618 mem_free ((void **) (void *) &temp);
1619 }
1620
1621 /* try to extract "token" from "...$token$..." */
1622 {
1623 byte *processed = NULL;
1624 unsigned int len = 0;
1625 int i = 0;
1626
1627 if (string_grow (&processed, &len, '\0'))
1628 {
1629 error_destroy (er);
1630 return 1;
1631 }
1632
1633 while (i < str_length ((**er).m_text))
1634 {
1635 /* check if the dollar sign is repeated - if so skip it */
1636 if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1637 {
1638 if (string_grow (&processed, &len, '$'))
1639 {
1640 mem_free ((void **) (void *) &processed);
1641 error_destroy (er);
1642 return 1;
1643 }
1644
1645 i += 2;
1646 }
1647 else if ((**er).m_text[i] != '$')
1648 {
1649 if (string_grow (&processed, &len, (**er).m_text[i]))
1650 {
1651 mem_free ((void **) (void *) &processed);
1652 error_destroy (er);
1653 return 1;
1654 }
1655
1656 i++;
1657 }
1658 else
1659 {
1660 if (string_grow (&processed, &len, '$'))
1661 {
1662 mem_free ((void **) (void *) &processed);
1663 error_destroy (er);
1664 return 1;
1665 }
1666
1667 {
1668 /* length of token being extracted */
1669 unsigned int tlen = 0;
1670
1671 if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1672 {
1673 mem_free ((void **) (void *) &processed);
1674 error_destroy (er);
1675 return 1;
1676 }
1677
1678 /* skip the dollar sign */
1679 i++;
1680
1681 while ((**er).m_text[i] != '$')
1682 {
1683 if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1684 {
1685 mem_free ((void **) (void *) &processed);
1686 error_destroy (er);
1687 return 1;
1688 }
1689
1690 i++;
1691 }
1692
1693 /* skip the dollar sign */
1694 i++;
1695 }
1696 }
1697 }
1698
1699 mem_free ((void **) &(**er).m_text);
1700 (**er).m_text = processed;
1701 }
1702
1703 *text = t;
1704 return 0;
1705 }
1706
1707 /*
1708 returns 0 on success,
1709 returns 1 otherwise,
1710 */
1711 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1712 {
1713 const byte *t = *text;
1714 byte *temp = NULL;
1715 emit *e = NULL;
1716 emit_dest dest;
1717
1718 if (*t != '.')
1719 return 0;
1720
1721 t++;
1722 if (get_identifier (&t, &temp))
1723 return 1;
1724 eat_spaces (&t);
1725
1726 /* .emit */
1727 if (str_equal ((byte *) "emit", temp))
1728 dest = ed_output;
1729 /* .load */
1730 else if (str_equal ((byte *) "load", temp))
1731 dest = ed_regbyte;
1732 else
1733 {
1734 mem_free ((void **) (void *) &temp);
1735 return 0;
1736 }
1737
1738 mem_free ((void **) (void *) &temp);
1739
1740 emit_create (&e);
1741 if (e == NULL)
1742 return 1;
1743
1744 e->m_emit_dest = dest;
1745
1746 if (dest == ed_regbyte)
1747 {
1748 if (get_identifier (&t, &e->m_regname))
1749 {
1750 emit_destroy (&e);
1751 return 1;
1752 }
1753 eat_spaces (&t);
1754 }
1755
1756 /* 0xNN */
1757 if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1758 {
1759 t += 2;
1760 e->m_byte = (byte) hex_convert (&t);
1761
1762 e->m_emit_type = et_byte;
1763 }
1764 /* NNN */
1765 else if (*t >= '0' && *t <= '9')
1766 {
1767 e->m_byte = (byte) dec_convert (&t);
1768
1769 e->m_emit_type = et_byte;
1770 }
1771 /* * */
1772 else if (*t == '*')
1773 {
1774 t++;
1775
1776 e->m_emit_type = et_stream;
1777 }
1778 /* $ */
1779 else if (*t == '$')
1780 {
1781 t++;
1782
1783 e->m_emit_type = et_position;
1784 }
1785 /* 'c' */
1786 else if (*t == '\'')
1787 {
1788 if (get_string (&t, &temp))
1789 {
1790 emit_destroy (&e);
1791 return 1;
1792 }
1793 e->m_byte = (byte) temp[0];
1794
1795 mem_free ((void **) (void *) &temp);
1796
1797 e->m_emit_type = et_byte;
1798 }
1799 else
1800 {
1801 if (get_identifier (&t, &temp))
1802 {
1803 emit_destroy (&e);
1804 return 1;
1805 }
1806
1807 if (map_byte_find (&mapb, temp, &e->m_byte))
1808 {
1809 mem_free ((void **) (void *) &temp);
1810 emit_destroy (&e);
1811 return 1;
1812 }
1813
1814 mem_free ((void **) (void *) &temp);
1815
1816 e->m_emit_type = et_byte;
1817 }
1818
1819 eat_spaces (&t);
1820
1821 if (get_emits (&t, &e->m_next, mapb))
1822 {
1823 emit_destroy (&e);
1824 return 1;
1825 }
1826
1827 *text = t;
1828 *em = e;
1829 return 0;
1830 }
1831
1832 /*
1833 returns 0 on success,
1834 returns 1 otherwise,
1835 */
1836 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1837 {
1838 const byte *t = *text;
1839 spec *s = NULL;
1840
1841 spec_create (&s);
1842 if (s == NULL)
1843 return 1;
1844
1845 /* first - read optional .if statement */
1846 if (*t == '.')
1847 {
1848 const byte *u = t;
1849 byte *keyword = NULL;
1850
1851 /* skip the dot */
1852 u++;
1853
1854 if (get_identifier (&u, &keyword))
1855 {
1856 spec_destroy (&s);
1857 return 1;
1858 }
1859
1860 /* .if */
1861 if (str_equal ((byte *) "if", keyword))
1862 {
1863 cond_create (&s->m_cond);
1864 if (s->m_cond == NULL)
1865 {
1866 spec_destroy (&s);
1867 return 1;
1868 }
1869
1870 /* skip the left paren */
1871 eat_spaces (&u);
1872 u++;
1873
1874 /* get the left operand */
1875 eat_spaces (&u);
1876 if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1877 {
1878 spec_destroy (&s);
1879 return 1;
1880 }
1881 s->m_cond->m_operands[0].m_type = cot_regbyte;
1882
1883 /* get the operator (!= or ==) */
1884 eat_spaces (&u);
1885 if (*u == '!')
1886 s->m_cond->m_type = ct_not_equal;
1887 else
1888 s->m_cond->m_type = ct_equal;
1889 u += 2;
1890 eat_spaces (&u);
1891
1892 if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1893 {
1894 /* skip the 0x prefix */
1895 u += 2;
1896
1897 /* get the right operand */
1898 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1899 s->m_cond->m_operands[1].m_type = cot_byte;
1900 }
1901 else /*if (*u >= '0' && *u <= '9')*/
1902 {
1903 /* get the right operand */
1904 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1905 s->m_cond->m_operands[1].m_type = cot_byte;
1906 }
1907
1908 /* skip the right paren */
1909 eat_spaces (&u);
1910 u++;
1911
1912 eat_spaces (&u);
1913
1914 t = u;
1915 }
1916
1917 mem_free ((void **) (void *) &keyword);
1918 }
1919
1920 if (*t == '\'')
1921 {
1922 byte *temp = NULL;
1923
1924 if (get_string (&t, &temp))
1925 {
1926 spec_destroy (&s);
1927 return 1;
1928 }
1929 eat_spaces (&t);
1930
1931 if (*t == '-')
1932 {
1933 byte *temp2 = NULL;
1934
1935 /* skip the '-' character */
1936 t++;
1937 eat_spaces (&t);
1938
1939 if (get_string (&t, &temp2))
1940 {
1941 mem_free ((void **) (void *) &temp);
1942 spec_destroy (&s);
1943 return 1;
1944 }
1945 eat_spaces (&t);
1946
1947 s->m_spec_type = st_byte_range;
1948 s->m_byte[0] = *temp;
1949 s->m_byte[1] = *temp2;
1950
1951 mem_free ((void **) (void *) &temp2);
1952 }
1953 else
1954 {
1955 s->m_spec_type = st_byte;
1956 *s->m_byte = *temp;
1957 }
1958
1959 mem_free ((void **) (void *) &temp);
1960 }
1961 else if (*t == '"')
1962 {
1963 if (get_string (&t, &s->m_string))
1964 {
1965 spec_destroy (&s);
1966 return 1;
1967 }
1968 eat_spaces (&t);
1969
1970 s->m_spec_type = st_string;
1971 }
1972 else if (*t == '.')
1973 {
1974 byte *keyword = NULL;
1975
1976 /* skip the dot */
1977 t++;
1978
1979 if (get_identifier (&t, &keyword))
1980 {
1981 spec_destroy (&s);
1982 return 1;
1983 }
1984 eat_spaces (&t);
1985
1986 /* .true */
1987 if (str_equal ((byte *) "true", keyword))
1988 {
1989 s->m_spec_type = st_true;
1990 }
1991 /* .false */
1992 else if (str_equal ((byte *) "false", keyword))
1993 {
1994 s->m_spec_type = st_false;
1995 }
1996 /* .debug */
1997 else if (str_equal ((byte *) "debug", keyword))
1998 {
1999 s->m_spec_type = st_debug;
2000 }
2001 /* .loop */
2002 else if (str_equal ((byte *) "loop", keyword))
2003 {
2004 if (get_identifier (&t, &s->m_string))
2005 {
2006 mem_free ((void **) (void *) &keyword);
2007 spec_destroy (&s);
2008 return 1;
2009 }
2010 eat_spaces (&t);
2011
2012 s->m_spec_type = st_identifier_loop;
2013 }
2014 mem_free ((void **) (void *) &keyword);
2015 }
2016 else
2017 {
2018 if (get_identifier (&t, &s->m_string))
2019 {
2020 spec_destroy (&s);
2021 return 1;
2022 }
2023 eat_spaces (&t);
2024
2025 s->m_spec_type = st_identifier;
2026 }
2027
2028 if (get_error (&t, &s->m_errtext, maps))
2029 {
2030 spec_destroy (&s);
2031 return 1;
2032 }
2033
2034 if (get_emits (&t, &s->m_emits, mapb))
2035 {
2036 spec_destroy (&s);
2037 return 1;
2038 }
2039
2040 *text = t;
2041 *sp = s;
2042 return 0;
2043 }
2044
2045 /*
2046 returns 0 on success,
2047 returns 1 otherwise,
2048 */
2049 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2050 {
2051 const byte *t = *text;
2052 rule *r = NULL;
2053
2054 rule_create (&r);
2055 if (r == NULL)
2056 return 1;
2057
2058 if (get_spec (&t, &r->m_specs, maps, mapb))
2059 {
2060 rule_destroy (&r);
2061 return 1;
2062 }
2063
2064 while (*t != ';')
2065 {
2066 byte *op = NULL;
2067 spec *sp = NULL;
2068
2069 /* skip the dot that precedes "and" or "or" */
2070 t++;
2071
2072 /* read "and" or "or" keyword */
2073 if (get_identifier (&t, &op))
2074 {
2075 rule_destroy (&r);
2076 return 1;
2077 }
2078 eat_spaces (&t);
2079
2080 if (r->m_oper == op_none)
2081 {
2082 /* .and */
2083 if (str_equal ((byte *) "and", op))
2084 r->m_oper = op_and;
2085 /* .or */
2086 else
2087 r->m_oper = op_or;
2088 }
2089
2090 mem_free ((void **) (void *) &op);
2091
2092 if (get_spec (&t, &sp, maps, mapb))
2093 {
2094 rule_destroy (&r);
2095 return 1;
2096 }
2097
2098 spec_append (&r->m_specs, sp);
2099 }
2100
2101 /* skip the semicolon */
2102 t++;
2103 eat_spaces (&t);
2104
2105 *text = t;
2106 *ru = r;
2107 return 0;
2108 }
2109
2110 /*
2111 returns 0 on success,
2112 returns 1 otherwise,
2113 */
2114 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2115 {
2116 if (map_rule_find (&mapr, symbol, ru))
2117 return 1;
2118
2119 (**ru).m_referenced = 1;
2120
2121 return 0;
2122 }
2123
2124 /*
2125 returns 0 on success,
2126 returns 1 otherwise,
2127 */
2128 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2129 byte **string_symbol, map_byte *regbytes)
2130 {
2131 rule *rulez = di->m_rulez;
2132
2133 /* update dependecies for the root and lexer symbols */
2134 if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2135 (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2136 return 1;
2137
2138 mem_free ((void **) syntax_symbol);
2139 mem_free ((void **) string_symbol);
2140
2141 /* update dependecies for the rest of the rules */
2142 while (rulez)
2143 {
2144 spec *sp = rulez->m_specs;
2145
2146 /* iterate through all the specifiers */
2147 while (sp)
2148 {
2149 /* update dependency for identifier */
2150 if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2151 {
2152 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2153 return 1;
2154
2155 mem_free ((void **) &sp->m_string);
2156 }
2157
2158 /* some errtexts reference to a rule */
2159 if (sp->m_errtext && sp->m_errtext->m_token_name)
2160 {
2161 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2162 return 1;
2163
2164 mem_free ((void **) &sp->m_errtext->m_token_name);
2165 }
2166
2167 /* update dependency for condition */
2168 if (sp->m_cond)
2169 {
2170 int i;
2171 for (i = 0; i < 2; i++)
2172 if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2173 {
2174 sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2175 sp->m_cond->m_operands[i].m_regname);
2176
2177 if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2178 return 1;
2179
2180 mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2181 }
2182 }
2183
2184 /* update dependency for all .load instructions */
2185 if (sp->m_emits)
2186 {
2187 emit *em = sp->m_emits;
2188 while (em != NULL)
2189 {
2190 if (em->m_emit_dest == ed_regbyte)
2191 {
2192 em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2193
2194 if (em->m_regbyte == NULL)
2195 return 1;
2196
2197 mem_free ((void **) &em->m_regname);
2198 }
2199
2200 em = em->m_next;
2201 }
2202 }
2203
2204 sp = sp->next;
2205 }
2206
2207 rulez = rulez->next;
2208 }
2209
2210 /* check for unreferenced symbols */
2211 rulez = di->m_rulez;
2212 while (rulez != NULL)
2213 {
2214 if (!rulez->m_referenced)
2215 {
2216 map_rule *ma = mapr;
2217 while (ma)
2218 {
2219 if (ma->data == rulez)
2220 {
2221 set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2222 return 1;
2223 }
2224 ma = ma->next;
2225 }
2226 }
2227 rulez = rulez->next;
2228 }
2229
2230 return 0;
2231 }
2232
2233 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2234 {
2235 byte values[2];
2236 int i;
2237
2238 if (co == NULL)
2239 return 1;
2240
2241 for (i = 0; i < 2; i++)
2242 switch (co->m_operands[i].m_type)
2243 {
2244 case cot_byte:
2245 values[i] = co->m_operands[i].m_byte;
2246 break;
2247 case cot_regbyte:
2248 values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2249 break;
2250 }
2251
2252 switch (co->m_type)
2253 {
2254 case ct_equal:
2255 return values[0] == values[1];
2256 case ct_not_equal:
2257 return values[0] != values[1];
2258 }
2259
2260 return 0;
2261 }
2262
2263 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2264 {
2265 while (top != limit)
2266 {
2267 regbyte_ctx *rbc = top->m_prev;
2268 regbyte_ctx_destroy (&top);
2269 top = rbc;
2270 }
2271 }
2272
/*
    outcome of matching a rule against the input text
*/
typedef enum match_result_
{
    mr_not_matched,     /* the examined string does not match */
    mr_matched,         /* the examined string matches */
    mr_error_raised,    /* mr_not_matched + error has been raised */
    mr_dont_emit,       /* used by identifier loops only */
    mr_internal_error   /* an internal error has occurred such as out of memory */
} match_result;
2281
2282 /*
2283 * This function does the main job. It parses the text and generates output data.
2284 */
2285 static match_result
2286 match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string,
2287 regbyte_ctx **rbc)
2288 {
2289 int ind = *index;
2290 match_result status = mr_not_matched;
2291 spec *sp = ru->m_specs;
2292 regbyte_ctx *ctx = *rbc;
2293
2294 /* for every specifier in the rule */
2295 while (sp)
2296 {
2297 int i, len, save_ind = ind;
2298 barray *array = NULL;
2299
2300 if (satisfies_condition (sp->m_cond, ctx))
2301 {
2302 switch (sp->m_spec_type)
2303 {
2304 case st_identifier:
2305 barray_create (&array);
2306 if (array == NULL)
2307 {
2308 free_regbyte_ctx_stack (ctx, *rbc);
2309 return mr_internal_error;
2310 }
2311
2312 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2313
2314 if (status == mr_internal_error)
2315 {
2316 free_regbyte_ctx_stack (ctx, *rbc);
2317 barray_destroy (&array);
2318 return mr_internal_error;
2319 }
2320 break;
2321 case st_string:
2322 len = str_length (sp->m_string);
2323
2324 /* prefilter the stream */
2325 if (!filtering_string && di->m_string)
2326 {
2327 barray *ba;
2328 int filter_index = 0;
2329 match_result result;
2330 regbyte_ctx *null_ctx = NULL;
2331
2332 barray_create (&ba);
2333 if (ba == NULL)
2334 {
2335 free_regbyte_ctx_stack (ctx, *rbc);
2336 return mr_internal_error;
2337 }
2338
2339 result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2340
2341 if (result == mr_internal_error)
2342 {
2343 free_regbyte_ctx_stack (ctx, *rbc);
2344 barray_destroy (&ba);
2345 return mr_internal_error;
2346 }
2347
2348 if (result != mr_matched)
2349 {
2350 barray_destroy (&ba);
2351 status = mr_not_matched;
2352 break;
2353 }
2354
2355 barray_destroy (&ba);
2356
2357 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2358 {
2359 status = mr_not_matched;
2360 break;
2361 }
2362
2363 status = mr_matched;
2364 ind += len;
2365 }
2366 else
2367 {
2368 status = mr_matched;
2369 for (i = 0; status == mr_matched && i < len; i++)
2370 if (text[ind + i] != sp->m_string[i])
2371 status = mr_not_matched;
2372
2373 if (status == mr_matched)
2374 ind += len;
2375 }
2376 break;
2377 case st_byte:
2378 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2379 if (status == mr_matched)
2380 ind++;
2381 break;
2382 case st_byte_range:
2383 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2384 mr_matched : mr_not_matched;
2385 if (status == mr_matched)
2386 ind++;
2387 break;
2388 case st_true:
2389 status = mr_matched;
2390 break;
2391 case st_false:
2392 status = mr_not_matched;
2393 break;
2394 case st_debug:
2395 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2396 break;
2397 case st_identifier_loop:
2398 barray_create (&array);
2399 if (array == NULL)
2400 {
2401 free_regbyte_ctx_stack (ctx, *rbc);
2402 return mr_internal_error;
2403 }
2404
2405 status = mr_dont_emit;
2406 for (;;)
2407 {
2408 match_result result;
2409
2410 save_ind = ind;
2411 result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2412
2413 if (result == mr_error_raised)
2414 {
2415 status = result;
2416 break;
2417 }
2418 else if (result == mr_matched)
2419 {
2420 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2421 barray_append (ba, &array))
2422 {
2423 free_regbyte_ctx_stack (ctx, *rbc);
2424 barray_destroy (&array);
2425 return mr_internal_error;
2426 }
2427 barray_destroy (&array);
2428 barray_create (&array);
2429 if (array == NULL)
2430 {
2431 free_regbyte_ctx_stack (ctx, *rbc);
2432 return mr_internal_error;
2433 }
2434 }
2435 else if (result == mr_internal_error)
2436 {
2437 free_regbyte_ctx_stack (ctx, *rbc);
2438 barray_destroy (&array);
2439 return mr_internal_error;
2440 }
2441 else
2442 break;
2443 }
2444 break;
2445 }
2446 }
2447 else
2448 {
2449 status = mr_not_matched;
2450 }
2451
2452 if (status == mr_error_raised)
2453 {
2454 free_regbyte_ctx_stack (ctx, *rbc);
2455 barray_destroy (&array);
2456
2457 return mr_error_raised;
2458 }
2459
2460 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2461 {
2462 free_regbyte_ctx_stack (ctx, *rbc);
2463 barray_destroy (&array);
2464
2465 if (sp->m_errtext)
2466 {
2467 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2468 ind), ind);
2469
2470 return mr_error_raised;
2471 }
2472
2473 return mr_not_matched;
2474 }
2475
2476 if (status == mr_matched)
2477 {
2478 if (sp->m_emits)
2479 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2480 {
2481 free_regbyte_ctx_stack (ctx, *rbc);
2482 barray_destroy (&array);
2483 return mr_internal_error;
2484 }
2485
2486 if (array)
2487 if (barray_append (ba, &array))
2488 {
2489 free_regbyte_ctx_stack (ctx, *rbc);
2490 barray_destroy (&array);
2491 return mr_internal_error;
2492 }
2493 }
2494
2495 barray_destroy (&array);
2496
2497 /* if the rule operator is a logical or, we pick up the first matching specifier */
2498 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2499 {
2500 *index = ind;
2501 *rbc = ctx;
2502 return mr_matched;
2503 }
2504
2505 sp = sp->next;
2506 }
2507
2508 /* everything went fine - all specifiers match up */
2509 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2510 {
2511 *index = ind;
2512 *rbc = ctx;
2513 return mr_matched;
2514 }
2515
2516 free_regbyte_ctx_stack (ctx, *rbc);
2517 return mr_not_matched;
2518 }
2519
2520 static match_result
2521 fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP,
2522 int filtering_string, regbyte_ctx **rbc)
2523 {
2524 int ind = *index;
2525 int _P = filtering_string ? 0 : *_PP;
2526 int _P2;
2527 match_result status = mr_not_matched;
2528 spec *sp = ru->m_specs;
2529 regbyte_ctx *ctx = *rbc;
2530
2531 /* for every specifier in the rule */
2532 while (sp)
2533 {
2534 int i, len, save_ind = ind;
2535
2536 _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2537 if (bytepool_reserve (_BP, _P2))
2538 {
2539 free_regbyte_ctx_stack (ctx, *rbc);
2540 return mr_internal_error;
2541 }
2542
2543 if (satisfies_condition (sp->m_cond, ctx))
2544 {
2545 switch (sp->m_spec_type)
2546 {
2547 case st_identifier:
2548 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2549
2550 if (status == mr_internal_error)
2551 {
2552 free_regbyte_ctx_stack (ctx, *rbc);
2553 return mr_internal_error;
2554 }
2555 break;
2556 case st_string:
2557 len = str_length (sp->m_string);
2558
2559 /* prefilter the stream */
2560 if (!filtering_string && di->m_string)
2561 {
2562 int filter_index = 0;
2563 match_result result;
2564 regbyte_ctx *null_ctx = NULL;
2565
2566 result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2567
2568 if (result == mr_internal_error)
2569 {
2570 free_regbyte_ctx_stack (ctx, *rbc);
2571 return mr_internal_error;
2572 }
2573
2574 if (result != mr_matched)
2575 {
2576 status = mr_not_matched;
2577 break;
2578 }
2579
2580 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2581 {
2582 status = mr_not_matched;
2583 break;
2584 }
2585
2586 status = mr_matched;
2587 ind += len;
2588 }
2589 else
2590 {
2591 status = mr_matched;
2592 for (i = 0; status == mr_matched && i < len; i++)
2593 if (text[ind + i] != sp->m_string[i])
2594 status = mr_not_matched;
2595
2596 if (status == mr_matched)
2597 ind += len;
2598 }
2599 break;
2600 case st_byte:
2601 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2602 if (status == mr_matched)
2603 ind++;
2604 break;
2605 case st_byte_range:
2606 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2607 mr_matched : mr_not_matched;
2608 if (status == mr_matched)
2609 ind++;
2610 break;
2611 case st_true:
2612 status = mr_matched;
2613 break;
2614 case st_false:
2615 status = mr_not_matched;
2616 break;
2617 case st_debug:
2618 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2619 break;
2620 case st_identifier_loop:
2621 status = mr_dont_emit;
2622 for (;;)
2623 {
2624 match_result result;
2625
2626 save_ind = ind;
2627 result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2628
2629 if (result == mr_error_raised)
2630 {
2631 status = result;
2632 break;
2633 }
2634 else if (result == mr_matched)
2635 {
2636 if (!filtering_string)
2637 {
2638 if (sp->m_emits != NULL)
2639 {
2640 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2641 {
2642 free_regbyte_ctx_stack (ctx, *rbc);
2643 return mr_internal_error;
2644 }
2645 }
2646
2647 _P = _P2;
2648 _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2649 if (bytepool_reserve (_BP, _P2))
2650 {
2651 free_regbyte_ctx_stack (ctx, *rbc);
2652 return mr_internal_error;
2653 }
2654 }
2655 }
2656 else if (result == mr_internal_error)
2657 {
2658 free_regbyte_ctx_stack (ctx, *rbc);
2659 return mr_internal_error;
2660 }
2661 else
2662 break;
2663 }
2664 break;
2665 }
2666 }
2667 else
2668 {
2669 status = mr_not_matched;
2670 }
2671
2672 if (status == mr_error_raised)
2673 {
2674 free_regbyte_ctx_stack (ctx, *rbc);
2675
2676 return mr_error_raised;
2677 }
2678
2679 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2680 {
2681 free_regbyte_ctx_stack (ctx, *rbc);
2682
2683 if (sp->m_errtext)
2684 {
2685 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2686 ind), ind);
2687
2688 return mr_error_raised;
2689 }
2690
2691 return mr_not_matched;
2692 }
2693
2694 if (status == mr_matched)
2695 {
2696 if (sp->m_emits != NULL) {
2697 const byte ch = (ind <= 0) ? 0 : text[ind - 1];
2698 if (emit_push (sp->m_emits, _BP->_F + _P, ch, save_ind, &ctx))
2699 {
2700 free_regbyte_ctx_stack (ctx, *rbc);
2701 return mr_internal_error;
2702 }
2703
2704 }
2705 _P = _P2;
2706 }
2707
2708 /* if the rule operator is a logical or, we pick up the first matching specifier */
2709 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2710 {
2711 *index = ind;
2712 *rbc = ctx;
2713 if (!filtering_string)
2714 *_PP = _P;
2715 return mr_matched;
2716 }
2717
2718 sp = sp->next;
2719 }
2720
2721 /* everything went fine - all specifiers match up */
2722 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2723 {
2724 *index = ind;
2725 *rbc = ctx;
2726 if (!filtering_string)
2727 *_PP = _P;
2728 return mr_matched;
2729 }
2730
2731 free_regbyte_ctx_stack (ctx, *rbc);
2732 return mr_not_matched;
2733 }
2734
2735 static byte *
2736 error_get_token (error *er, dict *di, const byte *text, int ind)
2737 {
2738 byte *str = NULL;
2739
2740 if (er->m_token)
2741 {
2742 barray *ba;
2743 int filter_index = 0;
2744 regbyte_ctx *ctx = NULL;
2745
2746 barray_create (&ba);
2747 if (ba != NULL)
2748 {
2749 if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2750 filter_index)
2751 {
2752 str = (byte *) mem_alloc (filter_index + 1);
2753 if (str != NULL)
2754 {
2755 str_copy_n (str, text + ind, filter_index);
2756 str[filter_index] = '\0';
2757 }
2758 }
2759 barray_destroy (&ba);
2760 }
2761 }
2762
2763 return str;
2764 }
2765
2766 typedef struct grammar_load_state_
2767 {
2768 dict *di;
2769 byte *syntax_symbol;
2770 byte *string_symbol;
2771 map_str *maps;
2772 map_byte *mapb;
2773 map_rule *mapr;
2774 } grammar_load_state;
2775
2776 static void grammar_load_state_create (grammar_load_state **gr)
2777 {
2778 *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2779 if (*gr)
2780 {
2781 (**gr).di = NULL;
2782 (**gr).syntax_symbol = NULL;
2783 (**gr).string_symbol = NULL;
2784 (**gr).maps = NULL;
2785 (**gr).mapb = NULL;
2786 (**gr).mapr = NULL;
2787 }
2788 }
2789
2790 static void grammar_load_state_destroy (grammar_load_state **gr)
2791 {
2792 if (*gr)
2793 {
2794 dict_destroy (&(**gr).di);
2795 mem_free ((void **) &(**gr).syntax_symbol);
2796 mem_free ((void **) &(**gr).string_symbol);
2797 map_str_destroy (&(**gr).maps);
2798 map_byte_destroy (&(**gr).mapb);
2799 map_rule_destroy (&(**gr).mapr);
2800 mem_free ((void **) gr);
2801 }
2802 }
2803
2804
/*
    prints a grammar loading diagnostic to stderr; line is the source line of the failing check
*/
static void error_msg(int line, const char *msg)
{
    FILE *out = stderr;

    fprintf(out, "Error in grammar_load_from_text() at line %d: %s\n", line, msg);
}
2809
2810
2811 /*
2812 the API
2813 */
2814 grammar grammar_load_from_text (const byte *text)
2815 {
2816 grammar_load_state *g = NULL;
2817 grammar id = 0;
2818
2819 clear_last_error ();
2820
2821 grammar_load_state_create (&g);
2822 if (g == NULL) {
2823 error_msg(__LINE__, "");
2824 return 0;
2825 }
2826
2827 dict_create (&g->di);
2828 if (g->di == NULL)
2829 {
2830 grammar_load_state_destroy (&g);
2831 error_msg(__LINE__, "");
2832 return 0;
2833 }
2834
2835 eat_spaces (&text);
2836
2837 /* skip ".syntax" keyword */
2838 text += 7;
2839 eat_spaces (&text);
2840
2841 /* retrieve root symbol */
2842 if (get_identifier (&text, &g->syntax_symbol))
2843 {
2844 grammar_load_state_destroy (&g);
2845 error_msg(__LINE__, "");
2846 return 0;
2847 }
2848 eat_spaces (&text);
2849
2850 /* skip semicolon */
2851 text++;
2852 eat_spaces (&text);
2853
2854 while (*text)
2855 {
2856 byte *symbol = NULL;
2857 int is_dot = *text == '.';
2858
2859 if (is_dot)
2860 text++;
2861
2862 if (get_identifier (&text, &symbol))
2863 {
2864 grammar_load_state_destroy (&g);
2865 error_msg(__LINE__, "");
2866 return 0;
2867 }
2868 eat_spaces (&text);
2869
2870 /* .emtcode */
2871 if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2872 {
2873 map_byte *ma = NULL;
2874
2875 mem_free ((void **) (void *) &symbol);
2876
2877 if (get_emtcode (&text, &ma))
2878 {
2879 grammar_load_state_destroy (&g);
2880 error_msg(__LINE__, "");
2881 return 0;
2882 }
2883
2884 map_byte_append (&g->mapb, ma);
2885 }
2886 /* .regbyte */
2887 else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2888 {
2889 map_byte *ma = NULL;
2890
2891 mem_free ((void **) (void *) &symbol);
2892
2893 if (get_regbyte (&text, &ma))
2894 {
2895 grammar_load_state_destroy (&g);
2896 error_msg(__LINE__, "");
2897 return 0;
2898 }
2899
2900 map_byte_append (&g->di->m_regbytes, ma);
2901 }
2902 /* .errtext */
2903 else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2904 {
2905 map_str *ma = NULL;
2906
2907 mem_free ((void **) (void *) &symbol);
2908
2909 if (get_errtext (&text, &ma))
2910 {
2911 grammar_load_state_destroy (&g);
2912 error_msg(__LINE__, "");
2913 return 0;
2914 }
2915
2916 map_str_append (&g->maps, ma);
2917 }
2918 /* .string */
2919 else if (is_dot && str_equal (symbol, (byte *) "string"))
2920 {
2921 mem_free ((void **) (void *) &symbol);
2922
2923 if (g->di->m_string != NULL)
2924 {
2925 grammar_load_state_destroy (&g);
2926 error_msg(__LINE__, "");
2927 return 0;
2928 }
2929
2930 if (get_identifier (&text, &g->string_symbol))
2931 {
2932 grammar_load_state_destroy (&g);
2933 error_msg(__LINE__, "");
2934 return 0;
2935 }
2936
2937 /* skip semicolon */
2938 eat_spaces (&text);
2939 text++;
2940 eat_spaces (&text);
2941 }
2942 else
2943 {
2944 rule *ru = NULL;
2945 map_rule *ma = NULL;
2946
2947 if (get_rule (&text, &ru, g->maps, g->mapb))
2948 {
2949 grammar_load_state_destroy (&g);
2950 error_msg(__LINE__, "");
2951 return 0;
2952 }
2953
2954 rule_append (&g->di->m_rulez, ru);
2955
2956 /* if a rule consist of only one specifier, give it an ".and" operator */
2957 if (ru->m_oper == op_none)
2958 ru->m_oper = op_and;
2959
2960 map_rule_create (&ma);
2961 if (ma == NULL)
2962 {
2963 grammar_load_state_destroy (&g);
2964 error_msg(__LINE__, "");
2965 return 0;
2966 }
2967
2968 ma->key = symbol;
2969 ma->data = ru;
2970 map_rule_append (&g->mapr, ma);
2971 }
2972 }
2973
2974 if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2975 g->di->m_regbytes))
2976 {
2977 grammar_load_state_destroy (&g);
2978 error_msg(__LINE__, "update_dependencies() failed");
2979 return 0;
2980 }
2981
2982 dict_append (&g_dicts, g->di);
2983 id = g->di->m_id;
2984 g->di = NULL;
2985
2986 grammar_load_state_destroy (&g);
2987
2988 return id;
2989 }
2990
2991 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2992 {
2993 dict *di = NULL;
2994 map_byte *reg = NULL;
2995
2996 clear_last_error ();
2997
2998 dict_find (&g_dicts, id, &di);
2999 if (di == NULL)
3000 {
3001 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3002 return 0;
3003 }
3004
3005 reg = map_byte_locate (&di->m_regbytes, name);
3006 if (reg == NULL)
3007 {
3008 set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
3009 return 0;
3010 }
3011
3012 reg->data = value;
3013 return 1;
3014 }
3015
3016 /*
3017 internal checking function used by both grammar_check and grammar_fast_check functions
3018 */
3019 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3020 unsigned int estimate_prod_size, int use_fast_path)
3021 {
3022 dict *di = NULL;
3023 int index = 0;
3024
3025 clear_last_error ();
3026
3027 dict_find (&g_dicts, id, &di);
3028 if (di == NULL)
3029 {
3030 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3031 return 0;
3032 }
3033
3034 *prod = NULL;
3035 *size = 0;
3036
3037 if (use_fast_path)
3038 {
3039 regbyte_ctx *rbc = NULL;
3040 bytepool *bp = NULL;
3041 int _P = 0;
3042
3043 bytepool_create (&bp, estimate_prod_size);
3044 if (bp == NULL)
3045 return 0;
3046
3047 if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3048 {
3049 bytepool_destroy (&bp);
3050 free_regbyte_ctx_stack (rbc, NULL);
3051 return 0;
3052 }
3053
3054 free_regbyte_ctx_stack (rbc, NULL);
3055
3056 *prod = bp->_F;
3057 *size = _P;
3058 bp->_F = NULL;
3059 bytepool_destroy (&bp);
3060 }
3061 else
3062 {
3063 regbyte_ctx *rbc = NULL;
3064 barray *ba = NULL;
3065
3066 barray_create (&ba);
3067 if (ba == NULL)
3068 return 0;
3069
3070 if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3071 {
3072 barray_destroy (&ba);
3073 free_regbyte_ctx_stack (rbc, NULL);
3074 return 0;
3075 }
3076
3077 free_regbyte_ctx_stack (rbc, NULL);
3078
3079 *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3080 if (*prod == NULL)
3081 {
3082 barray_destroy (&ba);
3083 return 0;
3084 }
3085
3086 mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3087 *size = ba->len;
3088 barray_destroy (&ba);
3089 }
3090
3091 return 1;
3092 }
3093
3094 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3095 {
3096 return _grammar_check (id, text, prod, size, 0, 0);
3097 }
3098
3099 int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3100 unsigned int estimate_prod_size)
3101 {
3102 return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3103 }
3104
3105 int grammar_destroy (grammar id)
3106 {
3107 dict **di = &g_dicts;
3108
3109 clear_last_error ();
3110
3111 while (*di != NULL)
3112 {
3113 if ((**di).m_id == id)
3114 {
3115 dict *tmp = *di;
3116 *di = (**di).next;
3117 dict_destroy (&tmp);
3118 return 1;
3119 }
3120
3121 di = &(**di).next;
3122 }
3123
3124 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3125 return 0;
3126 }
3127
/*
    appends x to the null-terminated string in text, whose capacity is size bytes and
    whose current length is *len

    Once the buffer is full, up to three characters preceding the terminator are replaced
    with dots, *dots_made is set and all further calls are ignored.
*/
static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
{
    int remaining;

    /* the string has already been truncated - nothing more to add */
    if (*dots_made != 0)
        return;

    if (*len < size - 1)
    {
        text[*len] = x;
        *len += 1;
        text[*len] = '\0';
        return;
    }

    /* out of room: walk back three positions, overwriting those that exist */
    for (remaining = 3; remaining > 0; remaining--)
    {
        *len -= 1;
        if (*len >= 0)
            text[*len] = '.';
    }

    *dots_made = 1;
}
3147
3148 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3149 {
3150 int len = 0, dots_made = 0;
3151 const byte *p = error_message;
3152
3153 *text = '\0';
3154
3155 if (p)
3156 {
3157 while (*p)
3158 {
3159 if (*p == '$')
3160 {
3161 const byte *r = error_param;
3162
3163 while (*r)
3164 {
3165 append_character (*r++, text, &dots_made, &len, (int) size);
3166 }
3167
3168 p++;
3169 }
3170 else
3171 {
3172 append_character (*p++, text, &dots_made, &len, size);
3173 }
3174 }
3175 }
3176
3177 *pos = error_position;
3178 }