remove stray tab
[mesa.git] / src / mesa / shader / grammar / grammar.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.6
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file grammar.c
27 * syntax parsing engine
28 * \author Michal Krol
29 */
30
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
33 #endif
34
35 /*
36 */
37
38 /*
39 INTRODUCTION
40 ------------
41
42 The task is to check the syntax of an input string. Input string is a stream of ASCII
43 characters terminated with a null-character ('\0'). Checking it using C language is
44 difficult and hard to implement without bugs. It is hard to maintain and make changes when
45 the syntax changes.
46
47 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
48 only small changes. Even use of macros does not solve the problem because macros cannot
49 erase the complexity of the problem.
50
51 The resolution is to create a new language that will be highly oriented to our task. Once
52 we describe a particular syntax, we are done. We can then focus on the code that implements
53 the language. Its size and complexity are relatively small compared to the code that directly
54 checks the syntax.
55
56 First, we must implement our new language. Here, the language is implemented in C, but it
57 could also be implemented in any other language. The code is listed below. We must take
58 good care that it is bug free. This is simple because the code is simple and clean.
59
60 Next, we must describe the syntax of our new language in itself. Once created and checked
61 manually that it is correct, we can use it to check other scripts.
62
63 Note that our new language loading code does not have to check the syntax. It is because we
64 assume that the script describing itself is correct, and other scripts can be syntactically
65 checked by the former script. The loading code must only do semantic checking which leads us to
66 simple resolving references.
67
68 THE LANGUAGE
69 ------------
70
71 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
72 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
73 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
74 connected with ".and" or ".or" operator. These operators cannot be mixed together within one
75 definition. Specifier can be a symbol, string, character, character range or a special
76 keyword ".true" or ".false".
77
78 At the very beginning of the script there is a declaration of a root symbol in the form:
79 .syntax <root_symbol>;
80 The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
81 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
82 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
83 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
84 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
85 true if any of the specifiers evaluates to true. If definition contains only one specifier,
86 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
87
88 If specifier is a ".true" keyword, it always evaluates to true.
89
90 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
91 when it does not evaluate to true.
92
93 Character range specifier is in the form:
94 '<first_character>' - '<second_character>'
95 If specifier is a character range, it evaluates to true if character in the stream is greater
96 or equal to <first_character> and less or equal to <second_character>. In that situation
97 the stream pointer is advanced to point to next character in the stream. All C-style escape
98 sequences are supported although trigraph sequences are not. The comparisons are performed
99 on 8-bit unsigned integers.
100
101 Character specifier is in the form:
102 '<single_character>'
103 It evaluates to true if the following character range specifier evaluates to true:
104 '<single_character>' - '<single_character>'
105
106 String specifier is in the form:
107 "<string>"
108 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
109 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
110 the following character specifier evaluates to true:
111 '<string>[i]'
112 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
113
114 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
115 .loop <symbol> (1)
116 where <symbol> is defined as follows:
117 <symbol> <definition>; (2)
118 Construction (1) is replaced by the following code:
119 <symbol$1>
120 and declaration (2) is replaced by the following:
121 <symbol$1> <symbol$2> .or .true;
122 <symbol$2> <symbol> .and <symbol$1>;
123 <symbol> <definition>;
124
125 Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
126 registers that can be accessed in the syn body. Each reg has its name and a default value.
127 The register is one byte wide. The C code can change the default value by calling
128 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
129 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
130 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
131 where <operator> can be == or !=. If the condition evaluates to false, the specifier
132 evaluates to .false. Otherwise it evaluates to the specifier.
133
134 ESCAPE SEQUENCES
135 ----------------
136
137 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
138 All occurrences of the characters in the first column are replaced with the corresponding
139 character in the second column.
140
141 Escape sequence Represents
142 ------------------------------------------------------------------------------------------------
143 \a Bell (alert)
144 \b Backspace
145 \f Formfeed
146 \n New line
147 \r Carriage return
148 \t Horizontal tab
149 \v Vertical tab
150 \' Single quotation mark
151 \" Double quotation mark
152 \\ Backslash
153 \? Literal question mark
154 \ooo ASCII character in octal notation
155 \xhhh ASCII character in hexadecimal notation
156 ------------------------------------------------------------------------------------------------
157
158 RAISING ERRORS
159 --------------
160
161 Any specifier can be followed by a special construction that is executed when the specifier
162 evaluates to false. The construction is in the form:
163 .error <ERROR_TEXT>
164 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
165 in the form:
166 .errtext <ERROR_TEXT> "<error_desc>"
167 When specifier evaluates to false and this construction is present, parsing is stopped
168 immediately and <error_desc> is returned as a result of parsing. The error position is also
169 returned and it is meant as an offset from the beginning of the stream to the character that
170 was valid so far. Example:
171
172 (**** syntax script ****)
173
174 .syntax program;
175 .errtext MISSING_SEMICOLON "missing ';'"
176 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
177 .loop space .and '\0';
178 declaration "declare" .and .loop space .and identifier;
179 space ' ';
180
181 (**** sample code ****)
182
183 declare foo ,
184
185 In the example above checking the sample code will result in error message "missing ';'" and
186 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
187 assure that there is no code after semicolon - only spaces.
188 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
189 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
190 the identifier name. The starting position is the error position. The length of the resulting
191 string is the position after invoking the symbol.
192
193 PRODUCTION
194 ----------
195
196 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
197 that evaluate to true. That is, every specifier and optional error construction can be followed
198 by a number of emit constructions that are in the form:
199 .emit <parameter>
200 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
201 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
202 in the form:
203 .emtcode <identifier> <hex_number>
204
205 When given specifier evaluates to true, all emits associated with the specifier are output
206 in order they were declared. A star means that last-read character should be output instead
207 of constant value. Example:
208
209 (**** syntax script ****)
210
211 .syntax foobar;
212 .emtcode WORD_FOO 0x01
213 .emtcode WORD_BAR 0x02
214 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
215 FOO "foo" .and SPACE;
216 BAR "bar" .and SPACE;
217 SPACE ' ' .or '\0';
218
219 (**** sample text 1 ****)
220
221 foo
222
223 (**** sample text 2 ****)
224
225 foobar
226
227 For both samples the result will be one-element array. For first sample text it will be
228 value 1, for second - 0. Note that every text will be accepted because of presence of
229 .true as an alternative.
230
231 Another example:
232
233 (**** syntax script ****)
234
235 .syntax declaration;
236 .emtcode VARIABLE 0x01
237 declaration "declare" .and .loop space .and
238 identifier .emit VARIABLE .and (1)
239 .true .emit 0x00 .and (2)
240 .loop space .and ';';
241 space ' ' .or '\t';
242 identifier .loop id_char .emit *; (3)
243 id_char 'a'-'z' .or 'A'-'Z' .or '_';
244
245 (**** sample code ****)
246
247 declare fubar;
248
249 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
250 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
251 to terminate the string with null to signal when the string ends. Specifier (3) outputs
252 all characters that make declared identifier. The result of sample code will be the
253 following array:
254 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
255
256 If .emit is followed by dollar $, it means that current position should be output. Current
257 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
258 first character consumed by the specifier associated with the .emit instruction. Current
259 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
260 first).
261 */
262
263 static void mem_free (void **);
264
265 /*
266 internal error messages
267 */
268 static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";
269 static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";
270 static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";
271 static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";
272 /*static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";*/
273 static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
274
275 static const byte *error_message = NULL; /* points to one of the error messages above */
276 static byte *error_param = NULL; /* this is inserted into error_message in place of $ */
277 static int error_position = -1;
278
279 static byte *unknown = (byte *) "???";
280
281 static void clear_last_error (void)
282 {
283 /* reset error message */
284 error_message = NULL;
285
286 /* free error parameter - if error_param is a "???" don't free it - it's static */
287 if (error_param != unknown)
288 mem_free ((void **) (void *) &error_param);
289 else
290 error_param = NULL;
291
292 /* reset error position */
293 error_position = -1;
294 }
295
296 static void set_last_error (const byte *msg, byte *param, int pos)
297 {
298 /* error message can be set only once */
299 if (error_message != NULL)
300 {
301 mem_free ((void **) (void *) &param);
302 return;
303 }
304
305 error_message = msg;
306
307 /* if param is NULL, set error_param to unknown ("???") */
308 /* note: do not try to strdup the "???" - it may be that we are here because of */
309 /* out of memory error so strdup can fail */
310 if (param != NULL)
311 error_param = param;
312 else
313 error_param = unknown;
314
315 error_position = pos;
316 }
317
318 /*
319 memory management routines
320 */
321 static void *mem_alloc (size_t size)
322 {
323 void *ptr = grammar_alloc_malloc (size);
324 if (ptr == NULL)
325 set_last_error (OUT_OF_MEMORY, NULL, -1);
326 return ptr;
327 }
328
/*
    copies size bytes from src to dst through grammar_memory_copy,
    returns whatever grammar_memory_copy returns
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    void *result = grammar_memory_copy (dst, src, size);

    return result;
}
333
/*
    releases the block pointed to by *ptr through grammar_alloc_free
    and resets *ptr to NULL so the stale pointer cannot be reused
*/
static void mem_free (void **ptr)
{
    void *block = *ptr;

    grammar_alloc_free (block);
    *ptr = NULL;
}
339
340 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
341 {
342 void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
343 if (ptr2 == NULL)
344 set_last_error (OUT_OF_MEMORY, NULL, -1);
345 return ptr2;
346 }
347
348 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
349 {
350 return grammar_string_copy_n (dst, src, max_len);
351 }
352
353 static byte *str_duplicate (const byte *str)
354 {
355 byte *new_str = grammar_string_duplicate (str);
356 if (new_str == NULL)
357 set_last_error (OUT_OF_MEMORY, NULL, -1);
358 return new_str;
359 }
360
361 static int str_equal (const byte *str1, const byte *str2)
362 {
363 return grammar_string_compare (str1, str2) == 0;
364 }
365
366 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
367 {
368 return grammar_string_compare_n (str1, str2, n) == 0;
369 }
370
371 static int
372 str_length (const byte *str)
373 {
374 return (int) (grammar_string_length (str));
375 }
376
/*
    useful macros
*/

/*
    GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty) defines
        static void _Ty_append (_Ty **x, _Ty *nx)
    for a singly linked list type _Ty that has a "next" member. The function walks from *x
    to the terminating NULL link and stores nx there, so it also works on an empty list.
*/
#define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
static void _Ty##_append (_Ty **x, _Ty *nx) {\
    _Ty **link = x;\
    while (*link != NULL)\
        link = &(*link)->next;\
    *link = nx;\
}
385
386 /*
387 string to byte map typedef
388 */
389 typedef struct map_byte_
390 {
391 byte *key;
392 byte data;
393 struct map_byte_ *next;
394 } map_byte;
395
396 static void map_byte_create (map_byte **ma)
397 {
398 *ma = (map_byte *) mem_alloc (sizeof (map_byte));
399 if (*ma)
400 {
401 (**ma).key = NULL;
402 (**ma).data = '\0';
403 (**ma).next = NULL;
404 }
405 }
406
407 static void map_byte_destroy (map_byte **ma)
408 {
409 if (*ma)
410 {
411 map_byte_destroy (&(**ma).next);
412 mem_free ((void **) &(**ma).key);
413 mem_free ((void **) ma);
414 }
415 }
416
/* defines map_byte_append (): stores a node in the terminating NULL link of a map_byte list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
418
419 /*
420 searches the map for the specified key,
421 returns pointer to the element with the specified key if it exists
422 returns NULL otherwise
423 */
424 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
425 {
426 while (*ma)
427 {
428 if (str_equal ((**ma).key, key))
429 return *ma;
430
431 ma = &(**ma).next;
432 }
433
434 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
435 return NULL;
436 }
437
438 /*
439 searches the map for specified key,
440 if the key is matched, *data is filled with data associated with the key,
441 returns 0 if the key is matched,
442 returns 1 otherwise
443 */
444 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
445 {
446 map_byte *found = map_byte_locate (ma, key);
447 if (found != NULL)
448 {
449 *data = found->data;
450
451 return 0;
452 }
453
454 return 1;
455 }
456
457 /*
458 regbyte context typedef
459
460 Each regbyte consists of its name and a default value. These are static and created at
461 grammar script compile-time, for example the following line:
462 .regbyte vertex_blend 0x00
463 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
464 When the script is executed, this regbyte can be accessed by name for read and write. When a
465 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
466 stack. The new entry contains information abot which regbyte it references and its new value.
467 When a given regbyte is accessed for read, the stack is searched top-down to find an
468 entry that references the regbyte. The first matching entry is used to return the current
469 value it holds. If no entry is found, the default value is returned.
470 */
471 typedef struct regbyte_ctx_
472 {
473 map_byte *m_regbyte;
474 byte m_current_value;
475 struct regbyte_ctx_ *m_prev;
476 } regbyte_ctx;
477
478 static void regbyte_ctx_create (regbyte_ctx **re)
479 {
480 *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
481 if (*re)
482 {
483 (**re).m_regbyte = NULL;
484 (**re).m_prev = NULL;
485 }
486 }
487
488 static void regbyte_ctx_destroy (regbyte_ctx **re)
489 {
490 if (*re)
491 {
492 mem_free ((void **) re);
493 }
494 }
495
496 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
497 {
498 /* first lookup in the register stack */
499 while (*re != NULL)
500 {
501 if ((**re).m_regbyte == reg)
502 return (**re).m_current_value;
503
504 re = &(**re).m_prev;
505 }
506
507 /* if not found - return the default value */
508 return reg->data;
509 }
510
/*
    emit type typedef - selects what an emit produces
*/
typedef enum emit_type_
{
    et_byte = 0,        /* explicit number */
    et_stream = 1,      /* eaten character */
    et_position = 2     /* current position */
} emit_type;
520
/*
    emit destination typedef - selects where an emit goes
*/
typedef enum emit_dest_
{
    ed_output = 0,      /* write to the output buffer */
    ed_regbyte = 1      /* write a particular regbyte */
} emit_dest;
529
530 /*
531 emit typedef
532 */
533 typedef struct emit_
534 {
535 emit_dest m_emit_dest;
536 emit_type m_emit_type; /* ed_output */
537 byte m_byte; /* et_byte */
538 map_byte *m_regbyte; /* ed_regbyte */
539 byte *m_regname; /* ed_regbyte - temporary */
540 struct emit_ *m_next;
541 } emit;
542
543 static void emit_create (emit **em)
544 {
545 *em = (emit *) mem_alloc (sizeof (emit));
546 if (*em)
547 {
548 (**em).m_emit_dest = ed_output;
549 (**em).m_emit_type = et_byte;
550 (**em).m_byte = '\0';
551 (**em).m_regbyte = NULL;
552 (**em).m_regname = NULL;
553 (**em).m_next = NULL;
554 }
555 }
556
557 static void emit_destroy (emit **em)
558 {
559 if (*em)
560 {
561 emit_destroy (&(**em).m_next);
562 mem_free ((void **) &(**em).m_regname);
563 mem_free ((void **) em);
564 }
565 }
566
567 static unsigned int emit_size (emit *_E)
568 {
569 unsigned int n = 0;
570
571 while (_E != NULL)
572 {
573 if (_E->m_emit_dest == ed_output)
574 {
575 if (_E->m_emit_type == et_position)
576 n += 4; /* position is a 32-bit unsigned integer */
577 else
578 n++;
579 }
580 _E = _E->m_next;
581 }
582
583 return n;
584 }
585
586 static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx)
587 {
588 while (_E != NULL)
589 {
590 if (_E->m_emit_dest == ed_output)
591 {
592 if (_E->m_emit_type == et_byte)
593 *_P++ = _E->m_byte;
594 else if (_E->m_emit_type == et_stream)
595 *_P++ = c;
596 else /* _Em->type == et_position */
597 {
598 *_P++ = (byte) (_Pos);
599 *_P++ = (byte) (_Pos >> 8);
600 *_P++ = (byte) (_Pos >> 16);
601 *_P++ = (byte) (_Pos >> 24);
602 }
603 }
604 else
605 {
606 regbyte_ctx *new_rbc;
607 regbyte_ctx_create (&new_rbc);
608 if (new_rbc == NULL)
609 return 1;
610
611 new_rbc->m_prev = *_Ctx;
612 new_rbc->m_regbyte = _E->m_regbyte;
613 *_Ctx = new_rbc;
614
615 if (_E->m_emit_type == et_byte)
616 new_rbc->m_current_value = _E->m_byte;
617 else if (_E->m_emit_type == et_stream)
618 new_rbc->m_current_value = c;
619 }
620
621 _E = _E->m_next;
622 }
623
624 return 0;
625 }
626
627 /*
628 error typedef
629 */
630 typedef struct error_
631 {
632 byte *m_text;
633 byte *m_token_name;
634 struct rule_ *m_token;
635 } error;
636
637 static void error_create (error **er)
638 {
639 *er = (error *) mem_alloc (sizeof (error));
640 if (*er)
641 {
642 (**er).m_text = NULL;
643 (**er).m_token_name = NULL;
644 (**er).m_token = NULL;
645 }
646 }
647
648 static void error_destroy (error **er)
649 {
650 if (*er)
651 {
652 mem_free ((void **) &(**er).m_text);
653 mem_free ((void **) &(**er).m_token_name);
654 mem_free ((void **) er);
655 }
656 }
657
658 struct dict_;
659
660 static byte *
661 error_get_token (error *, struct dict_ *, const byte *, int);
662
/*
    condition operand type typedef
*/
typedef enum cond_oper_type_
{
    cot_byte = 0,       /* constant 8-bit unsigned integer */
    cot_regbyte = 1     /* pointer to byte register containing the current value */
} cond_oper_type;
671
672 /*
673 condition operand typedef
674 */
675 typedef struct cond_oper_
676 {
677 cond_oper_type m_type;
678 byte m_byte; /* cot_byte */
679 map_byte *m_regbyte; /* cot_regbyte */
680 byte *m_regname; /* cot_regbyte - temporary */
681 } cond_oper;
682
/*
    condition type typedef - the comparison applied to the two operands
*/
typedef enum cond_type_
{
    ct_equal = 0,
    ct_not_equal = 1
} cond_type;
691
692 /*
693 condition typedef
694 */
695 typedef struct cond_
696 {
697 cond_type m_type;
698 cond_oper m_operands[2];
699 } cond;
700
701 static void cond_create (cond **co)
702 {
703 *co = (cond *) mem_alloc (sizeof (cond));
704 if (*co)
705 {
706 (**co).m_operands[0].m_regname = NULL;
707 (**co).m_operands[1].m_regname = NULL;
708 }
709 }
710
711 static void cond_destroy (cond **co)
712 {
713 if (*co)
714 {
715 mem_free ((void **) &(**co).m_operands[0].m_regname);
716 mem_free ((void **) &(**co).m_operands[1].m_regname);
717 mem_free ((void **) co);
718 }
719 }
720
/*
    specifier type typedef
*/
typedef enum spec_type_
{
    st_false = 0,
    st_true = 1,
    st_byte = 2,
    st_byte_range = 3,
    st_string = 4,
    st_identifier = 5,
    st_identifier_loop = 6,
    st_debug = 7
} spec_type;
735
736 /*
737 specifier typedef
738 */
739 typedef struct spec_
740 {
741 spec_type m_spec_type;
742 byte m_byte[2]; /* st_byte, st_byte_range */
743 byte *m_string; /* st_string */
744 struct rule_ *m_rule; /* st_identifier, st_identifier_loop */
745 emit *m_emits;
746 error *m_errtext;
747 cond *m_cond;
748 struct spec_ *next;
749 } spec;
750
751 static void spec_create (spec **sp)
752 {
753 *sp = (spec *) mem_alloc (sizeof (spec));
754 if (*sp)
755 {
756 (**sp).m_spec_type = st_false;
757 (**sp).m_byte[0] = '\0';
758 (**sp).m_byte[1] = '\0';
759 (**sp).m_string = NULL;
760 (**sp).m_rule = NULL;
761 (**sp).m_emits = NULL;
762 (**sp).m_errtext = NULL;
763 (**sp).m_cond = NULL;
764 (**sp).next = NULL;
765 }
766 }
767
768 static void spec_destroy (spec **sp)
769 {
770 if (*sp)
771 {
772 spec_destroy (&(**sp).next);
773 emit_destroy (&(**sp).m_emits);
774 error_destroy (&(**sp).m_errtext);
775 mem_free ((void **) &(**sp).m_string);
776 cond_destroy (&(**sp).m_cond);
777 mem_free ((void **) sp);
778 }
779 }
780
/* defines spec_append (): stores a node in the terminating NULL link of a spec list */
GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
782
/*
    operator typedef - how the specifiers of a rule are joined
*/
typedef enum oper_
{
    op_none = 0,
    op_and = 1,
    op_or = 2
} oper;
792
793 /*
794 rule typedef
795 */
796 typedef struct rule_
797 {
798 oper m_oper;
799 spec *m_specs;
800 struct rule_ *next;
801 int m_referenced;
802 } rule;
803
804 static void rule_create (rule **ru)
805 {
806 *ru = (rule *) mem_alloc (sizeof (rule));
807 if (*ru)
808 {
809 (**ru).m_oper = op_none;
810 (**ru).m_specs = NULL;
811 (**ru).next = NULL;
812 (**ru).m_referenced = 0;
813 }
814 }
815
816 static void rule_destroy (rule **ru)
817 {
818 if (*ru)
819 {
820 rule_destroy (&(**ru).next);
821 spec_destroy (&(**ru).m_specs);
822 mem_free ((void **) ru);
823 }
824 }
825
/* defines rule_append (): stores a node in the terminating NULL link of a rule list */
GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
827
828 /*
829 returns unique grammar id
830 */
831 static grammar next_valid_grammar_id (void)
832 {
833 static grammar id = 0;
834
835 return ++id;
836 }
837
838 /*
839 dictionary typedef
840 */
841 typedef struct dict_
842 {
843 rule *m_rulez;
844 rule *m_syntax;
845 rule *m_string;
846 map_byte *m_regbytes;
847 grammar m_id;
848 struct dict_ *next;
849 } dict;
850
851 static void dict_create (dict **di)
852 {
853 *di = (dict *) mem_alloc (sizeof (dict));
854 if (*di)
855 {
856 (**di).m_rulez = NULL;
857 (**di).m_syntax = NULL;
858 (**di).m_string = NULL;
859 (**di).m_regbytes = NULL;
860 (**di).m_id = next_valid_grammar_id ();
861 (**di).next = NULL;
862 }
863 }
864
865 static void dict_destroy (dict **di)
866 {
867 if (*di)
868 {
869 rule_destroy (&(**di).m_rulez);
870 map_byte_destroy (&(**di).m_regbytes);
871 mem_free ((void **) di);
872 }
873 }
874
/* defines dict_append (): stores a node in the terminating NULL link of a dict list */
GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
876
877 static void dict_find (dict **di, grammar key, dict **data)
878 {
879 while (*di)
880 {
881 if ((**di).m_id == key)
882 {
883 *data = *di;
884 return;
885 }
886
887 di = &(**di).next;
888 }
889
890 *data = NULL;
891 }
892
/* NOTE(review): presumably the head of the list of existing dictionaries (linked through */
/* dict::next) - its users are outside this part of the file, confirm there */
static dict *g_dicts = NULL;
894
895 /*
896 byte array typedef
897 */
898 typedef struct barray_
899 {
900 byte *data;
901 unsigned int len;
902 } barray;
903
904 static void barray_create (barray **ba)
905 {
906 *ba = (barray *) mem_alloc (sizeof (barray));
907 if (*ba)
908 {
909 (**ba).data = NULL;
910 (**ba).len = 0;
911 }
912 }
913
914 static void barray_destroy (barray **ba)
915 {
916 if (*ba)
917 {
918 mem_free ((void **) &(**ba).data);
919 mem_free ((void **) ba);
920 }
921 }
922
923 /*
924 reallocates byte array to requested size,
925 returns 0 on success,
926 returns 1 otherwise
927 */
928 static int barray_resize (barray **ba, unsigned int nlen)
929 {
930 byte *new_pointer;
931
932 if (nlen == 0)
933 {
934 mem_free ((void **) &(**ba).data);
935 (**ba).data = NULL;
936 (**ba).len = 0;
937
938 return 0;
939 }
940 else
941 {
942 new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
943 nlen * sizeof (byte));
944 if (new_pointer)
945 {
946 (**ba).data = new_pointer;
947 (**ba).len = nlen;
948
949 return 0;
950 }
951 }
952
953 return 1;
954 }
955
956 /*
957 adds byte array pointed by *nb to the end of array pointed by *ba,
958 returns 0 on success,
959 returns 1 otherwise
960 */
961 static int barray_append (barray **ba, barray **nb)
962 {
963 const unsigned int len = (**ba).len;
964
965 if (barray_resize (ba, (**ba).len + (**nb).len))
966 return 1;
967
968 mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
969
970 return 0;
971 }
972
973 /*
974 adds emit chain pointed by em to the end of array pointed by *ba,
975 returns 0 on success,
976 returns 1 otherwise
977 */
978 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
979 {
980 unsigned int count = emit_size (em);
981
982 if (barray_resize (ba, (**ba).len + count))
983 return 1;
984
985 return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
986 }
987
988 /*
989 byte pool typedef
990 */
991 typedef struct bytepool_
992 {
993 byte *_F;
994 unsigned int _Siz;
995 } bytepool;
996
997 static void bytepool_destroy (bytepool **by)
998 {
999 if (*by != NULL)
1000 {
1001 mem_free ((void **) &(**by)._F);
1002 mem_free ((void **) by);
1003 }
1004 }
1005
1006 static void bytepool_create (bytepool **by, int len)
1007 {
1008 *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1009 if (*by != NULL)
1010 {
1011 (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1012 (**by)._Siz = len;
1013
1014 if ((**by)._F == NULL)
1015 bytepool_destroy (by);
1016 }
1017 }
1018
1019 static int bytepool_reserve (bytepool *by, unsigned int n)
1020 {
1021 byte *_P;
1022
1023 if (n <= by->_Siz)
1024 return 0;
1025
1026 /* byte pool can only grow and at least by doubling its size */
1027 n = n >= by->_Siz * 2 ? n : by->_Siz * 2;
1028
1029 /* reallocate the memory and adjust pointers to the new memory location */
1030 _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n));
1031 if (_P != NULL)
1032 {
1033 by->_F = _P;
1034 by->_Siz = n;
1035 return 0;
1036 }
1037
1038 return 1;
1039 }
1040
1041 /*
1042 string to string map typedef
1043 */
1044 typedef struct map_str_
1045 {
1046 byte *key;
1047 byte *data;
1048 struct map_str_ *next;
1049 } map_str;
1050
1051 static void map_str_create (map_str **ma)
1052 {
1053 *ma = (map_str *) mem_alloc (sizeof (map_str));
1054 if (*ma)
1055 {
1056 (**ma).key = NULL;
1057 (**ma).data = NULL;
1058 (**ma).next = NULL;
1059 }
1060 }
1061
1062 static void map_str_destroy (map_str **ma)
1063 {
1064 if (*ma)
1065 {
1066 map_str_destroy (&(**ma).next);
1067 mem_free ((void **) &(**ma).key);
1068 mem_free ((void **) &(**ma).data);
1069 mem_free ((void **) ma);
1070 }
1071 }
1072
/* defines map_str_append (): stores a node in the terminating NULL link of a map_str list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1074
1075 /*
1076 searches the map for specified key,
1077 if the key is matched, *data is filled with data associated with the key,
1078 returns 0 if the key is matched,
1079 returns 1 otherwise
1080 */
1081 static int map_str_find (map_str **ma, const byte *key, byte **data)
1082 {
1083 while (*ma)
1084 {
1085 if (str_equal ((**ma).key, key))
1086 {
1087 *data = str_duplicate ((**ma).data);
1088 if (*data == NULL)
1089 return 1;
1090
1091 return 0;
1092 }
1093
1094 ma = &(**ma).next;
1095 }
1096
1097 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1098 return 1;
1099 }
1100
1101 /*
1102 string to rule map typedef
1103 */
1104 typedef struct map_rule_
1105 {
1106 byte *key;
1107 rule *data;
1108 struct map_rule_ *next;
1109 } map_rule;
1110
1111 static void map_rule_create (map_rule **ma)
1112 {
1113 *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1114 if (*ma)
1115 {
1116 (**ma).key = NULL;
1117 (**ma).data = NULL;
1118 (**ma).next = NULL;
1119 }
1120 }
1121
1122 static void map_rule_destroy (map_rule **ma)
1123 {
1124 if (*ma)
1125 {
1126 map_rule_destroy (&(**ma).next);
1127 mem_free ((void **) &(**ma).key);
1128 mem_free ((void **) ma);
1129 }
1130 }
1131
/* defines map_rule_append (): stores a node in the terminating NULL link of a map_rule list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1133
1134 /*
1135 searches the map for specified key,
1136 if the key is matched, *data is filled with data associated with the key,
1137 returns 0 if the is matched,
1138 returns 1 otherwise
1139 */
1140 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1141 {
1142 while (*ma)
1143 {
1144 if (str_equal ((**ma).key, key))
1145 {
1146 *data = (**ma).data;
1147
1148 return 0;
1149 }
1150
1151 ma = &(**ma).next;
1152 }
1153
1154 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1155 return 1;
1156 }
1157
1158 /*
1159 returns 1 if given character is a white space,
1160 returns 0 otherwise
1161 */
1162 static int is_space (byte c)
1163 {
1164 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1165 }
1166
1167 /*
1168 advances text pointer by 1 if character pointed by *text is a space,
1169 returns 1 if a space has been eaten,
1170 returns 0 otherwise
1171 */
1172 static int eat_space (const byte **text)
1173 {
1174 if (is_space (**text))
1175 {
1176 (*text)++;
1177
1178 return 1;
1179 }
1180
1181 return 0;
1182 }
1183
1184 /*
1185 returns 1 if text points to C-style comment start string,
1186 returns 0 otherwise
1187 */
1188 static int is_comment_start (const byte *text)
1189 {
1190 return text[0] == '/' && text[1] == '*';
1191 }
1192
1193 /*
1194 advances text pointer to first character after C-style comment block - if any,
1195 returns 1 if C-style comment block has been encountered and eaten,
1196 returns 0 otherwise
1197 */
1198 static int eat_comment (const byte **text)
1199 {
1200 if (is_comment_start (*text))
1201 {
1202 /* *text points to comment block - skip two characters to enter comment body */
1203 *text += 2;
1204 /* skip any character except consecutive '*' and '/' */
1205 while (!((*text)[0] == '*' && (*text)[1] == '/'))
1206 (*text)++;
1207 /* skip those two terminating characters */
1208 *text += 2;
1209
1210 return 1;
1211 }
1212
1213 return 0;
1214 }
1215
1216 /*
1217 advances text pointer to first character that is neither space nor C-style comment block
1218 */
1219 static void eat_spaces (const byte **text)
1220 {
1221 while (eat_space (text) || eat_comment (text))
1222 ;
1223 }
1224
1225 /*
1226 resizes string pointed by *ptr to successfully add character c to the end of the string,
1227 returns 0 on success,
1228 returns 1 otherwise
1229 */
1230 static int string_grow (byte **ptr, unsigned int *len, byte c)
1231 {
1232 /* reallocate the string in 16-byte increments */
1233 if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1234 {
1235 byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1236 ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1237 if (tmp == NULL)
1238 return 1;
1239
1240 *ptr = tmp;
1241 }
1242
1243 if (c)
1244 {
1245 /* append given character */
1246 (*ptr)[*len] = c;
1247 (*len)++;
1248 }
1249 (*ptr)[*len] = '\0';
1250
1251 return 0;
1252 }
1253
1254 /*
1255 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1256 returns 0 otherwise
1257 */
1258 static int is_identifier (byte c)
1259 {
1260 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1261 }
1262
1263 /*
1264 copies characters from *text to *id until non-identifier character is encountered,
1265 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1266 text pointer is advanced to point past the copied identifier,
1267 returns 0 if identifier was successfully copied,
1268 returns 1 otherwise
1269 */
1270 static int get_identifier (const byte **text, byte **id)
1271 {
1272 const byte *t = *text;
1273 byte *p = NULL;
1274 unsigned int len = 0;
1275
1276 if (string_grow (&p, &len, '\0'))
1277 return 1;
1278
1279 /* loop while next character in buffer is valid for identifiers */
1280 while (is_identifier (*t))
1281 {
1282 if (string_grow (&p, &len, *t++))
1283 {
1284 mem_free ((void **) (void *) &p);
1285 return 1;
1286 }
1287 }
1288
1289 *text = t;
1290 *id = p;
1291
1292 return 0;
1293 }
1294
1295 /*
1296 converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1297 advances text pointer past the converted sequence,
1298 returns the converted value
1299 */
1300 static unsigned int dec_convert (const byte **text)
1301 {
1302 unsigned int value = 0;
1303
1304 while (**text >= '0' && **text <= '9')
1305 {
1306 value = value * 10 + **text - '0';
1307 (*text)++;
1308 }
1309
1310 return value;
1311 }
1312
1313 /*
1314 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1315 returns 0 otherwise
1316 */
1317 static int is_hex (byte c)
1318 {
1319 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1320 }
1321
1322 /*
1323 returns value of passed character as if it was HEX digit
1324 */
1325 static unsigned int hex2dec (byte c)
1326 {
1327 if (c >= '0' && c <= '9')
1328 return c - '0';
1329 if (c >= 'A' && c <= 'F')
1330 return c - 'A' + 10;
1331 return c - 'a' + 10;
1332 }
1333
1334 /*
1335 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1336 advances text pointer past the converted sequence,
1337 returns the converted value
1338 */
1339 static unsigned int hex_convert (const byte **text)
1340 {
1341 unsigned int value = 0;
1342
1343 while (is_hex (**text))
1344 {
1345 value = value * 0x10 + hex2dec (**text);
1346 (*text)++;
1347 }
1348
1349 return value;
1350 }
1351
1352 /*
1353 returns 1 if given character is OCT digit 0-7,
1354 returns 0 otherwise
1355 */
1356 static int is_oct (byte c)
1357 {
1358 return c >= '0' && c <= '7';
1359 }
1360
1361 /*
1362 returns value of passed character as if it was OCT digit
1363 */
1364 static int oct2dec (byte c)
1365 {
1366 return c - '0';
1367 }
1368
1369 static byte get_escape_sequence (const byte **text)
1370 {
1371 int value = 0;
1372
1373 /* skip '\' character */
1374 (*text)++;
1375
1376 switch (*(*text)++)
1377 {
1378 case '\'':
1379 return '\'';
1380 case '"':
1381 return '\"';
1382 case '?':
1383 return '\?';
1384 case '\\':
1385 return '\\';
1386 case 'a':
1387 return '\a';
1388 case 'b':
1389 return '\b';
1390 case 'f':
1391 return '\f';
1392 case 'n':
1393 return '\n';
1394 case 'r':
1395 return '\r';
1396 case 't':
1397 return '\t';
1398 case 'v':
1399 return '\v';
1400 case 'x':
1401 return (byte) hex_convert (text);
1402 }
1403
1404 (*text)--;
1405 if (is_oct (**text))
1406 {
1407 value = oct2dec (*(*text)++);
1408 if (is_oct (**text))
1409 {
1410 value = value * 010 + oct2dec (*(*text)++);
1411 if (is_oct (**text))
1412 value = value * 010 + oct2dec (*(*text)++);
1413 }
1414 }
1415
1416 return (byte) value;
1417 }
1418
1419 /*
1420 copies characters from *text to *str until " or ' character is encountered,
1421 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1422 assumes that *text points to " or ' character that starts the string,
1423 text pointer is advanced to point past the " or ' character,
1424 returns 0 if string was successfully copied,
1425 returns 1 otherwise
1426 */
1427 static int get_string (const byte **text, byte **str)
1428 {
1429 const byte *t = *text;
1430 byte *p = NULL;
1431 unsigned int len = 0;
1432 byte term_char;
1433
1434 if (string_grow (&p, &len, '\0'))
1435 return 1;
1436
1437 /* read " or ' character that starts the string */
1438 term_char = *t++;
1439 /* while next character is not the terminating character */
1440 while (*t && *t != term_char)
1441 {
1442 byte c;
1443
1444 if (*t == '\\')
1445 c = get_escape_sequence (&t);
1446 else
1447 c = *t++;
1448
1449 if (string_grow (&p, &len, c))
1450 {
1451 mem_free ((void **) (void *) &p);
1452 return 1;
1453 }
1454 }
1455 /* skip " or ' character that ends the string */
1456 t++;
1457
1458 *text = t;
1459 *str = p;
1460 return 0;
1461 }
1462
1463 /*
1464 gets emit code, the syntax is:
1465 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1466 assumes that *text already points to <symbol>,
1467 returns 0 if emit code is successfully read,
1468 returns 1 otherwise
1469 */
1470 static int get_emtcode (const byte **text, map_byte **ma)
1471 {
1472 const byte *t = *text;
1473 map_byte *m = NULL;
1474
1475 map_byte_create (&m);
1476 if (m == NULL)
1477 return 1;
1478
1479 if (get_identifier (&t, &m->key))
1480 {
1481 map_byte_destroy (&m);
1482 return 1;
1483 }
1484 eat_spaces (&t);
1485
1486 if (*t == '\'')
1487 {
1488 byte *c;
1489
1490 if (get_string (&t, &c))
1491 {
1492 map_byte_destroy (&m);
1493 return 1;
1494 }
1495
1496 m->data = (byte) c[0];
1497 mem_free ((void **) (void *) &c);
1498 }
1499 else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1500 {
1501 /* skip HEX "0x" or "0X" prefix */
1502 t += 2;
1503 m->data = (byte) hex_convert (&t);
1504 }
1505 else
1506 {
1507 m->data = (byte) dec_convert (&t);
1508 }
1509
1510 eat_spaces (&t);
1511
1512 *text = t;
1513 *ma = m;
1514 return 0;
1515 }
1516
1517 /*
1518 gets regbyte declaration, the syntax is:
1519 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1520 assumes that *text already points to <symbol>,
1521 returns 0 if regbyte is successfully read,
1522 returns 1 otherwise
1523 */
1524 static int get_regbyte (const byte **text, map_byte **ma)
1525 {
1526 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1527 return get_emtcode (text, ma);
1528 }
1529
1530 /*
1531 returns 0 on success,
1532 returns 1 otherwise
1533 */
1534 static int get_errtext (const byte **text, map_str **ma)
1535 {
1536 const byte *t = *text;
1537 map_str *m = NULL;
1538
1539 map_str_create (&m);
1540 if (m == NULL)
1541 return 1;
1542
1543 if (get_identifier (&t, &m->key))
1544 {
1545 map_str_destroy (&m);
1546 return 1;
1547 }
1548 eat_spaces (&t);
1549
1550 if (get_string (&t, &m->data))
1551 {
1552 map_str_destroy (&m);
1553 return 1;
1554 }
1555 eat_spaces (&t);
1556
1557 *text = t;
1558 *ma = m;
1559 return 0;
1560 }
1561
1562 /*
1563 returns 0 on success,
1564 returns 1 otherwise,
1565 */
1566 static int get_error (const byte **text, error **er, map_str *maps)
1567 {
1568 const byte *t = *text;
1569 byte *temp = NULL;
1570
1571 if (*t != '.')
1572 return 0;
1573
1574 t++;
1575 if (get_identifier (&t, &temp))
1576 return 1;
1577 eat_spaces (&t);
1578
1579 if (!str_equal ((byte *) "error", temp))
1580 {
1581 mem_free ((void **) (void *) &temp);
1582 return 0;
1583 }
1584
1585 mem_free ((void **) (void *) &temp);
1586
1587 error_create (er);
1588 if (*er == NULL)
1589 return 1;
1590
1591 if (*t == '\"')
1592 {
1593 if (get_string (&t, &(**er).m_text))
1594 {
1595 error_destroy (er);
1596 return 1;
1597 }
1598 eat_spaces (&t);
1599 }
1600 else
1601 {
1602 if (get_identifier (&t, &temp))
1603 {
1604 error_destroy (er);
1605 return 1;
1606 }
1607 eat_spaces (&t);
1608
1609 if (map_str_find (&maps, temp, &(**er).m_text))
1610 {
1611 mem_free ((void **) (void *) &temp);
1612 error_destroy (er);
1613 return 1;
1614 }
1615
1616 mem_free ((void **) (void *) &temp);
1617 }
1618
1619 /* try to extract "token" from "...$token$..." */
1620 {
1621 byte *processed = NULL;
1622 unsigned int len = 0;
1623 int i = 0;
1624
1625 if (string_grow (&processed, &len, '\0'))
1626 {
1627 error_destroy (er);
1628 return 1;
1629 }
1630
1631 while (i < str_length ((**er).m_text))
1632 {
1633 /* check if the dollar sign is repeated - if so skip it */
1634 if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1635 {
1636 if (string_grow (&processed, &len, '$'))
1637 {
1638 mem_free ((void **) (void *) &processed);
1639 error_destroy (er);
1640 return 1;
1641 }
1642
1643 i += 2;
1644 }
1645 else if ((**er).m_text[i] != '$')
1646 {
1647 if (string_grow (&processed, &len, (**er).m_text[i]))
1648 {
1649 mem_free ((void **) (void *) &processed);
1650 error_destroy (er);
1651 return 1;
1652 }
1653
1654 i++;
1655 }
1656 else
1657 {
1658 if (string_grow (&processed, &len, '$'))
1659 {
1660 mem_free ((void **) (void *) &processed);
1661 error_destroy (er);
1662 return 1;
1663 }
1664
1665 {
1666 /* length of token being extracted */
1667 unsigned int tlen = 0;
1668
1669 if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1670 {
1671 mem_free ((void **) (void *) &processed);
1672 error_destroy (er);
1673 return 1;
1674 }
1675
1676 /* skip the dollar sign */
1677 i++;
1678
1679 while ((**er).m_text[i] != '$')
1680 {
1681 if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1682 {
1683 mem_free ((void **) (void *) &processed);
1684 error_destroy (er);
1685 return 1;
1686 }
1687
1688 i++;
1689 }
1690
1691 /* skip the dollar sign */
1692 i++;
1693 }
1694 }
1695 }
1696
1697 mem_free ((void **) &(**er).m_text);
1698 (**er).m_text = processed;
1699 }
1700
1701 *text = t;
1702 return 0;
1703 }
1704
1705 /*
1706 returns 0 on success,
1707 returns 1 otherwise,
1708 */
1709 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1710 {
1711 const byte *t = *text;
1712 byte *temp = NULL;
1713 emit *e = NULL;
1714 emit_dest dest;
1715
1716 if (*t != '.')
1717 return 0;
1718
1719 t++;
1720 if (get_identifier (&t, &temp))
1721 return 1;
1722 eat_spaces (&t);
1723
1724 /* .emit */
1725 if (str_equal ((byte *) "emit", temp))
1726 dest = ed_output;
1727 /* .load */
1728 else if (str_equal ((byte *) "load", temp))
1729 dest = ed_regbyte;
1730 else
1731 {
1732 mem_free ((void **) (void *) &temp);
1733 return 0;
1734 }
1735
1736 mem_free ((void **) (void *) &temp);
1737
1738 emit_create (&e);
1739 if (e == NULL)
1740 return 1;
1741
1742 e->m_emit_dest = dest;
1743
1744 if (dest == ed_regbyte)
1745 {
1746 if (get_identifier (&t, &e->m_regname))
1747 {
1748 emit_destroy (&e);
1749 return 1;
1750 }
1751 eat_spaces (&t);
1752 }
1753
1754 /* 0xNN */
1755 if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1756 {
1757 t += 2;
1758 e->m_byte = (byte) hex_convert (&t);
1759
1760 e->m_emit_type = et_byte;
1761 }
1762 /* NNN */
1763 else if (*t >= '0' && *t <= '9')
1764 {
1765 e->m_byte = (byte) dec_convert (&t);
1766
1767 e->m_emit_type = et_byte;
1768 }
1769 /* * */
1770 else if (*t == '*')
1771 {
1772 t++;
1773
1774 e->m_emit_type = et_stream;
1775 }
1776 /* $ */
1777 else if (*t == '$')
1778 {
1779 t++;
1780
1781 e->m_emit_type = et_position;
1782 }
1783 /* 'c' */
1784 else if (*t == '\'')
1785 {
1786 if (get_string (&t, &temp))
1787 {
1788 emit_destroy (&e);
1789 return 1;
1790 }
1791 e->m_byte = (byte) temp[0];
1792
1793 mem_free ((void **) (void *) &temp);
1794
1795 e->m_emit_type = et_byte;
1796 }
1797 else
1798 {
1799 if (get_identifier (&t, &temp))
1800 {
1801 emit_destroy (&e);
1802 return 1;
1803 }
1804
1805 if (map_byte_find (&mapb, temp, &e->m_byte))
1806 {
1807 mem_free ((void **) (void *) &temp);
1808 emit_destroy (&e);
1809 return 1;
1810 }
1811
1812 mem_free ((void **) (void *) &temp);
1813
1814 e->m_emit_type = et_byte;
1815 }
1816
1817 eat_spaces (&t);
1818
1819 if (get_emits (&t, &e->m_next, mapb))
1820 {
1821 emit_destroy (&e);
1822 return 1;
1823 }
1824
1825 *text = t;
1826 *em = e;
1827 return 0;
1828 }
1829
1830 /*
1831 returns 0 on success,
1832 returns 1 otherwise,
1833 */
1834 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1835 {
1836 const byte *t = *text;
1837 spec *s = NULL;
1838
1839 spec_create (&s);
1840 if (s == NULL)
1841 return 1;
1842
1843 /* first - read optional .if statement */
1844 if (*t == '.')
1845 {
1846 const byte *u = t;
1847 byte *keyword = NULL;
1848
1849 /* skip the dot */
1850 u++;
1851
1852 if (get_identifier (&u, &keyword))
1853 {
1854 spec_destroy (&s);
1855 return 1;
1856 }
1857
1858 /* .if */
1859 if (str_equal ((byte *) "if", keyword))
1860 {
1861 cond_create (&s->m_cond);
1862 if (s->m_cond == NULL)
1863 {
1864 spec_destroy (&s);
1865 return 1;
1866 }
1867
1868 /* skip the left paren */
1869 eat_spaces (&u);
1870 u++;
1871
1872 /* get the left operand */
1873 eat_spaces (&u);
1874 if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1875 {
1876 spec_destroy (&s);
1877 return 1;
1878 }
1879 s->m_cond->m_operands[0].m_type = cot_regbyte;
1880
1881 /* get the operator (!= or ==) */
1882 eat_spaces (&u);
1883 if (*u == '!')
1884 s->m_cond->m_type = ct_not_equal;
1885 else
1886 s->m_cond->m_type = ct_equal;
1887 u += 2;
1888 eat_spaces (&u);
1889
1890 if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1891 {
1892 /* skip the 0x prefix */
1893 u += 2;
1894
1895 /* get the right operand */
1896 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1897 s->m_cond->m_operands[1].m_type = cot_byte;
1898 }
1899 else /*if (*u >= '0' && *u <= '9')*/
1900 {
1901 /* get the right operand */
1902 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1903 s->m_cond->m_operands[1].m_type = cot_byte;
1904 }
1905
1906 /* skip the right paren */
1907 eat_spaces (&u);
1908 u++;
1909
1910 eat_spaces (&u);
1911
1912 t = u;
1913 }
1914
1915 mem_free ((void **) (void *) &keyword);
1916 }
1917
1918 if (*t == '\'')
1919 {
1920 byte *temp = NULL;
1921
1922 if (get_string (&t, &temp))
1923 {
1924 spec_destroy (&s);
1925 return 1;
1926 }
1927 eat_spaces (&t);
1928
1929 if (*t == '-')
1930 {
1931 byte *temp2 = NULL;
1932
1933 /* skip the '-' character */
1934 t++;
1935 eat_spaces (&t);
1936
1937 if (get_string (&t, &temp2))
1938 {
1939 mem_free ((void **) (void *) &temp);
1940 spec_destroy (&s);
1941 return 1;
1942 }
1943 eat_spaces (&t);
1944
1945 s->m_spec_type = st_byte_range;
1946 s->m_byte[0] = *temp;
1947 s->m_byte[1] = *temp2;
1948
1949 mem_free ((void **) (void *) &temp2);
1950 }
1951 else
1952 {
1953 s->m_spec_type = st_byte;
1954 *s->m_byte = *temp;
1955 }
1956
1957 mem_free ((void **) (void *) &temp);
1958 }
1959 else if (*t == '"')
1960 {
1961 if (get_string (&t, &s->m_string))
1962 {
1963 spec_destroy (&s);
1964 return 1;
1965 }
1966 eat_spaces (&t);
1967
1968 s->m_spec_type = st_string;
1969 }
1970 else if (*t == '.')
1971 {
1972 byte *keyword = NULL;
1973
1974 /* skip the dot */
1975 t++;
1976
1977 if (get_identifier (&t, &keyword))
1978 {
1979 spec_destroy (&s);
1980 return 1;
1981 }
1982 eat_spaces (&t);
1983
1984 /* .true */
1985 if (str_equal ((byte *) "true", keyword))
1986 {
1987 s->m_spec_type = st_true;
1988 }
1989 /* .false */
1990 else if (str_equal ((byte *) "false", keyword))
1991 {
1992 s->m_spec_type = st_false;
1993 }
1994 /* .debug */
1995 else if (str_equal ((byte *) "debug", keyword))
1996 {
1997 s->m_spec_type = st_debug;
1998 }
1999 /* .loop */
2000 else if (str_equal ((byte *) "loop", keyword))
2001 {
2002 if (get_identifier (&t, &s->m_string))
2003 {
2004 mem_free ((void **) (void *) &keyword);
2005 spec_destroy (&s);
2006 return 1;
2007 }
2008 eat_spaces (&t);
2009
2010 s->m_spec_type = st_identifier_loop;
2011 }
2012 mem_free ((void **) (void *) &keyword);
2013 }
2014 else
2015 {
2016 if (get_identifier (&t, &s->m_string))
2017 {
2018 spec_destroy (&s);
2019 return 1;
2020 }
2021 eat_spaces (&t);
2022
2023 s->m_spec_type = st_identifier;
2024 }
2025
2026 if (get_error (&t, &s->m_errtext, maps))
2027 {
2028 spec_destroy (&s);
2029 return 1;
2030 }
2031
2032 if (get_emits (&t, &s->m_emits, mapb))
2033 {
2034 spec_destroy (&s);
2035 return 1;
2036 }
2037
2038 *text = t;
2039 *sp = s;
2040 return 0;
2041 }
2042
2043 /*
2044 returns 0 on success,
2045 returns 1 otherwise,
2046 */
2047 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2048 {
2049 const byte *t = *text;
2050 rule *r = NULL;
2051
2052 rule_create (&r);
2053 if (r == NULL)
2054 return 1;
2055
2056 if (get_spec (&t, &r->m_specs, maps, mapb))
2057 {
2058 rule_destroy (&r);
2059 return 1;
2060 }
2061
2062 while (*t != ';')
2063 {
2064 byte *op = NULL;
2065 spec *sp = NULL;
2066
2067 /* skip the dot that precedes "and" or "or" */
2068 t++;
2069
2070 /* read "and" or "or" keyword */
2071 if (get_identifier (&t, &op))
2072 {
2073 rule_destroy (&r);
2074 return 1;
2075 }
2076 eat_spaces (&t);
2077
2078 if (r->m_oper == op_none)
2079 {
2080 /* .and */
2081 if (str_equal ((byte *) "and", op))
2082 r->m_oper = op_and;
2083 /* .or */
2084 else
2085 r->m_oper = op_or;
2086 }
2087
2088 mem_free ((void **) (void *) &op);
2089
2090 if (get_spec (&t, &sp, maps, mapb))
2091 {
2092 rule_destroy (&r);
2093 return 1;
2094 }
2095
2096 spec_append (&r->m_specs, sp);
2097 }
2098
2099 /* skip the semicolon */
2100 t++;
2101 eat_spaces (&t);
2102
2103 *text = t;
2104 *ru = r;
2105 return 0;
2106 }
2107
2108 /*
2109 returns 0 on success,
2110 returns 1 otherwise,
2111 */
2112 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2113 {
2114 if (map_rule_find (&mapr, symbol, ru))
2115 return 1;
2116
2117 (**ru).m_referenced = 1;
2118
2119 return 0;
2120 }
2121
2122 /*
2123 returns 0 on success,
2124 returns 1 otherwise,
2125 */
2126 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2127 byte **string_symbol, map_byte *regbytes)
2128 {
2129 rule *rulez = di->m_rulez;
2130
2131 /* update dependecies for the root and lexer symbols */
2132 if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2133 (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2134 return 1;
2135
2136 mem_free ((void **) syntax_symbol);
2137 mem_free ((void **) string_symbol);
2138
2139 /* update dependecies for the rest of the rules */
2140 while (rulez)
2141 {
2142 spec *sp = rulez->m_specs;
2143
2144 /* iterate through all the specifiers */
2145 while (sp)
2146 {
2147 /* update dependency for identifier */
2148 if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2149 {
2150 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2151 return 1;
2152
2153 mem_free ((void **) &sp->m_string);
2154 }
2155
2156 /* some errtexts reference to a rule */
2157 if (sp->m_errtext && sp->m_errtext->m_token_name)
2158 {
2159 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2160 return 1;
2161
2162 mem_free ((void **) &sp->m_errtext->m_token_name);
2163 }
2164
2165 /* update dependency for condition */
2166 if (sp->m_cond)
2167 {
2168 int i;
2169 for (i = 0; i < 2; i++)
2170 if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2171 {
2172 sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2173 sp->m_cond->m_operands[i].m_regname);
2174
2175 if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2176 return 1;
2177
2178 mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2179 }
2180 }
2181
2182 /* update dependency for all .load instructions */
2183 if (sp->m_emits)
2184 {
2185 emit *em = sp->m_emits;
2186 while (em != NULL)
2187 {
2188 if (em->m_emit_dest == ed_regbyte)
2189 {
2190 em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2191
2192 if (em->m_regbyte == NULL)
2193 return 1;
2194
2195 mem_free ((void **) &em->m_regname);
2196 }
2197
2198 em = em->m_next;
2199 }
2200 }
2201
2202 sp = sp->next;
2203 }
2204
2205 rulez = rulez->next;
2206 }
2207
2208 /* check for unreferenced symbols */
2209 rulez = di->m_rulez;
2210 while (rulez != NULL)
2211 {
2212 if (!rulez->m_referenced)
2213 {
2214 map_rule *ma = mapr;
2215 while (ma)
2216 {
2217 if (ma->data == rulez)
2218 {
2219 set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2220 return 1;
2221 }
2222 ma = ma->next;
2223 }
2224 }
2225 rulez = rulez->next;
2226 }
2227
2228 return 0;
2229 }
2230
2231 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2232 {
2233 byte values[2];
2234 int i;
2235
2236 if (co == NULL)
2237 return 1;
2238
2239 for (i = 0; i < 2; i++)
2240 switch (co->m_operands[i].m_type)
2241 {
2242 case cot_byte:
2243 values[i] = co->m_operands[i].m_byte;
2244 break;
2245 case cot_regbyte:
2246 values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2247 break;
2248 }
2249
2250 switch (co->m_type)
2251 {
2252 case ct_equal:
2253 return values[0] == values[1];
2254 case ct_not_equal:
2255 return values[0] != values[1];
2256 }
2257
2258 return 0;
2259 }
2260
2261 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2262 {
2263 while (top != limit)
2264 {
2265 regbyte_ctx *rbc = top->m_prev;
2266 regbyte_ctx_destroy (&top);
2267 top = rbc;
2268 }
2269 }
2270
/*
    result codes returned by the matching functions
*/
typedef enum match_result_
{
    mr_not_matched,     /* the examined string does not match */
    mr_matched,         /* the examined string matches */
    mr_error_raised,    /* mr_not_matched + error has been raised */
    mr_dont_emit,       /* used by identifier loops only */
    mr_internal_error   /* an internal error has occurred such as out of memory */
} match_result;
2279
/*
 * This function does the main job. It parses the text and generates output data.
 *
 * The specifiers of rule ru are tried in order against text, starting at *index.
 * For an op_and rule every specifier has to match; for an op_or rule the first
 * matching specifier wins. Output produced by matched specifiers is added to *ba.
 *
 * filtering_string is non-zero while the function is matching on behalf of the
 * string prefilter (di->m_string); in that mode strings are compared directly
 * instead of being prefiltered again.
 *
 * Returns mr_matched on success; only then *index and *rbc are updated.
 * On every other result the register contexts created here are released back
 * to *rbc and *index is left untouched.
 */
static match_result
match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string,
    regbyte_ctx **rbc)
{
    int ind = *index;
    match_result status = mr_not_matched;
    spec *sp = ru->m_specs;
    regbyte_ctx *ctx = *rbc;

    /* for every specifier in the rule */
    while (sp)
    {
        int i, len, save_ind = ind;
        barray *array = NULL;

        /* a specifier whose condition is not satisfied counts as not matched */
        if (satisfies_condition (sp->m_cond, ctx))
        {
            switch (sp->m_spec_type)
            {
            case st_identifier:
                /* reference to another rule - its output is collected in a temporary array */
                barray_create (&array);
                if (array == NULL)
                {
                    free_regbyte_ctx_stack (ctx, *rbc);
                    return mr_internal_error;
                }

                status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);

                if (status == mr_internal_error)
                {
                    free_regbyte_ctx_stack (ctx, *rbc);
                    barray_destroy (&array);
                    return mr_internal_error;
                }
                break;
            case st_string:
                len = str_length (sp->m_string);

                /* prefilter the stream */
                if (!filtering_string && di->m_string)
                {
                    /* note: this local deliberately hides the ba parameter inside this block */
                    barray *ba;
                    int filter_index = 0;
                    match_result result;
                    regbyte_ctx *null_ctx = NULL;

                    barray_create (&ba);
                    if (ba == NULL)
                    {
                        free_regbyte_ctx_stack (ctx, *rbc);
                        return mr_internal_error;
                    }

                    /* run the string rule on the text at the current position; only the
                       number of consumed characters (filter_index) is of interest */
                    result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);

                    if (result == mr_internal_error)
                    {
                        free_regbyte_ctx_stack (ctx, *rbc);
                        barray_destroy (&ba);
                        return mr_internal_error;
                    }

                    if (result != mr_matched)
                    {
                        barray_destroy (&ba);
                        status = mr_not_matched;
                        break;
                    }

                    barray_destroy (&ba);

                    /* the prefiltered token has to be exactly the expected string */
                    if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
                    {
                        status = mr_not_matched;
                        break;
                    }

                    status = mr_matched;
                    ind += len;
                }
                else
                {
                    /* plain character-by-character comparison */
                    status = mr_matched;
                    for (i = 0; status == mr_matched && i < len; i++)
                        if (text[ind + i] != sp->m_string[i])
                            status = mr_not_matched;

                    if (status == mr_matched)
                        ind += len;
                }
                break;
            case st_byte:
                status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
                if (status == mr_matched)
                    ind++;
                break;
            case st_byte_range:
                status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
                    mr_matched : mr_not_matched;
                if (status == mr_matched)
                    ind++;
                break;
            case st_true:
                status = mr_matched;
                break;
            case st_false:
                status = mr_not_matched;
                break;
            case st_debug:
                /* neutral element: matches in an .and rule, does not match in an .or rule */
                status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
                break;
            case st_identifier_loop:
                barray_create (&array);
                if (array == NULL)
                {
                    free_regbyte_ctx_stack (ctx, *rbc);
                    return mr_internal_error;
                }

                /* the loop pushes its own output on every iteration, so the common code
                   after the switch must not emit anything for it */
                status = mr_dont_emit;
                for (;;)
                {
                    match_result result;

                    save_ind = ind;
                    result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);

                    if (result == mr_error_raised)
                    {
                        status = result;
                        break;
                    }
                    else if (result == mr_matched)
                    {
                        /* one more iteration matched - flush its output and start over
                           with a fresh temporary array */
                        if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
                            barray_append (ba, &array))
                        {
                            free_regbyte_ctx_stack (ctx, *rbc);
                            barray_destroy (&array);
                            return mr_internal_error;
                        }
                        barray_destroy (&array);
                        barray_create (&array);
                        if (array == NULL)
                        {
                            free_regbyte_ctx_stack (ctx, *rbc);
                            return mr_internal_error;
                        }
                    }
                    else if (result == mr_internal_error)
                    {
                        free_regbyte_ctx_stack (ctx, *rbc);
                        barray_destroy (&array);
                        return mr_internal_error;
                    }
                    else
                        /* the referenced rule no longer matches - the loop is done */
                        break;
                }
                break;
            }
        }
        else
        {
            status = mr_not_matched;
        }

        /* an error raised by a nested rule is passed up unchanged */
        if (status == mr_error_raised)
        {
            free_regbyte_ctx_stack (ctx, *rbc);
            barray_destroy (&array);

            return mr_error_raised;
        }

        /* in an .and rule a single failing specifier fails the whole rule */
        if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
        {
            free_regbyte_ctx_stack (ctx, *rbc);
            barray_destroy (&array);

            /* a specifier with an .error clause turns the mismatch into an error */
            if (sp->m_errtext)
            {
                set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
                    ind), ind);

                return mr_error_raised;
            }

            return mr_not_matched;
        }

        if (status == mr_matched)
        {
            /* NOTE(review): text[ind - 1] is evaluated even when nothing has been consumed;
               with ind == 0 this looks like a read before the start of text - confirm
               whether a zero-length match with emits can occur at index 0 */
            if (sp->m_emits)
                if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
                {
                    free_regbyte_ctx_stack (ctx, *rbc);
                    barray_destroy (&array);
                    return mr_internal_error;
                }

            /* add the output of the nested rule, if any */
            if (array)
                if (barray_append (ba, &array))
                {
                    free_regbyte_ctx_stack (ctx, *rbc);
                    barray_destroy (&array);
                    return mr_internal_error;
                }
        }

        barray_destroy (&array);

        /* if the rule operator is a logical or, we pick up the first matching specifier */
        if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
        {
            *index = ind;
            *rbc = ctx;
            return mr_matched;
        }

        sp = sp->next;
    }

    /* everything went fine - all specifiers match up */
    if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
    {
        *index = ind;
        *rbc = ctx;
        return mr_matched;
    }

    /* an .or rule with no matching specifier ends up here */
    free_regbyte_ctx_stack (ctx, *rbc);
    return mr_not_matched;
}
2517
2518 static match_result
2519 fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP,
2520 int filtering_string, regbyte_ctx **rbc)
2521 {
2522 int ind = *index;
2523 int _P = filtering_string ? 0 : *_PP;
2524 int _P2;
2525 match_result status = mr_not_matched;
2526 spec *sp = ru->m_specs;
2527 regbyte_ctx *ctx = *rbc;
2528
2529 /* for every specifier in the rule */
2530 while (sp)
2531 {
2532 int i, len, save_ind = ind;
2533
2534 _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2535 if (bytepool_reserve (_BP, _P2))
2536 {
2537 free_regbyte_ctx_stack (ctx, *rbc);
2538 return mr_internal_error;
2539 }
2540
2541 if (satisfies_condition (sp->m_cond, ctx))
2542 {
2543 switch (sp->m_spec_type)
2544 {
2545 case st_identifier:
2546 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2547
2548 if (status == mr_internal_error)
2549 {
2550 free_regbyte_ctx_stack (ctx, *rbc);
2551 return mr_internal_error;
2552 }
2553 break;
2554 case st_string:
2555 len = str_length (sp->m_string);
2556
2557 /* prefilter the stream */
2558 if (!filtering_string && di->m_string)
2559 {
2560 int filter_index = 0;
2561 match_result result;
2562 regbyte_ctx *null_ctx = NULL;
2563
2564 result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2565
2566 if (result == mr_internal_error)
2567 {
2568 free_regbyte_ctx_stack (ctx, *rbc);
2569 return mr_internal_error;
2570 }
2571
2572 if (result != mr_matched)
2573 {
2574 status = mr_not_matched;
2575 break;
2576 }
2577
2578 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2579 {
2580 status = mr_not_matched;
2581 break;
2582 }
2583
2584 status = mr_matched;
2585 ind += len;
2586 }
2587 else
2588 {
2589 status = mr_matched;
2590 for (i = 0; status == mr_matched && i < len; i++)
2591 if (text[ind + i] != sp->m_string[i])
2592 status = mr_not_matched;
2593
2594 if (status == mr_matched)
2595 ind += len;
2596 }
2597 break;
2598 case st_byte:
2599 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2600 if (status == mr_matched)
2601 ind++;
2602 break;
2603 case st_byte_range:
2604 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2605 mr_matched : mr_not_matched;
2606 if (status == mr_matched)
2607 ind++;
2608 break;
2609 case st_true:
2610 status = mr_matched;
2611 break;
2612 case st_false:
2613 status = mr_not_matched;
2614 break;
2615 case st_debug:
2616 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2617 break;
2618 case st_identifier_loop:
2619 status = mr_dont_emit;
2620 for (;;)
2621 {
2622 match_result result;
2623
2624 save_ind = ind;
2625 result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2626
2627 if (result == mr_error_raised)
2628 {
2629 status = result;
2630 break;
2631 }
2632 else if (result == mr_matched)
2633 {
2634 if (!filtering_string)
2635 {
2636 if (sp->m_emits != NULL)
2637 {
2638 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2639 {
2640 free_regbyte_ctx_stack (ctx, *rbc);
2641 return mr_internal_error;
2642 }
2643 }
2644
2645 _P = _P2;
2646 _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2647 if (bytepool_reserve (_BP, _P2))
2648 {
2649 free_regbyte_ctx_stack (ctx, *rbc);
2650 return mr_internal_error;
2651 }
2652 }
2653 }
2654 else if (result == mr_internal_error)
2655 {
2656 free_regbyte_ctx_stack (ctx, *rbc);
2657 return mr_internal_error;
2658 }
2659 else
2660 break;
2661 }
2662 break;
2663 }
2664 }
2665 else
2666 {
2667 status = mr_not_matched;
2668 }
2669
2670 if (status == mr_error_raised)
2671 {
2672 free_regbyte_ctx_stack (ctx, *rbc);
2673
2674 return mr_error_raised;
2675 }
2676
2677 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2678 {
2679 free_regbyte_ctx_stack (ctx, *rbc);
2680
2681 if (sp->m_errtext)
2682 {
2683 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2684 ind), ind);
2685
2686 return mr_error_raised;
2687 }
2688
2689 return mr_not_matched;
2690 }
2691
2692 if (status == mr_matched)
2693 {
2694 if (sp->m_emits != NULL)
2695 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2696 {
2697 free_regbyte_ctx_stack (ctx, *rbc);
2698 return mr_internal_error;
2699 }
2700
2701 _P = _P2;
2702 }
2703
2704 /* if the rule operator is a logical or, we pick up the first matching specifier */
2705 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2706 {
2707 *index = ind;
2708 *rbc = ctx;
2709 if (!filtering_string)
2710 *_PP = _P;
2711 return mr_matched;
2712 }
2713
2714 sp = sp->next;
2715 }
2716
2717 /* everything went fine - all specifiers match up */
2718 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2719 {
2720 *index = ind;
2721 *rbc = ctx;
2722 if (!filtering_string)
2723 *_PP = _P;
2724 return mr_matched;
2725 }
2726
2727 free_regbyte_ctx_stack (ctx, *rbc);
2728 return mr_not_matched;
2729 }
2730
2731 static byte *
2732 error_get_token (error *er, dict *di, const byte *text, int ind)
2733 {
2734 byte *str = NULL;
2735
2736 if (er->m_token)
2737 {
2738 barray *ba;
2739 int filter_index = 0;
2740 regbyte_ctx *ctx = NULL;
2741
2742 barray_create (&ba);
2743 if (ba != NULL)
2744 {
2745 if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2746 filter_index)
2747 {
2748 str = (byte *) mem_alloc (filter_index + 1);
2749 if (str != NULL)
2750 {
2751 str_copy_n (str, text + ind, filter_index);
2752 str[filter_index] = '\0';
2753 }
2754 }
2755 barray_destroy (&ba);
2756 }
2757 }
2758
2759 return str;
2760 }
2761
2762 typedef struct grammar_load_state_
2763 {
2764 dict *di;
2765 byte *syntax_symbol;
2766 byte *string_symbol;
2767 map_str *maps;
2768 map_byte *mapb;
2769 map_rule *mapr;
2770 } grammar_load_state;
2771
2772 static void grammar_load_state_create (grammar_load_state **gr)
2773 {
2774 *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2775 if (*gr)
2776 {
2777 (**gr).di = NULL;
2778 (**gr).syntax_symbol = NULL;
2779 (**gr).string_symbol = NULL;
2780 (**gr).maps = NULL;
2781 (**gr).mapb = NULL;
2782 (**gr).mapr = NULL;
2783 }
2784 }
2785
2786 static void grammar_load_state_destroy (grammar_load_state **gr)
2787 {
2788 if (*gr)
2789 {
2790 dict_destroy (&(**gr).di);
2791 mem_free ((void **) &(**gr).syntax_symbol);
2792 mem_free ((void **) &(**gr).string_symbol);
2793 map_str_destroy (&(**gr).maps);
2794 map_byte_destroy (&(**gr).mapb);
2795 map_rule_destroy (&(**gr).mapr);
2796 mem_free ((void **) gr);
2797 }
2798 }
2799
2800 /*
2801 the API
2802 */
2803
2804 grammar grammar_load_from_text (const byte *text)
2805 {
2806 grammar_load_state *g = NULL;
2807 grammar id = 0;
2808
2809 clear_last_error ();
2810
2811 grammar_load_state_create (&g);
2812 if (g == NULL)
2813 return 0;
2814
2815 dict_create (&g->di);
2816 if (g->di == NULL)
2817 {
2818 grammar_load_state_destroy (&g);
2819 return 0;
2820 }
2821
2822 eat_spaces (&text);
2823
2824 /* skip ".syntax" keyword */
2825 text += 7;
2826 eat_spaces (&text);
2827
2828 /* retrieve root symbol */
2829 if (get_identifier (&text, &g->syntax_symbol))
2830 {
2831 grammar_load_state_destroy (&g);
2832 return 0;
2833 }
2834 eat_spaces (&text);
2835
2836 /* skip semicolon */
2837 text++;
2838 eat_spaces (&text);
2839
2840 while (*text)
2841 {
2842 byte *symbol = NULL;
2843 int is_dot = *text == '.';
2844
2845 if (is_dot)
2846 text++;
2847
2848 if (get_identifier (&text, &symbol))
2849 {
2850 grammar_load_state_destroy (&g);
2851 return 0;
2852 }
2853 eat_spaces (&text);
2854
2855 /* .emtcode */
2856 if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2857 {
2858 map_byte *ma = NULL;
2859
2860 mem_free ((void **) (void *) &symbol);
2861
2862 if (get_emtcode (&text, &ma))
2863 {
2864 grammar_load_state_destroy (&g);
2865 return 0;
2866 }
2867
2868 map_byte_append (&g->mapb, ma);
2869 }
2870 /* .regbyte */
2871 else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2872 {
2873 map_byte *ma = NULL;
2874
2875 mem_free ((void **) (void *) &symbol);
2876
2877 if (get_regbyte (&text, &ma))
2878 {
2879 grammar_load_state_destroy (&g);
2880 return 0;
2881 }
2882
2883 map_byte_append (&g->di->m_regbytes, ma);
2884 }
2885 /* .errtext */
2886 else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2887 {
2888 map_str *ma = NULL;
2889
2890 mem_free ((void **) (void *) &symbol);
2891
2892 if (get_errtext (&text, &ma))
2893 {
2894 grammar_load_state_destroy (&g);
2895 return 0;
2896 }
2897
2898 map_str_append (&g->maps, ma);
2899 }
2900 /* .string */
2901 else if (is_dot && str_equal (symbol, (byte *) "string"))
2902 {
2903 mem_free ((void **) (void *) &symbol);
2904
2905 if (g->di->m_string != NULL)
2906 {
2907 grammar_load_state_destroy (&g);
2908 return 0;
2909 }
2910
2911 if (get_identifier (&text, &g->string_symbol))
2912 {
2913 grammar_load_state_destroy (&g);
2914 return 0;
2915 }
2916
2917 /* skip semicolon */
2918 eat_spaces (&text);
2919 text++;
2920 eat_spaces (&text);
2921 }
2922 else
2923 {
2924 rule *ru = NULL;
2925 map_rule *ma = NULL;
2926
2927 if (get_rule (&text, &ru, g->maps, g->mapb))
2928 {
2929 grammar_load_state_destroy (&g);
2930 return 0;
2931 }
2932
2933 rule_append (&g->di->m_rulez, ru);
2934
2935 /* if a rule consist of only one specifier, give it an ".and" operator */
2936 if (ru->m_oper == op_none)
2937 ru->m_oper = op_and;
2938
2939 map_rule_create (&ma);
2940 if (ma == NULL)
2941 {
2942 grammar_load_state_destroy (&g);
2943 return 0;
2944 }
2945
2946 ma->key = symbol;
2947 ma->data = ru;
2948 map_rule_append (&g->mapr, ma);
2949 }
2950 }
2951
2952 if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2953 g->di->m_regbytes))
2954 {
2955 grammar_load_state_destroy (&g);
2956 return 0;
2957 }
2958
2959 dict_append (&g_dicts, g->di);
2960 id = g->di->m_id;
2961 g->di = NULL;
2962
2963 grammar_load_state_destroy (&g);
2964
2965 return id;
2966 }
2967
2968 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2969 {
2970 dict *di = NULL;
2971 map_byte *reg = NULL;
2972
2973 clear_last_error ();
2974
2975 dict_find (&g_dicts, id, &di);
2976 if (di == NULL)
2977 {
2978 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2979 return 0;
2980 }
2981
2982 reg = map_byte_locate (&di->m_regbytes, name);
2983 if (reg == NULL)
2984 {
2985 set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2986 return 0;
2987 }
2988
2989 reg->data = value;
2990 return 1;
2991 }
2992
2993 /*
2994 internal checking function used by both grammar_check and grammar_fast_check functions
2995 */
2996 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
2997 unsigned int estimate_prod_size, int use_fast_path)
2998 {
2999 dict *di = NULL;
3000 int index = 0;
3001
3002 clear_last_error ();
3003
3004 dict_find (&g_dicts, id, &di);
3005 if (di == NULL)
3006 {
3007 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3008 return 0;
3009 }
3010
3011 *prod = NULL;
3012 *size = 0;
3013
3014 if (use_fast_path)
3015 {
3016 regbyte_ctx *rbc = NULL;
3017 bytepool *bp = NULL;
3018 int _P = 0;
3019
3020 bytepool_create (&bp, estimate_prod_size);
3021 if (bp == NULL)
3022 return 0;
3023
3024 if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3025 {
3026 bytepool_destroy (&bp);
3027 free_regbyte_ctx_stack (rbc, NULL);
3028 return 0;
3029 }
3030
3031 free_regbyte_ctx_stack (rbc, NULL);
3032
3033 *prod = bp->_F;
3034 *size = _P;
3035 bp->_F = NULL;
3036 bytepool_destroy (&bp);
3037 }
3038 else
3039 {
3040 regbyte_ctx *rbc = NULL;
3041 barray *ba = NULL;
3042
3043 barray_create (&ba);
3044 if (ba == NULL)
3045 return 0;
3046
3047 if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3048 {
3049 barray_destroy (&ba);
3050 free_regbyte_ctx_stack (rbc, NULL);
3051 return 0;
3052 }
3053
3054 free_regbyte_ctx_stack (rbc, NULL);
3055
3056 *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3057 if (*prod == NULL)
3058 {
3059 barray_destroy (&ba);
3060 return 0;
3061 }
3062
3063 mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3064 *size = ba->len;
3065 barray_destroy (&ba);
3066 }
3067
3068 return 1;
3069 }
3070
3071 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3072 {
3073 return _grammar_check (id, text, prod, size, 0, 0);
3074 }
3075
3076 int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3077 unsigned int estimate_prod_size)
3078 {
3079 return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3080 }
3081
3082 int grammar_destroy (grammar id)
3083 {
3084 dict **di = &g_dicts;
3085
3086 clear_last_error ();
3087
3088 while (*di != NULL)
3089 {
3090 if ((**di).m_id == id)
3091 {
3092 dict *tmp = *di;
3093 *di = (**di).next;
3094 dict_destroy (&tmp);
3095 return 1;
3096 }
3097
3098 di = &(**di).next;
3099 }
3100
3101 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3102 return 0;
3103 }
3104
3105 static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3106 {
3107 if (*dots_made == 0)
3108 {
3109 if (*len < size - 1)
3110 {
3111 text[(*len)++] = x;
3112 text[*len] = '\0';
3113 }
3114 else
3115 {
3116 int i;
3117 for (i = 0; i < 3; i++)
3118 if (--(*len) >= 0)
3119 text[*len] = '.';
3120 *dots_made = 1;
3121 }
3122 }
3123 }
3124
3125 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3126 {
3127 int len = 0, dots_made = 0;
3128 const byte *p = error_message;
3129
3130 *text = '\0';
3131
3132 if (p)
3133 {
3134 while (*p)
3135 {
3136 if (*p == '$')
3137 {
3138 const byte *r = error_param;
3139
3140 while (*r)
3141 {
3142 append_character (*r++, text, &dots_made, &len, (int) size);
3143 }
3144
3145 p++;
3146 }
3147 else
3148 {
3149 append_character (*p++, text, &dots_made, &len, size);
3150 }
3151 }
3152 }
3153
3154 *pos = error_position;
3155 }