fix state.light[n].spot.direction.w value (bug 3083)
[mesa.git] / src / mesa / shader / grammar.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.2
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file grammar.c
27 * syntax parsing engine
28 * \author Michal Krol
29 */
30
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
33 #endif
34
35 /*
36 $Id: grammar.c,v 1.10 2004/12/08 14:00:46 alanh Exp $
37 */
38
39 /*
40 INTRODUCTION
41 ------------
42
43 The task is to check the syntax of an input string. Input string is a stream of ASCII
44 characters terminated with a null-character ('\0'). Checking it using C language is
45 difficult and hard to implement without bugs. It is hard to maintain and make changes when
46 the syntax changes.
47
48 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
49 only small changes. Even use of macros does not solve the problem because macros cannot
50 erase the complexity of the problem.
51
52 The resolution is to create a new language that will be highly oriented to our task. Once
53 we describe a particular syntax, we are done. We can then focus on the code that implements
54 the language. Its size and complexity are relatively small compared to the code that directly
55 checks the syntax.
56
57 First, we must implement our new language. Here, the language is implemented in C, but it
58 could also be implemented in any other language. The code is listed below. We must take
59 good care that it is bug free. This is simple because the code is simple and clean.
60
61 Next, we must describe the syntax of our new language in itself. Once created and checked
62 manually that it is correct, we can use it to check other scripts.
63
64 Note that our new language loading code does not have to check the syntax. It is because we
65 assume that the script describing itself is correct, and other scripts can be syntactically
66 checked by the former script. The loading code must only do semantic checking which leads us to
67 simple resolving references.
68
69 THE LANGUAGE
70 ------------
71
72 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
73 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
74 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
75 connected with ".and" or ".or" operator. These operators cannot be mixed together in one
76 definition. Specifier can be a symbol, string, character, character range or a special
77 keyword ".true" or ".false".
78
79 On the very beginning of the script there is a declaration of a root symbol and is in the form:
80 .syntax <root_symbol>;
81 The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
82 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
83 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
84 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
85 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
86 true if any of the specifiers evaluates to true. If definition contains only one specifier,
87 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
88
89 If specifier is a ".true" keyword, it always evaluates to true.
90
91 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
92 when it does not evaluate to true.
93
94 Character range specifier is in the form:
95 '<first_character>' - '<second_character>'
96 If specifier is a character range, it evaluates to true if character in the stream is greater
97 or equal to <first_character> and less or equal to <second_character>. In that situation
98 the stream pointer is advanced to point to next character in the stream. All C-style escape
99 sequences are supported although trigraph sequences are not. The comparisons are performed
100 on 8-bit unsigned integers.
101
102 Character specifier is in the form:
103 '<single_character>'
104 It evaluates to true if the following character range specifier evaluates to true:
105 '<single_character>' - '<single_character>'
106
107 String specifier is in the form:
108 "<string>"
109 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
110 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
111 the following character specifier evaluates to true:
112 '<string>[i]'
113 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
114
115 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
116 .loop <symbol> (1)
117 where <symbol> is defined as follows:
118 <symbol> <definition>; (2)
119 Construction (1) is replaced by the following code:
120 <symbol$1>
121 and declaration (2) is replaced by the following:
122 <symbol$1> <symbol$2> .or .true;
123 <symbol$2> <symbol> .and <symbol$1>;
124 <symbol> <definition>;
125
126 Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
127 registers that can be accessed in the syn body. Each reg has its name and a default value.
128 The register is one byte wide. The C code can change the default value by calling
129 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
130 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
131 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
132 where <operator> can be == or !=. If the condition evaluates to false, the specifier
133 evaluates to .false. Otherwise it evaluates to the specifier.
134
135 ESCAPE SEQUENCES
136 ----------------
137
138 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
139 All occurrences of the characters in the first column are replaced with the corresponding
140 character in the second column.
141
142 Escape sequence Represents
143 ------------------------------------------------------------------------------------------------
144 \a Bell (alert)
145 \b Backspace
146 \f Formfeed
147 \n New line
148 \r Carriage return
149 \t Horizontal tab
150 \v Vertical tab
151 \' Single quotation mark
152 \" Double quotation mark
153 \\ Backslash
154 \? Literal question mark
155 \ooo ASCII character in octal notation
156 \xhhh ASCII character in hexadecimal notation
157 ------------------------------------------------------------------------------------------------
158
159 RAISING ERRORS
160 --------------
161
162 Any specifier can be followed by a special construction that is executed when the specifier
163 evaluates to false. The construction is in the form:
164 .error <ERROR_TEXT>
165 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
166 in the form:
167 .errtext <ERROR_TEXT> "<error_desc>"
168 When specifier evaluates to false and this construction is present, parsing is stopped
169 immediately and <error_desc> is returned as a result of parsing. The error position is also
170 returned and it is meant as an offset from the beginning of the stream to the character that
171 was valid so far. Example:
172
173 (**** syntax script ****)
174
175 .syntax program;
176 .errtext MISSING_SEMICOLON "missing ';'"
177 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
178 .loop space .and '\0';
179 declaration "declare" .and .loop space .and identifier;
180 space ' ';
181
182 (**** sample code ****)
183
184 declare foo ,
185
186 In the example above checking the sample code will result in error message "missing ';'" and
187 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
188 assure that there is no code after semicolon - only spaces.
189 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
190 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
191 the identifier name. The starting position is the error position. The length of the resulting
192 string is the position after invoking the symbol.
193
194 PRODUCTION
195 ----------
196
197 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
198 that evaluate to true. That is, every specifier and optional error construction can be followed
199 by a number of emit constructions that are in the form:
200 .emit <parameter>
201 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
202 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
203 in the form:
204 .emtcode <identifier> <hex_number>
205
206 When given specifier evaluates to true, all emits associated with the specifier are output
207 in order they were declared. A star means that last-read character should be output instead
208 of constant value. Example:
209
210 (**** syntax script ****)
211
212 .syntax foobar;
213 .emtcode WORD_FOO 0x01
214 .emtcode WORD_BAR 0x02
215 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
216 FOO "foo" .and SPACE;
217 BAR "bar" .and SPACE;
218 SPACE ' ' .or '\0';
219
220 (**** sample text 1 ****)
221
222 foo
223
224 (**** sample text 2 ****)
225
226 foobar
227
228 For both samples the result will be one-element array. For first sample text it will be
229 value 1, for second - 0. Note that every text will be accepted because of presence of
230 .true as an alternative.
231
232 Another example:
233
234 (**** syntax script ****)
235
236 .syntax declaration;
237 .emtcode VARIABLE 0x01
238 declaration "declare" .and .loop space .and
239 identifier .emit VARIABLE .and (1)
240 .true .emit 0x00 .and (2)
241 .loop space .and ';';
242 space ' ' .or '\t';
243 identifier .loop id_char .emit *; (3)
244 id_char 'a'-'z' .or 'A'-'Z' .or '_';
245
246 (**** sample code ****)
247
248 declare fubar;
249
250 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
251 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
252 to terminate the string with null to signal when the string ends. Specifier (3) outputs
253 all characters that make declared identifier. The result of sample code will be the
254 following array:
255 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
256
257 If .emit is followed by dollar $, it means that current position should be output. Current
258 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
259 first character consumed by the specifier associated with the .emit instruction. Current
260 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
261 first).
262 */
263
264 static void mem_free (void **);
265
266 /*
267 internal error messages
268 */
269 static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";
270 static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";
271 static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";
272 static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";
273 static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";
274 static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
275
276 static const byte *error_message = NULL; /* points to one of the error messages above */
277 static byte *error_param = NULL; /* this is inserted into error_message in place of $ */
278 static int error_position = -1;
279
280 static byte *unknown = (byte *) "???";
281
282 static void clear_last_error (void)
283 {
284 /* reset error message */
285 error_message = NULL;
286
287 /* free error parameter - if error_param is a "???" don't free it - it's static */
288 if (error_param != unknown)
289 mem_free ((void **) (void *) &error_param);
290 else
291 error_param = NULL;
292
293 /* reset error position */
294 error_position = -1;
295 }
296
297 static void set_last_error (const byte *msg, byte *param, int pos)
298 {
299 /* error message can be set only once */
300 if (error_message != NULL)
301 {
302 mem_free ((void **) (void *) &param);
303 return;
304 }
305
306 error_message = msg;
307
308 /* if param is NULL, set error_param to unknown ("???") */
309 /* note: do not try to strdup the "???" - it may be that we are here because of */
310 /* out of memory error so strdup can fail */
311 if (param != NULL)
312 error_param = param;
313 else
314 error_param = unknown;
315
316 error_position = pos;
317 }
318
319 /*
320 memory management routines
321 */
322 static void *mem_alloc (size_t size)
323 {
324 void *ptr = grammar_alloc_malloc (size);
325 if (ptr == NULL)
326 set_last_error (OUT_OF_MEMORY, NULL, -1);
327 return ptr;
328 }
329
/*
    copies size bytes from src to dst through the port's copy routine
    and hands back whatever that routine returns
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    void *result = grammar_memory_copy (dst, src, size);

    return result;
}
334
/*
    releases the block *ptr points to and resets *ptr to NULL,
    so the caller's pointer cannot be used or freed again by accident
*/
static void mem_free (void **ptr)
{
    void *block = *ptr;

    grammar_alloc_free (block);
    *ptr = NULL;
}
340
341 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
342 {
343 void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
344 if (ptr2 == NULL)
345 set_last_error (OUT_OF_MEMORY, NULL, -1);
346 return ptr2;
347 }
348
349 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
350 {
351 return grammar_string_copy_n (dst, src, max_len);
352 }
353
354 static byte *str_duplicate (const byte *str)
355 {
356 byte *new_str = grammar_string_duplicate (str);
357 if (new_str == NULL)
358 set_last_error (OUT_OF_MEMORY, NULL, -1);
359 return new_str;
360 }
361
362 static int str_equal (const byte *str1, const byte *str2)
363 {
364 return grammar_string_compare (str1, str2) == 0;
365 }
366
367 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
368 {
369 return grammar_string_compare_n (str1, str2, n) == 0;
370 }
371
372 static unsigned int str_length (const byte *str)
373 {
374 return grammar_string_length (str);
375 }
376
/*
    useful macros
*/

/*
    GRAMMAR_IMPLEMENT_LIST_APPEND(type) defines
        static void type##_append (type **x, type *nx)
    which follows the "next" links starting at *x and stores nx in the terminating NULL link;
    when the list is empty (*x == NULL) nx becomes the head
*/
#define GRAMMAR_IMPLEMENT_LIST_APPEND(list_type)\
    static void list_type##_append (list_type **x, list_type *nx) {\
        list_type **link = x;\
        while (*link != NULL)\
            link = &(*link)->next;\
        *link = nx;\
    }
385
386 /*
387 string to byte map typedef
388 */
389 typedef struct map_byte_
390 {
391 byte *key;
392 byte data;
393 struct map_byte_ *next;
394 } map_byte;
395
396 static void map_byte_create (map_byte **ma)
397 {
398 *ma = (map_byte *) mem_alloc (sizeof (map_byte));
399 if (*ma)
400 {
401 (**ma).key = NULL;
402 (**ma).data = '\0';
403 (**ma).next = NULL;
404 }
405 }
406
407 static void map_byte_destroy (map_byte **ma)
408 {
409 if (*ma)
410 {
411 map_byte_destroy (&(**ma).next);
412 mem_free ((void **) &(**ma).key);
413 mem_free ((void **) ma);
414 }
415 }
416
417 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
418
419 /*
420 searches the map for the specified key,
421 returns pointer to the element with the specified key if it exists
422 returns NULL otherwise
423 */
424 static map_byte *map_byte_locate (map_byte **ma, const byte *key)
425 {
426 while (*ma)
427 {
428 if (str_equal ((**ma).key, key))
429 return *ma;
430
431 ma = &(**ma).next;
432 }
433
434 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
435 return NULL;
436 }
437
438 /*
439 searches the map for specified key,
440 if the key is matched, *data is filled with data associated with the key,
441 returns 0 if the key is matched,
442 returns 1 otherwise
443 */
444 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
445 {
446 map_byte *found = map_byte_locate (ma, key);
447 if (found != NULL)
448 {
449 *data = found->data;
450
451 return 0;
452 }
453
454 return 1;
455 }
456
457 /*
458 regbyte context typedef
459
460 Each regbyte consists of its name and a default value. These are static and created at
461 grammar script compile-time, for example the following line:
462 .regbyte vertex_blend 0x00
463 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
464 When the script is executed, this regbyte can be accessed by name for read and write. When a
465 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
466 stack. The new entry contains information about which regbyte it references and its new value.
467 When a given regbyte is accessed for read, the stack is searched top-down to find an
468 entry that references the regbyte. The first matching entry is used to return the current
469 value it holds. If no entry is found, the default value is returned.
470 */
471 typedef struct regbyte_ctx_
472 {
473 map_byte *m_regbyte;
474 byte m_current_value;
475 struct regbyte_ctx_ *m_prev;
476 } regbyte_ctx;
477
478 static void regbyte_ctx_create (regbyte_ctx **re)
479 {
480 *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
481 if (*re)
482 {
483 (**re).m_regbyte = NULL;
484 (**re).m_prev = NULL;
485 }
486 }
487
488 static void regbyte_ctx_destroy (regbyte_ctx **re)
489 {
490 if (*re)
491 {
492 mem_free ((void **) re);
493 }
494 }
495
496 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
497 {
498 /* first lookup in the register stack */
499 while (*re != NULL)
500 {
501 if ((**re).m_regbyte == reg)
502 return (**re).m_current_value;
503
504 re = &(**re).m_prev;
505 }
506
507 /* if not found - return the default value */
508 return reg->data;
509 }
510
511 /*
512 emit type typedef
513 */
514 typedef enum emit_type_
515 {
516 et_byte, /* explicit number */
517 et_stream, /* eaten character */
518 et_position /* current position */
519 } emit_type;
520
521 /*
522 emit destination typedef
523 */
524 typedef enum emit_dest_
525 {
526 ed_output, /* write to the output buffer */
527 ed_regbyte /* write a particular regbyte */
528 } emit_dest;
529
530 /*
531 emit typedef
532 */
533 typedef struct emit_
534 {
535 emit_dest m_emit_dest;
536 emit_type m_emit_type; /* ed_output */
537 byte m_byte; /* et_byte */
538 map_byte *m_regbyte; /* ed_regbyte */
539 byte *m_regname; /* ed_regbyte - temporary */
540 struct emit_ *m_next;
541 } emit;
542
543 static void emit_create (emit **em)
544 {
545 *em = (emit *) mem_alloc (sizeof (emit));
546 if (*em)
547 {
548 (**em).m_emit_dest = ed_output;
549 (**em).m_emit_type = et_byte;
550 (**em).m_byte = '\0';
551 (**em).m_regbyte = NULL;
552 (**em).m_regname = NULL;
553 (**em).m_next = NULL;
554 }
555 }
556
557 static void emit_destroy (emit **em)
558 {
559 if (*em)
560 {
561 emit_destroy (&(**em).m_next);
562 mem_free ((void **) &(**em).m_regname);
563 mem_free ((void **) em);
564 }
565 }
566
567 static unsigned int emit_size (emit *_E)
568 {
569 unsigned int _N = 0;
570
571 while (_E != NULL)
572 {
573 if (_E->m_emit_dest == ed_output)
574 {
575 if (_E->m_emit_type == et_position)
576 _N += 4; /* position is a 32-bit unsigned integer */
577 else
578 _N++;
579 }
580 _E = _E->m_next;
581 }
582
583 return _N;
584 }
585
586 static int emit_push (emit *_E, byte *_P, byte _C, unsigned int _Pos, regbyte_ctx **_Ctx)
587 {
588 while (_E != NULL)
589 {
590 if (_E->m_emit_dest == ed_output)
591 {
592 if (_E->m_emit_type == et_byte)
593 *_P++ = _E->m_byte;
594 else if (_E->m_emit_type == et_stream)
595 *_P++ = _C;
596 else /* _Em->type == et_position */
597 {
598 *_P++ = (byte) (_Pos);
599 *_P++ = (byte) (_Pos >> 8);
600 *_P++ = (byte) (_Pos >> 16);
601 *_P++ = (byte) (_Pos >> 24);
602 }
603 }
604 else
605 {
606 regbyte_ctx *new_rbc;
607 regbyte_ctx_create (&new_rbc);
608 if (new_rbc == NULL)
609 return 1;
610
611 new_rbc->m_prev = *_Ctx;
612 new_rbc->m_regbyte = _E->m_regbyte;
613 *_Ctx = new_rbc;
614
615 if (_E->m_emit_type == et_byte)
616 new_rbc->m_current_value = _E->m_byte;
617 else if (_E->m_emit_type == et_stream)
618 new_rbc->m_current_value = _C;
619 }
620
621 _E = _E->m_next;
622 }
623
624 return 0;
625 }
626
627 /*
628 error typedef
629 */
630 typedef struct error_
631 {
632 byte *m_text;
633 byte *m_token_name;
634 struct rule_ *m_token;
635 } error;
636
637 static void error_create (error **er)
638 {
639 *er = (error *) mem_alloc (sizeof (error));
640 if (*er)
641 {
642 (**er).m_text = NULL;
643 (**er).m_token_name = NULL;
644 (**er).m_token = NULL;
645 }
646 }
647
648 static void error_destroy (error **er)
649 {
650 if (*er)
651 {
652 mem_free ((void **) &(**er).m_text);
653 mem_free ((void **) &(**er).m_token_name);
654 mem_free ((void **) er);
655 }
656 }
657
658 struct dict_;
659 static byte *error_get_token (error *, struct dict_ *, const byte *, unsigned int);
660
661 /*
662 condition operand type typedef
663 */
664 typedef enum cond_oper_type_
665 {
666 cot_byte, /* constant 8-bit unsigned integer */
667 cot_regbyte /* pointer to byte register containing the current value */
668 } cond_oper_type;
669
670 /*
671 condition operand typedef
672 */
673 typedef struct cond_oper_
674 {
675 cond_oper_type m_type;
676 byte m_byte; /* cot_byte */
677 map_byte *m_regbyte; /* cot_regbyte */
678 byte *m_regname; /* cot_regbyte - temporary */
679 } cond_oper;
680
681 /*
682 condition type typedef
683 */
684 typedef enum cond_type_
685 {
686 ct_equal,
687 ct_not_equal
688 } cond_type;
689
690 /*
691 condition typedef
692 */
693 typedef struct cond_
694 {
695 cond_type m_type;
696 cond_oper m_operands[2];
697 } cond;
698
699 static void cond_create (cond **co)
700 {
701 *co = (cond *) mem_alloc (sizeof (cond));
702 if (*co)
703 {
704 (**co).m_operands[0].m_regname = NULL;
705 (**co).m_operands[1].m_regname = NULL;
706 }
707 }
708
709 static void cond_destroy (cond **co)
710 {
711 if (*co)
712 {
713 mem_free ((void **) &(**co).m_operands[0].m_regname);
714 mem_free ((void **) &(**co).m_operands[1].m_regname);
715 mem_free ((void **) co);
716 }
717 }
718
719 /*
720 specifier type typedef
721 */
722 typedef enum spec_type_
723 {
724 st_false,
725 st_true,
726 st_byte,
727 st_byte_range,
728 st_string,
729 st_identifier,
730 st_identifier_loop,
731 st_debug
732 } spec_type;
733
734 /*
735 specifier typedef
736 */
737 typedef struct spec_
738 {
739 spec_type m_spec_type;
740 byte m_byte[2]; /* st_byte, st_byte_range */
741 byte *m_string; /* st_string */
742 struct rule_ *m_rule; /* st_identifier, st_identifier_loop */
743 emit *m_emits;
744 error *m_errtext;
745 cond *m_cond;
746 struct spec_ *next;
747 } spec;
748
749 static void spec_create (spec **sp)
750 {
751 *sp = (spec *) mem_alloc (sizeof (spec));
752 if (*sp)
753 {
754 (**sp).m_spec_type = st_false;
755 (**sp).m_byte[0] = '\0';
756 (**sp).m_byte[1] = '\0';
757 (**sp).m_string = NULL;
758 (**sp).m_rule = NULL;
759 (**sp).m_emits = NULL;
760 (**sp).m_errtext = NULL;
761 (**sp).m_cond = NULL;
762 (**sp).next = NULL;
763 }
764 }
765
766 static void spec_destroy (spec **sp)
767 {
768 if (*sp)
769 {
770 spec_destroy (&(**sp).next);
771 emit_destroy (&(**sp).m_emits);
772 error_destroy (&(**sp).m_errtext);
773 mem_free ((void **) &(**sp).m_string);
774 cond_destroy (&(**sp).m_cond);
775 mem_free ((void **) sp);
776 }
777 }
778
779 GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
780
781 /*
782 operator typedef
783 */
784 typedef enum oper_
785 {
786 op_none,
787 op_and,
788 op_or
789 } oper;
790
791 /*
792 rule typedef
793 */
794 typedef struct rule_
795 {
796 oper m_oper;
797 spec *m_specs;
798 struct rule_ *next;
799 int m_referenced;
800 } rule;
801
802 static void rule_create (rule **ru)
803 {
804 *ru = (rule *) mem_alloc (sizeof (rule));
805 if (*ru)
806 {
807 (**ru).m_oper = op_none;
808 (**ru).m_specs = NULL;
809 (**ru).next = NULL;
810 (**ru).m_referenced = 0;
811 }
812 }
813
814 static void rule_destroy (rule **ru)
815 {
816 if (*ru)
817 {
818 rule_destroy (&(**ru).next);
819 spec_destroy (&(**ru).m_specs);
820 mem_free ((void **) ru);
821 }
822 }
823
824 GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
825
826 /*
827 returns unique grammar id
828 */
829 static grammar next_valid_grammar_id (void)
830 {
831 static grammar id = 0;
832
833 return ++id;
834 }
835
836 /*
837 dictionary typedef
838 */
839 typedef struct dict_
840 {
841 rule *m_rulez;
842 rule *m_syntax;
843 rule *m_string;
844 map_byte *m_regbytes;
845 grammar m_id;
846 struct dict_ *next;
847 } dict;
848
849 static void dict_create (dict **di)
850 {
851 *di = (dict *) mem_alloc (sizeof (dict));
852 if (*di)
853 {
854 (**di).m_rulez = NULL;
855 (**di).m_syntax = NULL;
856 (**di).m_string = NULL;
857 (**di).m_regbytes = NULL;
858 (**di).m_id = next_valid_grammar_id ();
859 (**di).next = NULL;
860 }
861 }
862
863 static void dict_destroy (dict **di)
864 {
865 if (*di)
866 {
867 rule_destroy (&(**di).m_rulez);
868 map_byte_destroy (&(**di).m_regbytes);
869 mem_free ((void **) di);
870 }
871 }
872
873 GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
874
875 static void dict_find (dict **di, grammar key, dict **data)
876 {
877 while (*di)
878 {
879 if ((**di).m_id == key)
880 {
881 *data = *di;
882 return;
883 }
884
885 di = &(**di).next;
886 }
887
888 *data = NULL;
889 }
890
891 static dict *g_dicts = NULL;
892
893 /*
894 byte array typedef
895 */
896 typedef struct barray_
897 {
898 byte *data;
899 unsigned int len;
900 } barray;
901
902 static void barray_create (barray **ba)
903 {
904 *ba = (barray *) mem_alloc (sizeof (barray));
905 if (*ba)
906 {
907 (**ba).data = NULL;
908 (**ba).len = 0;
909 }
910 }
911
912 static void barray_destroy (barray **ba)
913 {
914 if (*ba)
915 {
916 mem_free ((void **) &(**ba).data);
917 mem_free ((void **) ba);
918 }
919 }
920
921 /*
922 reallocates byte array to requested size,
923 returns 0 on success,
924 returns 1 otherwise
925 */
926 static int barray_resize (barray **ba, unsigned int nlen)
927 {
928 byte *new_pointer;
929
930 if (nlen == 0)
931 {
932 mem_free ((void **) &(**ba).data);
933 (**ba).data = NULL;
934 (**ba).len = 0;
935
936 return 0;
937 }
938 else
939 {
940 new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
941 nlen * sizeof (byte));
942 if (new_pointer)
943 {
944 (**ba).data = new_pointer;
945 (**ba).len = nlen;
946
947 return 0;
948 }
949 }
950
951 return 1;
952 }
953
954 /*
955 adds byte array pointed by *nb to the end of array pointed by *ba,
956 returns 0 on success,
957 returns 1 otherwise
958 */
959 static int barray_append (barray **ba, barray **nb)
960 {
961 const unsigned int len = (**ba).len;
962
963 if (barray_resize (ba, (**ba).len + (**nb).len))
964 return 1;
965
966 mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
967
968 return 0;
969 }
970
971 /*
972 adds emit chain pointed by em to the end of array pointed by *ba,
973 returns 0 on success,
974 returns 1 otherwise
975 */
976 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
977 {
978 unsigned int count = emit_size (em);
979
980 if (barray_resize (ba, (**ba).len + count))
981 return 1;
982
983 return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
984 }
985
986 /*
987 byte pool typedef
988 */
989 typedef struct bytepool_
990 {
991 byte *_F;
992 unsigned int _Siz;
993 } bytepool;
994
995 static void bytepool_destroy (bytepool **by)
996 {
997 if (*by != NULL)
998 {
999 mem_free ((void **) &(**by)._F);
1000 mem_free ((void **) by);
1001 }
1002 }
1003
1004 static void bytepool_create (bytepool **by, int len)
1005 {
1006 *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1007 if (*by != NULL)
1008 {
1009 (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1010 (**by)._Siz = len;
1011
1012 if ((**by)._F == NULL)
1013 bytepool_destroy (by);
1014 }
1015 }
1016
1017 static int bytepool_reserve (bytepool *by, unsigned int _N)
1018 {
1019 byte *_P;
1020
1021 if (_N <= by->_Siz)
1022 return 0;
1023
1024 /* byte pool can only grow and at least by doubling its size */
1025 _N = _N >= by->_Siz * 2 ? _N : by->_Siz * 2;
1026
1027 /* reallocate the memory and adjust pointers to the new memory location */
1028 _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * _N));
1029 if (_P != NULL)
1030 {
1031 by->_F = _P;
1032 by->_Siz = _N;
1033 return 0;
1034 }
1035
1036 return 1;
1037 }
1038
1039 /*
1040 string to string map typedef
1041 */
1042 typedef struct map_str_
1043 {
1044 byte *key;
1045 byte *data;
1046 struct map_str_ *next;
1047 } map_str;
1048
1049 static void map_str_create (map_str **ma)
1050 {
1051 *ma = (map_str *) mem_alloc (sizeof (map_str));
1052 if (*ma)
1053 {
1054 (**ma).key = NULL;
1055 (**ma).data = NULL;
1056 (**ma).next = NULL;
1057 }
1058 }
1059
1060 static void map_str_destroy (map_str **ma)
1061 {
1062 if (*ma)
1063 {
1064 map_str_destroy (&(**ma).next);
1065 mem_free ((void **) &(**ma).key);
1066 mem_free ((void **) &(**ma).data);
1067 mem_free ((void **) ma);
1068 }
1069 }
1070
1071 GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
1072
1073 /*
1074 searches the map for specified key,
1075 if the key is matched, *data is filled with data associated with the key,
1076 returns 0 if the key is matched,
1077 returns 1 otherwise
1078 */
1079 static int map_str_find (map_str **ma, const byte *key, byte **data)
1080 {
1081 while (*ma)
1082 {
1083 if (str_equal ((**ma).key, key))
1084 {
1085 *data = str_duplicate ((**ma).data);
1086 if (*data == NULL)
1087 return 1;
1088
1089 return 0;
1090 }
1091
1092 ma = &(**ma).next;
1093 }
1094
1095 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1096 return 1;
1097 }
1098
1099 /*
1100 string to rule map typedef
1101 */
1102 typedef struct map_rule_
1103 {
1104 byte *key;
1105 rule *data;
1106 struct map_rule_ *next;
1107 } map_rule;
1108
1109 static void map_rule_create (map_rule **ma)
1110 {
1111 *ma = (map_rule *) mem_alloc (sizeof (map_rule));
1112 if (*ma)
1113 {
1114 (**ma).key = NULL;
1115 (**ma).data = NULL;
1116 (**ma).next = NULL;
1117 }
1118 }
1119
1120 static void map_rule_destroy (map_rule **ma)
1121 {
1122 if (*ma)
1123 {
1124 map_rule_destroy (&(**ma).next);
1125 mem_free ((void **) &(**ma).key);
1126 mem_free ((void **) ma);
1127 }
1128 }
1129
1130 GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
1131
1132 /*
1133 searches the map for specified key,
1134 if the key is matched, *data is filled with data associated with the key,
1135 returns 0 if the is matched,
1136 returns 1 otherwise
1137 */
1138 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1139 {
1140 while (*ma)
1141 {
1142 if (str_equal ((**ma).key, key))
1143 {
1144 *data = (**ma).data;
1145
1146 return 0;
1147 }
1148
1149 ma = &(**ma).next;
1150 }
1151
1152 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1153 return 1;
1154 }
1155
1156 /*
1157 returns 1 if given character is a white space,
1158 returns 0 otherwise
1159 */
1160 static int is_space (byte c)
1161 {
1162 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1163 }
1164
1165 /*
1166 advances text pointer by 1 if character pointed by *text is a space,
1167 returns 1 if a space has been eaten,
1168 returns 0 otherwise
1169 */
1170 static int eat_space (const byte **text)
1171 {
1172 if (is_space (**text))
1173 {
1174 (*text)++;
1175
1176 return 1;
1177 }
1178
1179 return 0;
1180 }
1181
1182 /*
1183 returns 1 if text points to C-style comment start string,
1184 returns 0 otherwise
1185 */
1186 static int is_comment_start (const byte *text)
1187 {
1188 return text[0] == '/' && text[1] == '*';
1189 }
1190
1191 /*
1192 advances text pointer to first character after C-style comment block - if any,
1193 returns 1 if C-style comment block has been encountered and eaten,
1194 returns 0 otherwise
1195 */
1196 static int eat_comment (const byte **text)
1197 {
1198 if (is_comment_start (*text))
1199 {
1200 /* *text points to comment block - skip two characters to enter comment body */
1201 *text += 2;
1202 /* skip any character except consecutive '*' and '/' */
1203 while (!((*text)[0] == '*' && (*text)[1] == '/'))
1204 (*text)++;
1205 /* skip those two terminating characters */
1206 *text += 2;
1207
1208 return 1;
1209 }
1210
1211 return 0;
1212 }
1213
1214 /*
1215 advances text pointer to first character that is neither space nor C-style comment block
1216 */
1217 static void eat_spaces (const byte **text)
1218 {
1219 while (eat_space (text) || eat_comment (text))
1220 ;
1221 }
1222
1223 /*
1224 resizes string pointed by *ptr to successfully add character c to the end of the string,
1225 returns 0 on success,
1226 returns 1 otherwise
1227 */
1228 static int string_grow (byte **ptr, unsigned int *len, byte c)
1229 {
1230 /* reallocate the string in 16-byte increments */
1231 if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1232 {
1233 byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1234 ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1235 if (tmp == NULL)
1236 return 1;
1237
1238 *ptr = tmp;
1239 }
1240
1241 if (c)
1242 {
1243 /* append given character */
1244 (*ptr)[*len] = c;
1245 (*len)++;
1246 }
1247 (*ptr)[*len] = '\0';
1248
1249 return 0;
1250 }
1251
1252 /*
1253 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1254 returns 0 otherwise
1255 */
1256 static int is_identifier (byte c)
1257 {
1258 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1259 }
1260
1261 /*
1262 copies characters from *text to *id until non-identifier character is encountered,
1263 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1264 text pointer is advanced to point past the copied identifier,
1265 returns 0 if identifier was successfully copied,
1266 returns 1 otherwise
1267 */
1268 static int get_identifier (const byte **text, byte **id)
1269 {
1270 const byte *t = *text;
1271 byte *p = NULL;
1272 unsigned int len = 0;
1273
1274 if (string_grow (&p, &len, '\0'))
1275 return 1;
1276
1277 /* loop while next character in buffer is valid for identifiers */
1278 while (is_identifier (*t))
1279 {
1280 if (string_grow (&p, &len, *t++))
1281 {
1282 mem_free ((void **) (void *) &p);
1283 return 1;
1284 }
1285 }
1286
1287 *text = t;
1288 *id = p;
1289
1290 return 0;
1291 }
1292
1293 /*
1294 converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1295 advances text pointer past the converted sequence,
1296 returns the converted value
1297 */
1298 static unsigned int dec_convert (const byte **text)
1299 {
1300 unsigned int value = 0;
1301
1302 while (**text >= '0' && **text <= '9')
1303 {
1304 value = value * 10 + **text - '0';
1305 (*text)++;
1306 }
1307
1308 return value;
1309 }
1310
1311 /*
1312 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1313 returns 0 otherwise
1314 */
1315 static int is_hex (byte c)
1316 {
1317 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1318 }
1319
1320 /*
1321 returns value of passed character as if it was HEX digit
1322 */
1323 static unsigned int hex2dec (byte c)
1324 {
1325 if (c >= '0' && c <= '9')
1326 return c - '0';
1327 if (c >= 'A' && c <= 'F')
1328 return c - 'A' + 10;
1329 return c - 'a' + 10;
1330 }
1331
1332 /*
1333 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1334 advances text pointer past the converted sequence,
1335 returns the converted value
1336 */
1337 static unsigned int hex_convert (const byte **text)
1338 {
1339 unsigned int value = 0;
1340
1341 while (is_hex (**text))
1342 {
1343 value = value * 0x10 + hex2dec (**text);
1344 (*text)++;
1345 }
1346
1347 return value;
1348 }
1349
1350 /*
1351 returns 1 if given character is OCT digit 0-7,
1352 returns 0 otherwise
1353 */
1354 static int is_oct (byte c)
1355 {
1356 return c >= '0' && c <= '7';
1357 }
1358
1359 /*
1360 returns value of passed character as if it was OCT digit
1361 */
1362 static int oct2dec (byte c)
1363 {
1364 return c - '0';
1365 }
1366
1367 static byte get_escape_sequence (const byte **text)
1368 {
1369 int value = 0;
1370
1371 /* skip '\' character */
1372 (*text)++;
1373
1374 switch (*(*text)++)
1375 {
1376 case '\'':
1377 return '\'';
1378 case '"':
1379 return '\"';
1380 case '?':
1381 return '\?';
1382 case '\\':
1383 return '\\';
1384 case 'a':
1385 return '\a';
1386 case 'b':
1387 return '\b';
1388 case 'f':
1389 return '\f';
1390 case 'n':
1391 return '\n';
1392 case 'r':
1393 return '\r';
1394 case 't':
1395 return '\t';
1396 case 'v':
1397 return '\v';
1398 case 'x':
1399 return (byte) hex_convert (text);
1400 }
1401
1402 (*text)--;
1403 if (is_oct (**text))
1404 {
1405 value = oct2dec (*(*text)++);
1406 if (is_oct (**text))
1407 {
1408 value = value * 010 + oct2dec (*(*text)++);
1409 if (is_oct (**text))
1410 value = value * 010 + oct2dec (*(*text)++);
1411 }
1412 }
1413
1414 return (byte) value;
1415 }
1416
1417 /*
1418 copies characters from *text to *str until " or ' character is encountered,
1419 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1420 assumes that *text points to " or ' character that starts the string,
1421 text pointer is advanced to point past the " or ' character,
1422 returns 0 if string was successfully copied,
1423 returns 1 otherwise
1424 */
1425 static int get_string (const byte **text, byte **str)
1426 {
1427 const byte *t = *text;
1428 byte *p = NULL;
1429 unsigned int len = 0;
1430 byte term_char;
1431
1432 if (string_grow (&p, &len, '\0'))
1433 return 1;
1434
1435 /* read " or ' character that starts the string */
1436 term_char = *t++;
1437 /* while next character is not the terminating character */
1438 while (*t && *t != term_char)
1439 {
1440 byte c;
1441
1442 if (*t == '\\')
1443 c = get_escape_sequence (&t);
1444 else
1445 c = *t++;
1446
1447 if (string_grow (&p, &len, c))
1448 {
1449 mem_free ((void **) (void *) &p);
1450 return 1;
1451 }
1452 }
1453 /* skip " or ' character that ends the string */
1454 t++;
1455
1456 *text = t;
1457 *str = p;
1458 return 0;
1459 }
1460
1461 /*
1462 gets emit code, the syntax is:
1463 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1464 assumes that *text already points to <symbol>,
1465 returns 0 if emit code is successfully read,
1466 returns 1 otherwise
1467 */
1468 static int get_emtcode (const byte **text, map_byte **ma)
1469 {
1470 const byte *t = *text;
1471 map_byte *m = NULL;
1472
1473 map_byte_create (&m);
1474 if (m == NULL)
1475 return 1;
1476
1477 if (get_identifier (&t, &m->key))
1478 {
1479 map_byte_destroy (&m);
1480 return 1;
1481 }
1482 eat_spaces (&t);
1483
1484 if (*t == '\'')
1485 {
1486 byte *c;
1487
1488 if (get_string (&t, &c))
1489 {
1490 map_byte_destroy (&m);
1491 return 1;
1492 }
1493
1494 m->data = (byte) c[0];
1495 mem_free ((void **) (void *) &c);
1496 }
1497 else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
1498 {
1499 /* skip HEX "0x" or "0X" prefix */
1500 t += 2;
1501 m->data = (byte) hex_convert (&t);
1502 }
1503 else
1504 {
1505 m->data = (byte) dec_convert (&t);
1506 }
1507
1508 eat_spaces (&t);
1509
1510 *text = t;
1511 *ma = m;
1512 return 0;
1513 }
1514
1515 /*
1516 gets regbyte declaration, the syntax is:
1517 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1518 assumes that *text already points to <symbol>,
1519 returns 0 if regbyte is successfully read,
1520 returns 1 otherwise
1521 */
1522 static int get_regbyte (const byte **text, map_byte **ma)
1523 {
1524 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1525 return get_emtcode (text, ma);
1526 }
1527
1528 /*
1529 returns 0 on success,
1530 returns 1 otherwise
1531 */
1532 static int get_errtext (const byte **text, map_str **ma)
1533 {
1534 const byte *t = *text;
1535 map_str *m = NULL;
1536
1537 map_str_create (&m);
1538 if (m == NULL)
1539 return 1;
1540
1541 if (get_identifier (&t, &m->key))
1542 {
1543 map_str_destroy (&m);
1544 return 1;
1545 }
1546 eat_spaces (&t);
1547
1548 if (get_string (&t, &m->data))
1549 {
1550 map_str_destroy (&m);
1551 return 1;
1552 }
1553 eat_spaces (&t);
1554
1555 *text = t;
1556 *ma = m;
1557 return 0;
1558 }
1559
1560 /*
1561 returns 0 on success,
1562 returns 1 otherwise,
1563 */
1564 static int get_error (const byte **text, error **er, map_str *maps)
1565 {
1566 const byte *t = *text;
1567 byte *temp = NULL;
1568
1569 if (*t != '.')
1570 return 0;
1571
1572 t++;
1573 if (get_identifier (&t, &temp))
1574 return 1;
1575 eat_spaces (&t);
1576
1577 if (!str_equal ((byte *) "error", temp))
1578 {
1579 mem_free ((void **) (void *) &temp);
1580 return 0;
1581 }
1582
1583 mem_free ((void **) (void *) &temp);
1584
1585 error_create (er);
1586 if (*er == NULL)
1587 return 1;
1588
1589 if (*t == '\"')
1590 {
1591 if (get_string (&t, &(**er).m_text))
1592 {
1593 error_destroy (er);
1594 return 1;
1595 }
1596 eat_spaces (&t);
1597 }
1598 else
1599 {
1600 if (get_identifier (&t, &temp))
1601 {
1602 error_destroy (er);
1603 return 1;
1604 }
1605 eat_spaces (&t);
1606
1607 if (map_str_find (&maps, temp, &(**er).m_text))
1608 {
1609 mem_free ((void **) (void *) &temp);
1610 error_destroy (er);
1611 return 1;
1612 }
1613
1614 mem_free ((void **) (void *) &temp);
1615 }
1616
1617 /* try to extract "token" from "...$token$..." */
1618 {
1619 byte *processed = NULL;
1620 unsigned int len = 0, i = 0;
1621
1622 if (string_grow (&processed, &len, '\0'))
1623 {
1624 error_destroy (er);
1625 return 1;
1626 }
1627
1628 while (i < str_length ((**er).m_text))
1629 {
1630 /* check if the dollar sign is repeated - if so skip it */
1631 if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1632 {
1633 if (string_grow (&processed, &len, '$'))
1634 {
1635 mem_free ((void **) (void *) &processed);
1636 error_destroy (er);
1637 return 1;
1638 }
1639
1640 i += 2;
1641 }
1642 else if ((**er).m_text[i] != '$')
1643 {
1644 if (string_grow (&processed, &len, (**er).m_text[i]))
1645 {
1646 mem_free ((void **) (void *) &processed);
1647 error_destroy (er);
1648 return 1;
1649 }
1650
1651 i++;
1652 }
1653 else
1654 {
1655 if (string_grow (&processed, &len, '$'))
1656 {
1657 mem_free ((void **) (void *) &processed);
1658 error_destroy (er);
1659 return 1;
1660 }
1661
1662 {
1663 /* length of token being extracted */
1664 unsigned int tlen = 0;
1665
1666 if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1667 {
1668 mem_free ((void **) (void *) &processed);
1669 error_destroy (er);
1670 return 1;
1671 }
1672
1673 /* skip the dollar sign */
1674 i++;
1675
1676 while ((**er).m_text[i] != '$')
1677 {
1678 if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1679 {
1680 mem_free ((void **) (void *) &processed);
1681 error_destroy (er);
1682 return 1;
1683 }
1684
1685 i++;
1686 }
1687
1688 /* skip the dollar sign */
1689 i++;
1690 }
1691 }
1692 }
1693
1694 mem_free ((void **) &(**er).m_text);
1695 (**er).m_text = processed;
1696 }
1697
1698 *text = t;
1699 return 0;
1700 }
1701
1702 /*
1703 returns 0 on success,
1704 returns 1 otherwise,
1705 */
1706 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1707 {
1708 const byte *t = *text;
1709 byte *temp = NULL;
1710 emit *e = NULL;
1711 emit_dest dest;
1712
1713 if (*t != '.')
1714 return 0;
1715
1716 t++;
1717 if (get_identifier (&t, &temp))
1718 return 1;
1719 eat_spaces (&t);
1720
1721 /* .emit */
1722 if (str_equal ((byte *) "emit", temp))
1723 dest = ed_output;
1724 /* .load */
1725 else if (str_equal ((byte *) "load", temp))
1726 dest = ed_regbyte;
1727 else
1728 {
1729 mem_free ((void **) (void *) &temp);
1730 return 0;
1731 }
1732
1733 mem_free ((void **) (void *) &temp);
1734
1735 emit_create (&e);
1736 if (e == NULL)
1737 return 1;
1738
1739 e->m_emit_dest = dest;
1740
1741 if (dest == ed_regbyte)
1742 {
1743 if (get_identifier (&t, &e->m_regname))
1744 {
1745 emit_destroy (&e);
1746 return 1;
1747 }
1748 eat_spaces (&t);
1749 }
1750
1751 /* 0xNN */
1752 if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
1753 {
1754 t += 2;
1755 e->m_byte = (byte) hex_convert (&t);
1756
1757 e->m_emit_type = et_byte;
1758 }
1759 /* NNN */
1760 else if (*t >= '0' && *t <= '9')
1761 {
1762 e->m_byte = (byte) dec_convert (&t);
1763
1764 e->m_emit_type = et_byte;
1765 }
1766 /* * */
1767 else if (*t == '*')
1768 {
1769 t++;
1770
1771 e->m_emit_type = et_stream;
1772 }
1773 /* $ */
1774 else if (*t == '$')
1775 {
1776 t++;
1777
1778 e->m_emit_type = et_position;
1779 }
1780 /* 'c' */
1781 else if (*t == '\'')
1782 {
1783 if (get_string (&t, &temp))
1784 {
1785 emit_destroy (&e);
1786 return 1;
1787 }
1788 e->m_byte = (byte) temp[0];
1789
1790 mem_free ((void **) (void *) &temp);
1791
1792 e->m_emit_type = et_byte;
1793 }
1794 else
1795 {
1796 if (get_identifier (&t, &temp))
1797 {
1798 emit_destroy (&e);
1799 return 1;
1800 }
1801
1802 if (map_byte_find (&mapb, temp, &e->m_byte))
1803 {
1804 mem_free ((void **) (void *) &temp);
1805 emit_destroy (&e);
1806 return 1;
1807 }
1808
1809 mem_free ((void **) (void *) &temp);
1810
1811 e->m_emit_type = et_byte;
1812 }
1813
1814 eat_spaces (&t);
1815
1816 if (get_emits (&t, &e->m_next, mapb))
1817 {
1818 emit_destroy (&e);
1819 return 1;
1820 }
1821
1822 *text = t;
1823 *em = e;
1824 return 0;
1825 }
1826
1827 /*
1828 returns 0 on success,
1829 returns 1 otherwise,
1830 */
1831 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1832 {
1833 const byte *t = *text;
1834 spec *s = NULL;
1835
1836 spec_create (&s);
1837 if (s == NULL)
1838 return 1;
1839
1840 /* first - read optional .if statement */
1841 if (*t == '.')
1842 {
1843 const byte *u = t;
1844 byte *keyword = NULL;
1845
1846 /* skip the dot */
1847 u++;
1848
1849 if (get_identifier (&u, &keyword))
1850 {
1851 spec_destroy (&s);
1852 return 1;
1853 }
1854
1855 /* .if */
1856 if (str_equal ((byte *) "if", keyword))
1857 {
1858 cond_create (&s->m_cond);
1859 if (s->m_cond == NULL)
1860 {
1861 spec_destroy (&s);
1862 return 1;
1863 }
1864
1865 /* skip the left paren */
1866 eat_spaces (&u);
1867 u++;
1868
1869 /* get the left operand */
1870 eat_spaces (&u);
1871 if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1872 {
1873 spec_destroy (&s);
1874 return 1;
1875 }
1876 s->m_cond->m_operands[0].m_type = cot_regbyte;
1877
1878 /* get the operator (!= or ==) */
1879 eat_spaces (&u);
1880 if (*u == '!')
1881 s->m_cond->m_type = ct_not_equal;
1882 else
1883 s->m_cond->m_type = ct_equal;
1884 u += 2;
1885 eat_spaces (&u);
1886
1887 if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1888 {
1889 /* skip the 0x prefix */
1890 u += 2;
1891
1892 /* get the right operand */
1893 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1894 s->m_cond->m_operands[1].m_type = cot_byte;
1895 }
1896 else /*if (*u >= '0' && *u <= '9')*/
1897 {
1898 /* get the right operand */
1899 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1900 s->m_cond->m_operands[1].m_type = cot_byte;
1901 }
1902
1903 /* skip the right paren */
1904 eat_spaces (&u);
1905 u++;
1906
1907 eat_spaces (&u);
1908
1909 t = u;
1910 }
1911
1912 mem_free ((void **) (void *) &keyword);
1913 }
1914
1915 if (*t == '\'')
1916 {
1917 byte *temp = NULL;
1918
1919 if (get_string (&t, &temp))
1920 {
1921 spec_destroy (&s);
1922 return 1;
1923 }
1924 eat_spaces (&t);
1925
1926 if (*t == '-')
1927 {
1928 byte *temp2 = NULL;
1929
1930 /* skip the '-' character */
1931 t++;
1932 eat_spaces (&t);
1933
1934 if (get_string (&t, &temp2))
1935 {
1936 mem_free ((void **) (void *) &temp);
1937 spec_destroy (&s);
1938 return 1;
1939 }
1940 eat_spaces (&t);
1941
1942 s->m_spec_type = st_byte_range;
1943 s->m_byte[0] = *temp;
1944 s->m_byte[1] = *temp2;
1945
1946 mem_free ((void **) (void *) &temp2);
1947 }
1948 else
1949 {
1950 s->m_spec_type = st_byte;
1951 *s->m_byte = *temp;
1952 }
1953
1954 mem_free ((void **) (void *) &temp);
1955 }
1956 else if (*t == '"')
1957 {
1958 if (get_string (&t, &s->m_string))
1959 {
1960 spec_destroy (&s);
1961 return 1;
1962 }
1963 eat_spaces (&t);
1964
1965 s->m_spec_type = st_string;
1966 }
1967 else if (*t == '.')
1968 {
1969 byte *keyword = NULL;
1970
1971 /* skip the dot */
1972 t++;
1973
1974 if (get_identifier (&t, &keyword))
1975 {
1976 spec_destroy (&s);
1977 return 1;
1978 }
1979 eat_spaces (&t);
1980
1981 /* .true */
1982 if (str_equal ((byte *) "true", keyword))
1983 {
1984 s->m_spec_type = st_true;
1985 }
1986 /* .false */
1987 else if (str_equal ((byte *) "false", keyword))
1988 {
1989 s->m_spec_type = st_false;
1990 }
1991 /* .debug */
1992 else if (str_equal ((byte *) "debug", keyword))
1993 {
1994 s->m_spec_type = st_debug;
1995 }
1996 /* .loop */
1997 else if (str_equal ((byte *) "loop", keyword))
1998 {
1999 if (get_identifier (&t, &s->m_string))
2000 {
2001 mem_free ((void **) (void *) &keyword);
2002 spec_destroy (&s);
2003 return 1;
2004 }
2005 eat_spaces (&t);
2006
2007 s->m_spec_type = st_identifier_loop;
2008 }
2009 mem_free ((void **) (void *) &keyword);
2010 }
2011 else
2012 {
2013 if (get_identifier (&t, &s->m_string))
2014 {
2015 spec_destroy (&s);
2016 return 1;
2017 }
2018 eat_spaces (&t);
2019
2020 s->m_spec_type = st_identifier;
2021 }
2022
2023 if (get_error (&t, &s->m_errtext, maps))
2024 {
2025 spec_destroy (&s);
2026 return 1;
2027 }
2028
2029 if (get_emits (&t, &s->m_emits, mapb))
2030 {
2031 spec_destroy (&s);
2032 return 1;
2033 }
2034
2035 *text = t;
2036 *sp = s;
2037 return 0;
2038 }
2039
2040 /*
2041 returns 0 on success,
2042 returns 1 otherwise,
2043 */
2044 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2045 {
2046 const byte *t = *text;
2047 rule *r = NULL;
2048
2049 rule_create (&r);
2050 if (r == NULL)
2051 return 1;
2052
2053 if (get_spec (&t, &r->m_specs, maps, mapb))
2054 {
2055 rule_destroy (&r);
2056 return 1;
2057 }
2058
2059 while (*t != ';')
2060 {
2061 byte *op = NULL;
2062 spec *sp = NULL;
2063
2064 /* skip the dot that precedes "and" or "or" */
2065 t++;
2066
2067 /* read "and" or "or" keyword */
2068 if (get_identifier (&t, &op))
2069 {
2070 rule_destroy (&r);
2071 return 1;
2072 }
2073 eat_spaces (&t);
2074
2075 if (r->m_oper == op_none)
2076 {
2077 /* .and */
2078 if (str_equal ((byte *) "and", op))
2079 r->m_oper = op_and;
2080 /* .or */
2081 else
2082 r->m_oper = op_or;
2083 }
2084
2085 mem_free ((void **) (void *) &op);
2086
2087 if (get_spec (&t, &sp, maps, mapb))
2088 {
2089 rule_destroy (&r);
2090 return 1;
2091 }
2092
2093 spec_append (&r->m_specs, sp);
2094 }
2095
2096 /* skip the semicolon */
2097 t++;
2098 eat_spaces (&t);
2099
2100 *text = t;
2101 *ru = r;
2102 return 0;
2103 }
2104
2105 /*
2106 returns 0 on success,
2107 returns 1 otherwise,
2108 */
2109 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2110 {
2111 if (map_rule_find (&mapr, symbol, ru))
2112 return 1;
2113
2114 (**ru).m_referenced = 1;
2115
2116 return 0;
2117 }
2118
2119 /*
2120 returns 0 on success,
2121 returns 1 otherwise,
2122 */
2123 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2124 byte **string_symbol, map_byte *regbytes)
2125 {
2126 rule *rulez = di->m_rulez;
2127
2128 /* update dependecies for the root and lexer symbols */
2129 if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2130 (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2131 return 1;
2132
2133 mem_free ((void **) syntax_symbol);
2134 mem_free ((void **) string_symbol);
2135
2136 /* update dependecies for the rest of the rules */
2137 while (rulez)
2138 {
2139 spec *sp = rulez->m_specs;
2140
2141 /* iterate through all the specifiers */
2142 while (sp)
2143 {
2144 /* update dependency for identifier */
2145 if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2146 {
2147 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2148 return 1;
2149
2150 mem_free ((void **) &sp->m_string);
2151 }
2152
2153 /* some errtexts reference to a rule */
2154 if (sp->m_errtext && sp->m_errtext->m_token_name)
2155 {
2156 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2157 return 1;
2158
2159 mem_free ((void **) &sp->m_errtext->m_token_name);
2160 }
2161
2162 /* update dependency for condition */
2163 if (sp->m_cond)
2164 {
2165 int i;
2166 for (i = 0; i < 2; i++)
2167 if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2168 {
2169 sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2170 sp->m_cond->m_operands[i].m_regname);
2171
2172 if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2173 return 1;
2174
2175 mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2176 }
2177 }
2178
2179 /* update dependency for all .load instructions */
2180 if (sp->m_emits)
2181 {
2182 emit *em = sp->m_emits;
2183 while (em != NULL)
2184 {
2185 if (em->m_emit_dest == ed_regbyte)
2186 {
2187 em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2188
2189 if (em->m_regbyte == NULL)
2190 return 1;
2191
2192 mem_free ((void **) &em->m_regname);
2193 }
2194
2195 em = em->m_next;
2196 }
2197 }
2198
2199 sp = sp->next;
2200 }
2201
2202 rulez = rulez->next;
2203 }
2204
2205 /* check for unreferenced symbols */
2206 rulez = di->m_rulez;
2207 while (rulez != NULL)
2208 {
2209 if (!rulez->m_referenced)
2210 {
2211 map_rule *ma = mapr;
2212 while (ma)
2213 {
2214 if (ma->data == rulez)
2215 {
2216 set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2217 return 1;
2218 }
2219 ma = ma->next;
2220 }
2221 }
2222 rulez = rulez->next;
2223 }
2224
2225 return 0;
2226 }
2227
2228 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2229 {
2230 byte values[2];
2231 int i;
2232
2233 if (co == NULL)
2234 return 1;
2235
2236 for (i = 0; i < 2; i++)
2237 switch (co->m_operands[i].m_type)
2238 {
2239 case cot_byte:
2240 values[i] = co->m_operands[i].m_byte;
2241 break;
2242 case cot_regbyte:
2243 values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2244 break;
2245 }
2246
2247 switch (co->m_type)
2248 {
2249 case ct_equal:
2250 return values[0] == values[1];
2251 case ct_not_equal:
2252 return values[0] != values[1];
2253 }
2254
2255 return 0;
2256 }
2257
2258 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2259 {
2260 while (top != limit)
2261 {
2262 regbyte_ctx *rbc = top->m_prev;
2263 regbyte_ctx_destroy (&top);
2264 top = rbc;
2265 }
2266 }
2267
/*
    outcome of matching a rule against the input text
*/
typedef enum match_result_
{
    /* the examined string does not match */
    mr_not_matched,
    /* the examined string matches */
    mr_matched,
    /* mr_not_matched + error has been raised */
    mr_error_raised,
    /* used by identifier loops only */
    mr_dont_emit,
    /* an internal error has occurred such as out of memory */
    mr_internal_error
} match_result;
2276
2277 /*
2278 This function does the main job. It parses the text and generates output data.
2279 */
2280 static match_result match (dict *di, const byte *text, unsigned int *index, rule *ru, barray **ba,
2281 int filtering_string, regbyte_ctx **rbc)
2282 {
2283 unsigned int ind = *index;
2284 match_result status = mr_not_matched;
2285 spec *sp = ru->m_specs;
2286 regbyte_ctx *ctx = *rbc;
2287
2288 /* for every specifier in the rule */
2289 while (sp)
2290 {
2291 unsigned int i, len, save_ind = ind;
2292 barray *array = NULL;
2293
2294 if (satisfies_condition (sp->m_cond, ctx))
2295 {
2296 switch (sp->m_spec_type)
2297 {
2298 case st_identifier:
2299 barray_create (&array);
2300 if (array == NULL)
2301 {
2302 free_regbyte_ctx_stack (ctx, *rbc);
2303 return mr_internal_error;
2304 }
2305
2306 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2307
2308 if (status == mr_internal_error)
2309 {
2310 free_regbyte_ctx_stack (ctx, *rbc);
2311 barray_destroy (&array);
2312 return mr_internal_error;
2313 }
2314 break;
2315 case st_string:
2316 len = str_length (sp->m_string);
2317
2318 /* prefilter the stream */
2319 if (!filtering_string && di->m_string)
2320 {
2321 barray *ba;
2322 unsigned int filter_index = 0;
2323 match_result result;
2324 regbyte_ctx *null_ctx = NULL;
2325
2326 barray_create (&ba);
2327 if (ba == NULL)
2328 {
2329 free_regbyte_ctx_stack (ctx, *rbc);
2330 return mr_internal_error;
2331 }
2332
2333 result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2334
2335 if (result == mr_internal_error)
2336 {
2337 free_regbyte_ctx_stack (ctx, *rbc);
2338 barray_destroy (&ba);
2339 return mr_internal_error;
2340 }
2341
2342 if (result != mr_matched)
2343 {
2344 barray_destroy (&ba);
2345 status = mr_not_matched;
2346 break;
2347 }
2348
2349 barray_destroy (&ba);
2350
2351 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2352 {
2353 status = mr_not_matched;
2354 break;
2355 }
2356
2357 status = mr_matched;
2358 ind += len;
2359 }
2360 else
2361 {
2362 status = mr_matched;
2363 for (i = 0; status == mr_matched && i < len; i++)
2364 if (text[ind + i] != sp->m_string[i])
2365 status = mr_not_matched;
2366
2367 if (status == mr_matched)
2368 ind += len;
2369 }
2370 break;
2371 case st_byte:
2372 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2373 if (status == mr_matched)
2374 ind++;
2375 break;
2376 case st_byte_range:
2377 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2378 mr_matched : mr_not_matched;
2379 if (status == mr_matched)
2380 ind++;
2381 break;
2382 case st_true:
2383 status = mr_matched;
2384 break;
2385 case st_false:
2386 status = mr_not_matched;
2387 break;
2388 case st_debug:
2389 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2390 break;
2391 case st_identifier_loop:
2392 barray_create (&array);
2393 if (array == NULL)
2394 {
2395 free_regbyte_ctx_stack (ctx, *rbc);
2396 return mr_internal_error;
2397 }
2398
2399 status = mr_dont_emit;
2400 for (;;)
2401 {
2402 match_result result;
2403
2404 save_ind = ind;
2405 result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2406
2407 if (result == mr_error_raised)
2408 {
2409 status = result;
2410 break;
2411 }
2412 else if (result == mr_matched)
2413 {
2414 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2415 barray_append (ba, &array))
2416 {
2417 free_regbyte_ctx_stack (ctx, *rbc);
2418 barray_destroy (&array);
2419 return mr_internal_error;
2420 }
2421 barray_destroy (&array);
2422 barray_create (&array);
2423 if (array == NULL)
2424 {
2425 free_regbyte_ctx_stack (ctx, *rbc);
2426 return mr_internal_error;
2427 }
2428 }
2429 else if (result == mr_internal_error)
2430 {
2431 free_regbyte_ctx_stack (ctx, *rbc);
2432 barray_destroy (&array);
2433 return mr_internal_error;
2434 }
2435 else
2436 break;
2437 }
2438 break;
2439 }
2440 }
2441 else
2442 {
2443 status = mr_not_matched;
2444 }
2445
2446 if (status == mr_error_raised)
2447 {
2448 free_regbyte_ctx_stack (ctx, *rbc);
2449 barray_destroy (&array);
2450
2451 return mr_error_raised;
2452 }
2453
2454 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2455 {
2456 free_regbyte_ctx_stack (ctx, *rbc);
2457 barray_destroy (&array);
2458
2459 if (sp->m_errtext)
2460 {
2461 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2462 ind), ind);
2463
2464 return mr_error_raised;
2465 }
2466
2467 return mr_not_matched;
2468 }
2469
2470 if (status == mr_matched)
2471 {
2472 if (sp->m_emits)
2473 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2474 {
2475 free_regbyte_ctx_stack (ctx, *rbc);
2476 barray_destroy (&array);
2477 return mr_internal_error;
2478 }
2479
2480 if (array)
2481 if (barray_append (ba, &array))
2482 {
2483 free_regbyte_ctx_stack (ctx, *rbc);
2484 barray_destroy (&array);
2485 return mr_internal_error;
2486 }
2487 }
2488
2489 barray_destroy (&array);
2490
2491 /* if the rule operator is a logical or, we pick up the first matching specifier */
2492 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2493 {
2494 *index = ind;
2495 *rbc = ctx;
2496 return mr_matched;
2497 }
2498
2499 sp = sp->next;
2500 }
2501
2502 /* everything went fine - all specifiers match up */
2503 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2504 {
2505 *index = ind;
2506 *rbc = ctx;
2507 return mr_matched;
2508 }
2509
2510 free_regbyte_ctx_stack (ctx, *rbc);
2511 return mr_not_matched;
2512 }
2513
2514 static match_result fast_match (dict *di, const byte *text, unsigned int *index, rule *ru, int *_PP, bytepool *_BP,
2515 int filtering_string, regbyte_ctx **rbc)
2516 {
2517 unsigned int ind = *index;
2518 int _P = filtering_string ? 0 : *_PP;
2519 int _P2;
2520 match_result status = mr_not_matched;
2521 spec *sp = ru->m_specs;
2522 regbyte_ctx *ctx = *rbc;
2523
2524 /* for every specifier in the rule */
2525 while (sp)
2526 {
2527 unsigned int i, len, save_ind = ind;
2528
2529 _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2530 if (bytepool_reserve (_BP, _P2))
2531 {
2532 free_regbyte_ctx_stack (ctx, *rbc);
2533 return mr_internal_error;
2534 }
2535
2536 if (satisfies_condition (sp->m_cond, ctx))
2537 {
2538 switch (sp->m_spec_type)
2539 {
2540 case st_identifier:
2541 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2542
2543 if (status == mr_internal_error)
2544 {
2545 free_regbyte_ctx_stack (ctx, *rbc);
2546 return mr_internal_error;
2547 }
2548 break;
2549 case st_string:
2550 len = str_length (sp->m_string);
2551
2552 /* prefilter the stream */
2553 if (!filtering_string && di->m_string)
2554 {
2555 unsigned int filter_index = 0;
2556 match_result result;
2557 regbyte_ctx *null_ctx = NULL;
2558
2559 result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2560
2561 if (result == mr_internal_error)
2562 {
2563 free_regbyte_ctx_stack (ctx, *rbc);
2564 return mr_internal_error;
2565 }
2566
2567 if (result != mr_matched)
2568 {
2569 status = mr_not_matched;
2570 break;
2571 }
2572
2573 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2574 {
2575 status = mr_not_matched;
2576 break;
2577 }
2578
2579 status = mr_matched;
2580 ind += len;
2581 }
2582 else
2583 {
2584 status = mr_matched;
2585 for (i = 0; status == mr_matched && i < len; i++)
2586 if (text[ind + i] != sp->m_string[i])
2587 status = mr_not_matched;
2588
2589 if (status == mr_matched)
2590 ind += len;
2591 }
2592 break;
2593 case st_byte:
2594 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2595 if (status == mr_matched)
2596 ind++;
2597 break;
2598 case st_byte_range:
2599 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2600 mr_matched : mr_not_matched;
2601 if (status == mr_matched)
2602 ind++;
2603 break;
2604 case st_true:
2605 status = mr_matched;
2606 break;
2607 case st_false:
2608 status = mr_not_matched;
2609 break;
2610 case st_debug:
2611 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2612 break;
2613 case st_identifier_loop:
2614 status = mr_dont_emit;
2615 for (;;)
2616 {
2617 match_result result;
2618
2619 save_ind = ind;
2620 result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2621
2622 if (result == mr_error_raised)
2623 {
2624 status = result;
2625 break;
2626 }
2627 else if (result == mr_matched)
2628 {
2629 if (!filtering_string)
2630 {
2631 if (sp->m_emits != NULL)
2632 {
2633 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2634 {
2635 free_regbyte_ctx_stack (ctx, *rbc);
2636 return mr_internal_error;
2637 }
2638 }
2639
2640 _P = _P2;
2641 _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2642 if (bytepool_reserve (_BP, _P2))
2643 {
2644 free_regbyte_ctx_stack (ctx, *rbc);
2645 return mr_internal_error;
2646 }
2647 }
2648 }
2649 else if (result == mr_internal_error)
2650 {
2651 free_regbyte_ctx_stack (ctx, *rbc);
2652 return mr_internal_error;
2653 }
2654 else
2655 break;
2656 }
2657 break;
2658 }
2659 }
2660 else
2661 {
2662 status = mr_not_matched;
2663 }
2664
2665 if (status == mr_error_raised)
2666 {
2667 free_regbyte_ctx_stack (ctx, *rbc);
2668
2669 return mr_error_raised;
2670 }
2671
2672 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2673 {
2674 free_regbyte_ctx_stack (ctx, *rbc);
2675
2676 if (sp->m_errtext)
2677 {
2678 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2679 ind), ind);
2680
2681 return mr_error_raised;
2682 }
2683
2684 return mr_not_matched;
2685 }
2686
2687 if (status == mr_matched)
2688 {
2689 if (sp->m_emits != NULL)
2690 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2691 {
2692 free_regbyte_ctx_stack (ctx, *rbc);
2693 return mr_internal_error;
2694 }
2695
2696 _P = _P2;
2697 }
2698
2699 /* if the rule operator is a logical or, we pick up the first matching specifier */
2700 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2701 {
2702 *index = ind;
2703 *rbc = ctx;
2704 if (!filtering_string)
2705 *_PP = _P;
2706 return mr_matched;
2707 }
2708
2709 sp = sp->next;
2710 }
2711
2712 /* everything went fine - all specifiers match up */
2713 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2714 {
2715 *index = ind;
2716 *rbc = ctx;
2717 if (!filtering_string)
2718 *_PP = _P;
2719 return mr_matched;
2720 }
2721
2722 free_regbyte_ctx_stack (ctx, *rbc);
2723 return mr_not_matched;
2724 }
2725
2726 static byte *error_get_token (error *er, dict *di, const byte *text, unsigned int ind)
2727 {
2728 byte *str = NULL;
2729
2730 if (er->m_token)
2731 {
2732 barray *ba;
2733 unsigned int filter_index = 0;
2734 regbyte_ctx *ctx = NULL;
2735
2736 barray_create (&ba);
2737 if (ba != NULL)
2738 {
2739 if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2740 filter_index)
2741 {
2742 str = (byte *) mem_alloc (filter_index + 1);
2743 if (str != NULL)
2744 {
2745 str_copy_n (str, text + ind, filter_index);
2746 str[filter_index] = '\0';
2747 }
2748 }
2749 barray_destroy (&ba);
2750 }
2751 }
2752
2753 return str;
2754 }
2755
2756 typedef struct grammar_load_state_
2757 {
2758 dict *di;
2759 byte *syntax_symbol;
2760 byte *string_symbol;
2761 map_str *maps;
2762 map_byte *mapb;
2763 map_rule *mapr;
2764 } grammar_load_state;
2765
2766 static void grammar_load_state_create (grammar_load_state **gr)
2767 {
2768 *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
2769 if (*gr)
2770 {
2771 (**gr).di = NULL;
2772 (**gr).syntax_symbol = NULL;
2773 (**gr).string_symbol = NULL;
2774 (**gr).maps = NULL;
2775 (**gr).mapb = NULL;
2776 (**gr).mapr = NULL;
2777 }
2778 }
2779
2780 static void grammar_load_state_destroy (grammar_load_state **gr)
2781 {
2782 if (*gr)
2783 {
2784 dict_destroy (&(**gr).di);
2785 mem_free ((void **) &(**gr).syntax_symbol);
2786 mem_free ((void **) &(**gr).string_symbol);
2787 map_str_destroy (&(**gr).maps);
2788 map_byte_destroy (&(**gr).mapb);
2789 map_rule_destroy (&(**gr).mapr);
2790 mem_free ((void **) gr);
2791 }
2792 }
2793
2794 /*
2795 the API
2796 */
2797
2798 grammar grammar_load_from_text (const byte *text)
2799 {
2800 grammar_load_state *g = NULL;
2801 grammar id = 0;
2802
2803 clear_last_error ();
2804
2805 grammar_load_state_create (&g);
2806 if (g == NULL)
2807 return 0;
2808
2809 dict_create (&g->di);
2810 if (g->di == NULL)
2811 {
2812 grammar_load_state_destroy (&g);
2813 return 0;
2814 }
2815
2816 eat_spaces (&text);
2817
2818 /* skip ".syntax" keyword */
2819 text += 7;
2820 eat_spaces (&text);
2821
2822 /* retrieve root symbol */
2823 if (get_identifier (&text, &g->syntax_symbol))
2824 {
2825 grammar_load_state_destroy (&g);
2826 return 0;
2827 }
2828 eat_spaces (&text);
2829
2830 /* skip semicolon */
2831 text++;
2832 eat_spaces (&text);
2833
2834 while (*text)
2835 {
2836 byte *symbol = NULL;
2837 int is_dot = *text == '.';
2838
2839 if (is_dot)
2840 text++;
2841
2842 if (get_identifier (&text, &symbol))
2843 {
2844 grammar_load_state_destroy (&g);
2845 return 0;
2846 }
2847 eat_spaces (&text);
2848
2849 /* .emtcode */
2850 if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2851 {
2852 map_byte *ma = NULL;
2853
2854 mem_free ((void **) (void *) &symbol);
2855
2856 if (get_emtcode (&text, &ma))
2857 {
2858 grammar_load_state_destroy (&g);
2859 return 0;
2860 }
2861
2862 map_byte_append (&g->mapb, ma);
2863 }
2864 /* .regbyte */
2865 else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2866 {
2867 map_byte *ma = NULL;
2868
2869 mem_free ((void **) (void *) &symbol);
2870
2871 if (get_regbyte (&text, &ma))
2872 {
2873 grammar_load_state_destroy (&g);
2874 return 0;
2875 }
2876
2877 map_byte_append (&g->di->m_regbytes, ma);
2878 }
2879 /* .errtext */
2880 else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2881 {
2882 map_str *ma = NULL;
2883
2884 mem_free ((void **) (void *) &symbol);
2885
2886 if (get_errtext (&text, &ma))
2887 {
2888 grammar_load_state_destroy (&g);
2889 return 0;
2890 }
2891
2892 map_str_append (&g->maps, ma);
2893 }
2894 /* .string */
2895 else if (is_dot && str_equal (symbol, (byte *) "string"))
2896 {
2897 mem_free ((void **) (void *) &symbol);
2898
2899 if (g->di->m_string != NULL)
2900 {
2901 grammar_load_state_destroy (&g);
2902 return 0;
2903 }
2904
2905 if (get_identifier (&text, &g->string_symbol))
2906 {
2907 grammar_load_state_destroy (&g);
2908 return 0;
2909 }
2910
2911 /* skip semicolon */
2912 eat_spaces (&text);
2913 text++;
2914 eat_spaces (&text);
2915 }
2916 else
2917 {
2918 rule *ru = NULL;
2919 map_rule *ma = NULL;
2920
2921 if (get_rule (&text, &ru, g->maps, g->mapb))
2922 {
2923 grammar_load_state_destroy (&g);
2924 return 0;
2925 }
2926
2927 rule_append (&g->di->m_rulez, ru);
2928
2929 /* if a rule consist of only one specifier, give it an ".and" operator */
2930 if (ru->m_oper == op_none)
2931 ru->m_oper = op_and;
2932
2933 map_rule_create (&ma);
2934 if (ma == NULL)
2935 {
2936 grammar_load_state_destroy (&g);
2937 return 0;
2938 }
2939
2940 ma->key = symbol;
2941 ma->data = ru;
2942 map_rule_append (&g->mapr, ma);
2943 }
2944 }
2945
2946 if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2947 g->di->m_regbytes))
2948 {
2949 grammar_load_state_destroy (&g);
2950 return 0;
2951 }
2952
2953 dict_append (&g_dicts, g->di);
2954 id = g->di->m_id;
2955 g->di = NULL;
2956
2957 grammar_load_state_destroy (&g);
2958
2959 return id;
2960 }
2961
2962 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2963 {
2964 dict *di = NULL;
2965 map_byte *reg = NULL;
2966
2967 clear_last_error ();
2968
2969 dict_find (&g_dicts, id, &di);
2970 if (di == NULL)
2971 {
2972 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2973 return 0;
2974 }
2975
2976 reg = map_byte_locate (&di->m_regbytes, name);
2977 if (reg == NULL)
2978 {
2979 set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2980 return 0;
2981 }
2982
2983 reg->data = value;
2984 return 1;
2985 }
2986
2987 /*
2988 internal checking function used by both grammar_check and grammar_fast_check functions
2989 */
2990 static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
2991 unsigned int estimate_prod_size, int use_fast_path)
2992 {
2993 dict *di = NULL;
2994 unsigned int index = 0;
2995
2996 clear_last_error ();
2997
2998 dict_find (&g_dicts, id, &di);
2999 if (di == NULL)
3000 {
3001 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3002 return 0;
3003 }
3004
3005 *prod = NULL;
3006 *size = 0;
3007
3008 if (use_fast_path)
3009 {
3010 regbyte_ctx *rbc = NULL;
3011 bytepool *bp = NULL;
3012 int _P = 0;
3013
3014 bytepool_create (&bp, estimate_prod_size);
3015 if (bp == NULL)
3016 return 0;
3017
3018 if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3019 {
3020 bytepool_destroy (&bp);
3021 free_regbyte_ctx_stack (rbc, NULL);
3022 return 0;
3023 }
3024
3025 free_regbyte_ctx_stack (rbc, NULL);
3026
3027 *prod = bp->_F;
3028 *size = _P;
3029 bp->_F = NULL;
3030 bytepool_destroy (&bp);
3031 }
3032 else
3033 {
3034 regbyte_ctx *rbc = NULL;
3035 barray *ba = NULL;
3036
3037 barray_create (&ba);
3038 if (ba == NULL)
3039 return 0;
3040
3041 if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3042 {
3043 barray_destroy (&ba);
3044 free_regbyte_ctx_stack (rbc, NULL);
3045 return 0;
3046 }
3047
3048 free_regbyte_ctx_stack (rbc, NULL);
3049
3050 *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3051 if (*prod == NULL)
3052 {
3053 barray_destroy (&ba);
3054 return 0;
3055 }
3056
3057 mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3058 *size = ba->len;
3059 barray_destroy (&ba);
3060 }
3061
3062 return 1;
3063 }
3064
3065 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3066 {
3067 return _grammar_check (id, text, prod, size, 0, 0);
3068 }
3069
3070 int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3071 unsigned int estimate_prod_size)
3072 {
3073 return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3074 }
3075
3076 int grammar_destroy (grammar id)
3077 {
3078 dict **di = &g_dicts;
3079
3080 clear_last_error ();
3081
3082 while (*di != NULL)
3083 {
3084 if ((**di).m_id == id)
3085 {
3086 dict *tmp = *di;
3087 *di = (**di).next;
3088 dict_destroy (&tmp);
3089 return 1;
3090 }
3091
3092 di = &(**di).next;
3093 }
3094
3095 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3096 return 0;
3097 }
3098
3099 static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3100 {
3101 if (*dots_made == 0)
3102 {
3103 if (*len < size - 1)
3104 {
3105 text[(*len)++] = x;
3106 text[*len] = '\0';
3107 }
3108 else
3109 {
3110 int i;
3111 for (i = 0; i < 3; i++)
3112 if (--(*len) >= 0)
3113 text[*len] = '.';
3114 *dots_made = 1;
3115 }
3116 }
3117 }
3118
3119 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3120 {
3121 int len = 0, dots_made = 0;
3122 const byte *p = error_message;
3123
3124 *text = '\0';
3125
3126 if (p)
3127 {
3128 while (*p)
3129 {
3130 if (*p == '$')
3131 {
3132 const byte *r = error_param;
3133
3134 while (*r)
3135 {
3136 append_character (*r++, text, &dots_made, &len, (int) size);
3137 }
3138
3139 p++;
3140 }
3141 else
3142 {
3143 append_character (*p++, text, &dots_made, &len, size);
3144 }
3145 }
3146 }
3147
3148 *pos = error_position;
3149 }