Added missing Copyrights.
[mesa.git] / src / mesa / shader / grammar.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.1
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file grammar.c
27 * syntax parsing engine
28 * \author Michal Krol
29 */
30
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
33 #endif
34
35 /*
36 Last Modified: 2004-II-8
37 */
38
39 /*
40 INTRODUCTION
41 ------------
42
43 The task is to check the syntax of an input string. Input string is a stream of ASCII
44 characters terminated with a null-character ('\0'). Checking it using C language is
45 difficult and hard to implement without bugs. It is hard to maintain and make changes when
46 the syntax changes.
47
48 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
49 only small changes. Even use of macros does not solve the problem because macros cannot
50 erase the complexity of the problem.
51
52 The resolution is to create a new language that will be highly oriented to our task. Once
53 we describe a particular syntax, we are done. We can then focus on the code that implements
54 the language. Its size and complexity are relatively small compared to the code that directly
55 checks the syntax.
56
57 First, we must implement our new language. Here, the language is implemented in C, but it
58 could also be implemented in any other language. The code is listed below. We must take
59 a good care that it is bug free. This is simple because the code is simple and clean.
60
61 Next, we must describe the syntax of our new language in itself. Once created and checked
62 manually that it is correct, we can use it to check other scripts.
63
64 Note that our new language loading code does not have to check the syntax. It is because we
65 assume that the script describing itself is correct, and other scripts can be syntactically
66 checked by the former script. The loading code must only do semantic checking which leads us to
67 simple resolving references.
68
69 THE LANGUAGE
70 ------------
71
72 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
73 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
74 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
75 connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
76 definition. Specifier can be a symbol, string, character, character range or a special
77 keyword ".true" or ".false".
78
79 On the very beginning of the script there is a declaration of a root symbol and is in the form:
80 .syntax <root_symbol>;
81 The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
82 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
83 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
84 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
85 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
86 true if any of the specifiers evaluates to true. If definition contains only one specifier,
87 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
88
89 If specifier is a ".true" keyword, it always evaluates to true.
90
91 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
92 when it does not evaluate to true.
93
94 Character range specifier is in the form:
95 '<first_character>' - '<second_character>'
96 If specifier is a character range, it evaluates to true if character in the stream is greater
97 or equal to <first_character> and less or equal to <second_character>. In that situation
98 the stream pointer is advanced to point to next character in the stream. All C-style escape
99 sequences are supported although trigraph sequences are not. The comparisons are performed
100 on 8-bit unsigned integers.
101
102 Character specifier is in the form:
103 '<single_character>'
104 It evaluates to true if the following character range specifier evaluates to true:
105 '<single_character>' - '<single_character>'
106
107 String specifier is in the form:
108 "<string>"
109 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
110 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
111 the following character specifier evaluates to true:
112 '<string>[i]'
113 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
114
115 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
116 .loop <symbol> (1)
117 where <symbol> is defined as follows:
118 <symbol> <definition>; (2)
119 Construction (1) is replaced by the following code:
120 <symbol$1>
121 and declaration (2) is replaced by the following:
122 <symbol$1> <symbol$2> .or .true;
123 <symbol$2> <symbol> .and <symbol$1>;
124 <symbol> <definition>;
125
126 Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
127 registers that can be accessed in the syn body. Each reg has its name and a default value.
128 The register is one byte wide. The C code can change the default value by calling
129 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
130 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
131 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
132 where <operator> can be == or !=. If the condition evaluates to false, the specifier
133 evaluates to .false. Otherwise it evaluates to the specifier.
134
135 ESCAPE SEQUENCES
136 ----------------
137
138 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
139 All occurrences of the characters in the first column are replaced with the corresponding
140 character in the second column.
141
142 Escape sequence Represents
143 ------------------------------------------------------------------------------------------------
144 \a Bell (alert)
145 \b Backspace
146 \f Formfeed
147 \n New line
148 \r Carriage return
149 \t Horizontal tab
150 \v Vertical tab
151 \' Single quotation mark
152 \" Double quotation mark
153 \\ Backslash
154 \? Literal question mark
155 \ooo ASCII character in octal notation
156 \xhhh ASCII character in hexadecimal notation
157 ------------------------------------------------------------------------------------------------
158
159 RAISING ERRORS
160 --------------
161
162 Any specifier can be followed by a special construction that is executed when the specifier
163 evaluates to false. The construction is in the form:
164 .error <ERROR_TEXT>
165 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
166 in the form:
167 .errtext <ERROR_TEXT> "<error_desc>"
168 When specifier evaluates to false and this construction is present, parsing is stopped
169 immediately and <error_desc> is returned as a result of parsing. The error position is also
170 returned and it is meant as an offset from the beginning of the stream to the character that
171 was valid so far. Example:
172
173 (**** syntax script ****)
174
175 .syntax program;
176 .errtext MISSING_SEMICOLON "missing ';'"
177 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
178 .loop space .and '\0';
179 declaration "declare" .and .loop space .and identifier;
180 space ' ';
181
182 (**** sample code ****)
183
184 declare foo ,
185
186 In the example above checking the sample code will result in error message "missing ';'" and
187 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
188 assure that there is no code after semicolon - only spaces.
189 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
190 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
191 the identifier name. The starting position is the error position. The length of the resulting
192 string is the position after invoking the symbol.
193
194 PRODUCTION
195 ----------
196
197 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
198 that evaluate to true. That is, every specifier and optional error construction can be followed
199 by a number of emit constructions that are in the form:
200 .emit <parameter>
201 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
202 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
203 in the form:
204 .emtcode <identifier> <hex_number>
205
206 When given specifier evaluates to true, all emits associated with the specifier are output
207 in order they were declared. A star means that last-read character should be output instead
208 of constant value. Example:
209
210 (**** syntax script ****)
211
212 .syntax foobar;
213 .emtcode WORD_FOO 0x01
214 .emtcode WORD_BAR 0x02
215 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
216 FOO "foo" .and SPACE;
217 BAR "bar" .and SPACE;
218 SPACE ' ' .or '\0';
219
220 (**** sample text 1 ****)
221
222 foo
223
224 (**** sample text 2 ****)
225
226 foobar
227
228 For both samples the result will be one-element array. For first sample text it will be
229 value 1, for second - 0. Note that every text will be accepted because of presence of
230 .true as an alternative.
231
232 Another example:
233
234 (**** syntax script ****)
235
236 .syntax declaration;
237 .emtcode VARIABLE 0x01
238 declaration "declare" .and .loop space .and
239 identifier .emit VARIABLE .and (1)
240 .true .emit 0x00 .and (2)
241 .loop space .and ';';
242 space ' ' .or '\t';
243 identifier .loop id_char .emit *; (3)
244 id_char 'a'-'z' .or 'A'-'Z' .or '_';
245
246 (**** sample code ****)
247
248 declare fubar;
249
250 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
251 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
252 to terminate the string with null to signal when the string ends. Specifier (3) outputs
253 all characters that make declared identifier. The result of sample code will be the
254 following array:
255 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
256
257 If .emit is followed by dollar $, it means that current position should be output. Current
258 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
259 first character consumed by the specifier associated with the .emit instruction. Current
260 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
261 first).
262 */
263
264 static void mem_free (void **);
265
266 /*
267 internal error messages
268 */
269 static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";
270 static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";
271 static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";
272 static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";
273
274 static const byte *error_message = NULL;
275 static byte *error_param = NULL; /* this is inserted into error_message in place of $ */
276 static int error_position = -1;
277
278 static byte *unknown = (byte *) "???";
279
280 static void clear_last_error ()
281 {
282 /* reset error message */
283 error_message = NULL;
284
285 /* free error parameter - if error_param is a "???" don't free it - it's static */
286 if (error_param != unknown)
287 mem_free ((void **) &error_param);
288 else
289 error_param = NULL;
290
291 /* reset error position */
292 error_position = -1;
293 }
294
295 static void set_last_error (const byte *msg, byte *param, int pos)
296 {
297 /* error message can only be set only once */
298 if (error_message != NULL)
299 {
300 mem_free (&param);
301 return;
302 }
303
304 error_message = msg;
305
306 if (param != NULL)
307 error_param = param;
308 else
309 error_param = unknown;
310
311 error_position = pos;
312 }
313
314 /*
315 memory management routines
316 */
317 static void *mem_alloc (size_t size)
318 {
319 void *ptr = grammar_alloc_malloc (size);
320 if (ptr == NULL)
321 set_last_error (OUT_OF_MEMORY, NULL, -1);
322 return ptr;
323 }
324
/*
    forwards to grammar_memory_copy() with the same arguments and returns its result
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    void *result = grammar_memory_copy (dst, src, size);

    return result;
}
329
/*
    releases the block *ptr through grammar_alloc_free() and leaves *ptr set to NULL
*/
static void mem_free (void **ptr)
{
    void *block = *ptr;

    *ptr = NULL;
    grammar_alloc_free (block);
}
335
336 static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
337 {
338 void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
339 if (ptr2 == NULL)
340 set_last_error (OUT_OF_MEMORY, NULL, -1);
341 return ptr2;
342 }
343
344 static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
345 {
346 return grammar_string_copy_n (dst, src, max_len);
347 }
348
349 static byte *str_duplicate (const byte *str)
350 {
351 byte *new_str = grammar_string_duplicate (str);
352 if (new_str == NULL)
353 set_last_error (OUT_OF_MEMORY, NULL, -1);
354 return new_str;
355 }
356
357 static int str_equal (const byte *str1, const byte *str2)
358 {
359 return grammar_string_compare (str1, str2) == 0;
360 }
361
362 static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
363 {
364 return grammar_string_compare_n (str1, str2, n) == 0;
365 }
366
367 static unsigned int str_length (const byte *str)
368 {
369 return grammar_string_length (str);
370 }
371
372 /*
373 string to byte map typedef
374 */
375 typedef struct map_byte_
376 {
377 byte *key;
378 byte data;
379 struct map_byte_ *next;
380 } map_byte;
381
382 static void map_byte_create (map_byte **ma)
383 {
384 *ma = mem_alloc (sizeof (map_byte));
385 if (*ma)
386 {
387 (**ma).key = NULL;
388 (**ma).data = '\0';
389 (**ma).next = NULL;
390 }
391 }
392
393 /* XXX unfold the recursion */
394 static void map_byte_destroy (map_byte **ma)
395 {
396 if (*ma)
397 {
398 map_byte_destroy (&(**ma).next);
399 mem_free ((void **) &(**ma).key);
400 mem_free ((void **) ma);
401 }
402 }
403
404 static void map_byte_append (map_byte **ma, map_byte **nm)
405 {
406 while (*ma)
407 ma = &(**ma).next;
408 *ma = *nm;
409 }
410
411 /*
412 searches the map for the specified key,
413 returns pointer to the element with the specified key if it exists
414 returns NULL otherwise
415 */
416 map_byte *map_byte_locate (map_byte **ma, const byte *key)
417 {
418 while (*ma)
419 {
420 if (str_equal ((**ma).key, key))
421 return *ma;
422
423 ma = &(**ma).next;
424 }
425
426 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
427 return NULL;
428 }
429
430 /*
431 searches the map for specified key,
432 if the key is matched, *data is filled with data associated with the key,
433 returns 0 if the key is matched,
434 returns 1 otherwise
435 */
436 static int map_byte_find (map_byte **ma, const byte *key, byte *data)
437 {
438 map_byte *found = map_byte_locate (ma, key);
439 if (found != NULL)
440 {
441 *data = found->data;
442
443 return 0;
444 }
445
446 return 1;
447 }
448
449 /*
450 regbyte context typedef
451
452 Each regbyte consists of its name and a default value. These are static and created at
453 grammar script compile-time, for example the following line:
454 .regbyte vertex_blend 0x00
455 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
456 When the script is executed, this regbyte can be accessed by name for read and write. When a
457 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
458 stack. The new entry contains information abot which regbyte it references and its new value.
459 When a given regbyte is accessed for read, the stack is searched top-down to find an
460 entry that references the regbyte. The first matching entry is used to return the current
461 value it holds. If no entry is found, the default value is returned.
462 */
463 typedef struct regbyte_ctx_
464 {
465 map_byte *m_regbyte;
466 byte m_current_value;
467 struct regbyte_ctx_ *m_prev;
468 } regbyte_ctx;
469
470 static void regbyte_ctx_create (regbyte_ctx **re)
471 {
472 *re = mem_alloc (sizeof (regbyte_ctx));
473 if (*re)
474 {
475 (**re).m_regbyte = NULL;
476 (**re).m_prev = NULL;
477 }
478 }
479
480 static void regbyte_ctx_destroy (regbyte_ctx **re)
481 {
482 if (*re)
483 {
484 mem_free ((void **) re);
485 }
486 }
487
488 static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
489 {
490 /* first lookup in the register stack */
491 while (*re != NULL)
492 {
493 if ((**re).m_regbyte == reg)
494 return (**re).m_current_value;
495
496 re = &(**re).m_prev;
497 }
498
499 /* if not found - return the default value */
500 return reg->data;
501 }
502
/*
    emit type typedef
    selects which value an emit produces
*/
typedef enum emit_type_
{
    et_byte = 0,        /* explicit number */
    et_stream = 1,      /* eaten character */
    et_position = 2     /* current position */
} emit_type;
512
/*
    emit destination typedef
    selects where an emitted value goes
*/
typedef enum emit_dest_
{
    ed_output = 0,      /* write to the output buffer */
    ed_regbyte = 1      /* write a particular regbyte */
} emit_dest;
521
522 /*
523 emit typedef
524 */
525 typedef struct emit_
526 {
527 emit_dest m_emit_dest;
528 emit_type m_emit_type; /* ed_output */
529 byte m_byte; /* et_byte */
530 map_byte *m_regbyte; /* ed_regbyte */
531 byte *m_regname; /* ed_regbyte - temporary */
532 struct emit_ *m_next;
533 } emit;
534
535 static void emit_create (emit **em)
536 {
537 *em = mem_alloc (sizeof (emit));
538 if (*em)
539 {
540 (**em).m_emit_dest = ed_output;
541 (**em).m_emit_type = et_byte;
542 (**em).m_byte = '\0';
543 (**em).m_regbyte = NULL;
544 (**em).m_regname = NULL;
545 (**em).m_next = NULL;
546 }
547 }
548
549 static void emit_destroy (emit **em)
550 {
551 if (*em)
552 {
553 emit_destroy (&(**em).m_next);
554 mem_free ((void **) &(**em).m_regname);
555 mem_free ((void **) em);
556 }
557 }
558
559 /*
560 error typedef
561 */
562 typedef struct error_
563 {
564 byte *m_text;
565 byte *m_token_name;
566 struct rule_ *m_token;
567 } error;
568
569 static void error_create (error **er)
570 {
571 *er = mem_alloc (sizeof (error));
572 if (*er)
573 {
574 (**er).m_text = NULL;
575 (**er).m_token_name = NULL;
576 (**er).m_token = NULL;
577 }
578 }
579
580 static void error_destroy (error **er)
581 {
582 if (*er)
583 {
584 mem_free ((void **) &(**er).m_text);
585 mem_free ((void **) &(**er).m_token_name);
586 mem_free ((void **) er);
587 }
588 }
589
590 struct dict_;
591 static byte *error_get_token (error *, struct dict_ *, const byte *, unsigned int);
592
/*
    condition operand type typedef
*/
typedef enum cond_oper_type_
{
    cot_byte = 0,       /* constant 8-bit unsigned integer */
    cot_regbyte = 1     /* pointer to byte register containing the current value */
} cond_oper_type;
601
602 /*
603 condition operand typedef
604 */
605 typedef struct cond_oper_
606 {
607 cond_oper_type m_type;
608 byte m_byte; /* cot_byte */
609 map_byte *m_regbyte; /* cot_regbyte */
610 byte *m_regname; /* cot_regbyte - temporary */
611 } cond_oper;
612
/*
    condition type typedef
*/
typedef enum cond_type_
{
    ct_equal = 0,
    ct_not_equal = 1
} cond_type;
621
622 /*
623 condition typedef
624 */
625 typedef struct cond_
626 {
627 cond_type m_type;
628 cond_oper m_operands[2];
629 } cond;
630
631 static void cond_create (cond **co)
632 {
633 *co = mem_alloc (sizeof (cond));
634 if (*co)
635 {
636 (**co).m_operands[0].m_regname = NULL;
637 (**co).m_operands[1].m_regname = NULL;
638 }
639 }
640
641 static void cond_destroy (cond **co)
642 {
643 if (*co)
644 {
645 mem_free ((void **) &(**co).m_operands[0].m_regname);
646 mem_free ((void **) &(**co).m_operands[1].m_regname);
647 mem_free ((void **) co);
648 }
649 }
650
/*
    specifier type typedef
*/
typedef enum spec_type_
{
    st_false = 0,
    st_true = 1,
    st_byte = 2,
    st_byte_range = 3,
    st_string = 4,
    st_identifier = 5,
    st_identifier_loop = 6,
    st_debug = 7
} spec_type;
665
666 /*
667 specifier typedef
668 */
669 typedef struct spec_
670 {
671 spec_type m_spec_type;
672 byte m_byte[2]; /* st_byte, st_byte_range */
673 byte *m_string; /* st_string */
674 struct rule_ *m_rule; /* st_identifier, st_identifier_loop */
675 emit *m_emits;
676 error *m_errtext;
677 cond *m_cond;
678 struct spec_ *m_next;
679 } spec;
680
681 static void spec_create (spec **sp)
682 {
683 *sp = mem_alloc (sizeof (spec));
684 if (*sp)
685 {
686 (**sp).m_spec_type = st_false;
687 (**sp).m_byte[0] = '\0';
688 (**sp).m_byte[1] = '\0';
689 (**sp).m_string = NULL;
690 (**sp).m_rule = NULL;
691 (**sp).m_emits = NULL;
692 (**sp).m_errtext = NULL;
693 (**sp).m_cond = NULL;
694 (**sp).m_next = NULL;
695 }
696 }
697
698 static void spec_destroy (spec **sp)
699 {
700 if (*sp)
701 {
702 spec_destroy (&(**sp).m_next);
703 emit_destroy (&(**sp).m_emits);
704 error_destroy (&(**sp).m_errtext);
705 mem_free ((void **) &(**sp).m_string);
706 cond_destroy (&(**sp).m_cond);
707 mem_free ((void **) sp);
708 }
709 }
710
711 static void spec_append (spec **sp, spec **ns)
712 {
713 while (*sp)
714 sp = &(**sp).m_next;
715 *sp = *ns;
716 }
717
/*
    operator typedef
    the operator that connects the specifiers of a rule
*/
typedef enum oper_
{
    op_none = 0,
    op_and = 1,
    op_or = 2
} oper;
727
728 /*
729 rule typedef
730 */
731 typedef struct rule_
732 {
733 oper m_oper;
734 spec *m_specs;
735 struct rule_ *m_next;
736 /* int m_referenced; */ /* for debugging purposes */
737 } rule;
738
739 static void rule_create (rule **ru)
740 {
741 *ru = mem_alloc (sizeof (rule));
742 if (*ru)
743 {
744 (**ru).m_oper = op_none;
745 (**ru).m_specs = NULL;
746 (**ru).m_next = NULL;
747 /* (**ru).m_referenced = 0; */
748 }
749 }
750
751 static void rule_destroy (rule **ru)
752 {
753 if (*ru)
754 {
755 rule_destroy (&(**ru).m_next);
756 spec_destroy (&(**ru).m_specs);
757 mem_free ((void **) ru);
758 }
759 }
760
761 static void rule_append (rule **ru, rule **nr)
762 {
763 while (*ru)
764 ru = &(**ru).m_next;
765 *ru = *nr;
766 }
767
768 /*
769 returns unique grammar id
770 */
771 static grammar next_valid_grammar_id ()
772 {
773 static grammar id = 0;
774
775 return ++id;
776 }
777
778 /*
779 dictionary typedef
780 */
781 typedef struct dict_
782 {
783 rule *m_rulez;
784 rule *m_syntax;
785 rule *m_string;
786 map_byte *m_regbytes;
787 grammar m_id;
788 struct dict_ *m_next;
789 } dict;
790
791 static void dict_create (dict **di)
792 {
793 *di = mem_alloc (sizeof (dict));
794 if (*di)
795 {
796 (**di).m_rulez = NULL;
797 (**di).m_syntax = NULL;
798 (**di).m_string = NULL;
799 (**di).m_regbytes = NULL;
800 (**di).m_id = next_valid_grammar_id ();
801 (**di).m_next = NULL;
802 }
803 }
804
805 static void dict_destroy (dict **di)
806 {
807 if (*di)
808 {
809 rule_destroy (&(**di).m_rulez);
810 map_byte_destroy (&(**di).m_regbytes);
811 mem_free ((void **) di);
812 }
813 }
814
815 static void dict_append (dict **di, dict **nd)
816 {
817 while (*di)
818 di = &(**di).m_next;
819 *di = *nd;
820 }
821
822 static void dict_find (dict **di, grammar key, dict **data)
823 {
824 while (*di)
825 {
826 if ((**di).m_id == key)
827 {
828 *data = *di;
829 return;
830 }
831
832 di = &(**di).m_next;
833 }
834
835 *data = NULL;
836 }
837
/* presumably the head of the list of all created dictionaries (linked through m_next) -
   its users are outside this part of the file, TODO confirm */
static dict *g_dicts = NULL;
839
840 /*
841 byte array typedef
842
843 XXX this class is going to be replaced by a faster one, soon
844 */
845 typedef struct barray_
846 {
847 byte *data;
848 unsigned int len;
849 } barray;
850
851 static void barray_create (barray **ba)
852 {
853 *ba = mem_alloc (sizeof (barray));
854 if (*ba)
855 {
856 (**ba).data = NULL;
857 (**ba).len = 0;
858 }
859 }
860
861 static void barray_destroy (barray **ba)
862 {
863 if (*ba)
864 {
865 mem_free ((void **) &(**ba).data);
866 mem_free ((void **) ba);
867 }
868 }
869
870 /*
871 reallocates byte array to requested size,
872 returns 0 on success,
873 returns 1 otherwise
874 */
875 static int barray_resize (barray **ba, unsigned int nlen)
876 {
877 byte *new_pointer;
878
879 if (nlen == 0)
880 {
881 mem_free ((void **) &(**ba).data);
882 (**ba).data = NULL;
883 (**ba).len = 0;
884
885 return 0;
886 }
887 else
888 {
889 new_pointer = mem_realloc ((**ba).data, (**ba).len * sizeof (byte), nlen * sizeof (byte));
890 if (new_pointer)
891 {
892 (**ba).data = new_pointer;
893 (**ba).len = nlen;
894
895 return 0;
896 }
897 }
898
899 return 1;
900 }
901
902 /*
903 adds byte array pointed by *nb to the end of array pointed by *ba,
904 returns 0 on success,
905 returns 1 otherwise
906 */
907 static int barray_append (barray **ba, barray **nb)
908 {
909 const unsigned int len = (**ba).len;
910
911 if (barray_resize (ba, (**ba).len + (**nb).len))
912 return 1;
913
914 mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
915
916 return 0;
917 }
918
919 /*
920 adds emit chain pointed by em to the end of array pointed by *ba,
921 returns 0 on success,
922 returns 1 otherwise
923 */
924 static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
925 {
926 emit *temp = em;
927 unsigned int count = 0;
928
929 while (temp)
930 {
931 if (temp->m_emit_dest == ed_output)
932 if (temp->m_emit_type == et_position)
933 count += 4; /* position is a 32-bit unsigned integer */
934 else
935 count++;
936
937 temp = temp->m_next;
938 }
939
940 if (barray_resize (ba, (**ba).len + count))
941 return 1;
942
943 while (em)
944 {
945 if (em->m_emit_dest == ed_output)
946 {
947 if (em->m_emit_type == et_byte)
948 (**ba).data[(**ba).len - count--] = em->m_byte;
949 else if (em->m_emit_type == et_stream)
950 (**ba).data[(**ba).len - count--] = c;
951 else // em->type == et_position
952 (**ba).data[(**ba).len - count--] = (byte) pos,
953 (**ba).data[(**ba).len - count--] = (byte) (pos >> 8),
954 (**ba).data[(**ba).len - count--] = (byte) (pos >> 16),
955 (**ba).data[(**ba).len - count--] = (byte) (pos >> 24);
956 }
957 else
958 {
959 regbyte_ctx *new_rbc;
960 regbyte_ctx_create (&new_rbc);
961 if (new_rbc == NULL)
962 return 1;
963
964 new_rbc->m_prev = *rbc;
965 new_rbc->m_regbyte = em->m_regbyte;
966 *rbc = new_rbc;
967
968 if (em->m_emit_type == et_byte)
969 new_rbc->m_current_value = em->m_byte;
970 else if (em->m_emit_type == et_stream)
971 new_rbc->m_current_value = c;
972 }
973
974 em = em->m_next;
975 }
976
977 return 0;
978 }
979
980 /*
981 string to string map typedef
982 */
983 typedef struct map_str_
984 {
985 byte *key;
986 byte *data;
987 struct map_str_ *next;
988 } map_str;
989
990 static void map_str_create (map_str **ma)
991 {
992 *ma = mem_alloc (sizeof (map_str));
993 if (*ma)
994 {
995 (**ma).key = NULL;
996 (**ma).data = NULL;
997 (**ma).next = NULL;
998 }
999 }
1000
1001 static void map_str_destroy (map_str **ma)
1002 {
1003 if (*ma)
1004 {
1005 map_str_destroy (&(**ma).next);
1006 mem_free ((void **) &(**ma).key);
1007 mem_free ((void **) &(**ma).data);
1008 mem_free ((void **) ma);
1009 }
1010 }
1011
1012 static void map_str_append (map_str **ma, map_str **nm)
1013 {
1014 while (*ma)
1015 ma = &(**ma).next;
1016 *ma = *nm;
1017 }
1018
1019 /*
1020 searches the map for specified key,
1021 if the key is matched, *data is filled with data associated with the key,
1022 returns 0 if the key is matched,
1023 returns 1 otherwise
1024 */
1025 static int map_str_find (map_str **ma, const byte *key, byte **data)
1026 {
1027 while (*ma)
1028 {
1029 if (str_equal ((**ma).key, key))
1030 {
1031 *data = str_duplicate ((**ma).data);
1032 if (*data == NULL)
1033 return 1;
1034
1035 return 0;
1036 }
1037
1038 ma = &(**ma).next;
1039 }
1040
1041 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1042 return 1;
1043 }
1044
1045 /*
1046 string to rule map typedef
1047 */
1048 typedef struct map_rule_
1049 {
1050 byte *key;
1051 rule *data;
1052 struct map_rule_ *next;
1053 } map_rule;
1054
1055 static void map_rule_create (map_rule **ma)
1056 {
1057 *ma = mem_alloc (sizeof (map_rule));
1058 if (*ma)
1059 {
1060 (**ma).key = NULL;
1061 (**ma).data = NULL;
1062 (**ma).next = NULL;
1063 }
1064 }
1065
1066 static void map_rule_destroy (map_rule **ma)
1067 {
1068 if (*ma)
1069 {
1070 map_rule_destroy (&(**ma).next);
1071 mem_free ((void **) &(**ma).key);
1072 mem_free ((void **) ma);
1073 }
1074 }
1075
1076 static void map_rule_append (map_rule **ma, map_rule **nm)
1077 {
1078 while (*ma)
1079 ma = &(**ma).next;
1080 *ma = *nm;
1081 }
1082
1083 /*
1084 searches the map for specified key,
1085 if the key is matched, *data is filled with data associated with the key,
1086 returns 0 if the is matched,
1087 returns 1 otherwise
1088 */
1089 static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1090 {
1091 while (*ma)
1092 {
1093 if (str_equal ((**ma).key, key))
1094 {
1095 *data = (**ma).data;
1096
1097 return 0;
1098 }
1099
1100 ma = &(**ma).next;
1101 }
1102
1103 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1104 return 1;
1105 }
1106
1107 /*
1108 returns 1 if given character is a white space,
1109 returns 0 otherwise
1110 */
1111 static int is_space (byte c)
1112 {
1113 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1114 }
1115
1116 /*
1117 advances text pointer by 1 if character pointed by *text is a space,
1118 returns 1 if a space has been eaten,
1119 returns 0 otherwise
1120 */
1121 static int eat_space (const byte **text)
1122 {
1123 if (is_space (**text))
1124 {
1125 (*text)++;
1126
1127 return 1;
1128 }
1129
1130 return 0;
1131 }
1132
1133 /*
1134 returns 1 if text points to C-style comment start string "/*",
1135 returns 0 otherwise
1136 */
1137 static int is_comment_start (const byte *text)
1138 {
1139 return text[0] == '/' && text[1] == '*';
1140 }
1141
1142 /*
1143 advances text pointer to first character after C-style comment block - if any,
1144 returns 1 if C-style comment block has been encountered and eaten,
1145 returns 0 otherwise
1146 */
1147 static int eat_comment (const byte **text)
1148 {
1149 if (is_comment_start (*text))
1150 {
1151 /* *text points to comment block - skip two characters to enter comment body */
1152 *text += 2;
1153 /* skip any character except consecutive '*' and '/' */
1154 while (!((*text)[0] == '*' && (*text)[1] == '/'))
1155 (*text)++;
1156 /* skip those two terminating characters */
1157 *text += 2;
1158
1159 return 1;
1160 }
1161
1162 return 0;
1163 }
1164
1165 /*
1166 advances text pointer to first character that is neither space nor C-style comment block
1167 */
1168 static void eat_spaces (const byte **text)
1169 {
1170 while (eat_space (text) || eat_comment (text))
1171 ;
1172 }
1173
1174 /*
1175 resizes string pointed by *ptr to successfully add character c to the end of the string,
1176 returns 0 on success,
1177 returns 1 otherwise
1178 */
1179 static int string_grow (byte **ptr, unsigned int *len, byte c)
1180 {
1181 /* reallocate the string in 16-byte increments */
1182 if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1183 {
1184 byte *tmp = mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
1185 ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1186 if (tmp == NULL)
1187 return 1;
1188
1189 *ptr = tmp;
1190 }
1191
1192 if (c)
1193 {
1194 /* append given character */
1195 (*ptr)[*len] = c;
1196 (*len)++;
1197 }
1198 (*ptr)[*len] = '\0';
1199
1200 return 0;
1201 }
1202
1203 /*
1204 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1205 returns 0 otherwise
1206 */
1207 static int is_identifier (byte c)
1208 {
1209 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1210 }
1211
1212 /*
1213 copies characters from *text to *id until non-identifier character is encountered,
1214 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1215 text pointer is advanced to point past the copied identifier,
1216 returns 0 if identifier was successfully copied,
1217 returns 1 otherwise
1218 */
1219 static int get_identifier (const byte **text, byte **id)
1220 {
1221 const byte *t = *text;
1222 byte *p = NULL;
1223 unsigned int len = 0;
1224
1225 if (string_grow (&p, &len, '\0'))
1226 return 1;
1227
1228 /* loop while next character in buffer is valid for identifiers */
1229 while (is_identifier (*t))
1230 {
1231 if (string_grow (&p, &len, *t++))
1232 {
1233 mem_free ((void **) &p);
1234 return 1;
1235 }
1236 }
1237
1238 *text = t;
1239 *id = p;
1240
1241 return 0;
1242 }
1243
1244 /*
1245 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1246 returns 0 otherwise
1247 */
1248 static int is_hex (byte c)
1249 {
1250 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1251 }
1252
1253 /*
1254 returns value of passed character as if it was HEX digit
1255 */
1256 static unsigned int hex2dec (byte c)
1257 {
1258 if (c >= '0' && c <= '9')
1259 return c - '0';
1260 if (c >= 'A' && c <= 'F')
1261 return c - 'A' + 10;
1262 return c - 'a' + 10;
1263 }
1264
1265 /*
1266 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1267 advances text pointer past the converted sequence,
1268 returns the converted value
1269 */
1270 static unsigned int hex_convert (const byte **text)
1271 {
1272 unsigned int value = 0;
1273
1274 while (is_hex (**text))
1275 {
1276 value = value * 0x10 + hex2dec (**text);
1277 (*text)++;
1278 }
1279
1280 return value;
1281 }
1282
1283 /*
1284 returns 1 if given character is OCT digit 0-7,
1285 returns 0 otherwise
1286 */
1287 static int is_oct (byte c)
1288 {
1289 return c >= '0' && c <= '7';
1290 }
1291
1292 /*
1293 returns value of passed character as if it was OCT digit
1294 */
1295 static int oct2dec (byte c)
1296 {
1297 return c - '0';
1298 }
1299
1300 static byte get_escape_sequence (const byte **text)
1301 {
1302 int value = 0;
1303
1304 /* skip '\' character */
1305 (*text)++;
1306
1307 switch (*(*text)++)
1308 {
1309 case '\'':
1310 return '\'';
1311 case '"':
1312 return '\"';
1313 case '?':
1314 return '\?';
1315 case '\\':
1316 return '\\';
1317 case 'a':
1318 return '\a';
1319 case 'b':
1320 return '\b';
1321 case 'f':
1322 return '\f';
1323 case 'n':
1324 return '\n';
1325 case 'r':
1326 return '\r';
1327 case 't':
1328 return '\t';
1329 case 'v':
1330 return '\v';
1331 case 'x':
1332 return (byte) hex_convert (text);
1333 }
1334
1335 (*text)--;
1336 if (is_oct (**text))
1337 {
1338 value = oct2dec (*(*text)++);
1339 if (is_oct (**text))
1340 {
1341 value = value * 010 + oct2dec (*(*text)++);
1342 if (is_oct (**text))
1343 value = value * 010 + oct2dec (*(*text)++);
1344 }
1345 }
1346
1347 return (byte) value;
1348 }
1349
1350 /*
1351 copies characters from *text to *str until " or ' character is encountered,
1352 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1353 assumes that *text points to " or ' character that starts the string,
1354 text pointer is advanced to point past the " or ' character,
1355 returns 0 if string was successfully copied,
1356 returns 1 otherwise
1357 */
1358 static int get_string (const byte **text, byte **str)
1359 {
1360 const byte *t = *text;
1361 byte *p = NULL;
1362 unsigned int len = 0;
1363 byte term_char;
1364
1365 if (string_grow (&p, &len, '\0'))
1366 return 1;
1367
1368 /* read " or ' character that starts the string */
1369 term_char = *t++;
1370 /* while next character is not the terminating character */
1371 while (*t && *t != term_char)
1372 {
1373 byte c;
1374
1375 if (*t == '\\')
1376 c = get_escape_sequence (&t);
1377 else
1378 c = *t++;
1379
1380 if (string_grow (&p, &len, c))
1381 {
1382 mem_free ((void **) &p);
1383 return 1;
1384 }
1385 }
1386 /* skip " or ' character that ends the string */
1387 t++;
1388
1389 *text = t;
1390 *str = p;
1391 return 0;
1392 }
1393
1394 /*
1395 gets emit code, the syntax is: ".emtcode" " " <symbol> " " ("0x" | "0X") <hex_value>
1396 assumes that *text already points to <symbol>,
1397 returns 0 if emit code is successfully read,
1398 returns 1 otherwise
1399 */
1400 static int get_emtcode (const byte **text, map_byte **ma)
1401 {
1402 const byte *t = *text;
1403 map_byte *m = NULL;
1404
1405 map_byte_create (&m);
1406 if (m == NULL)
1407 return 1;
1408
1409 if (get_identifier (&t, &m->key))
1410 {
1411 map_byte_destroy (&m);
1412 return 1;
1413 }
1414 eat_spaces (&t);
1415
1416 if (*t == '\'')
1417 {
1418 byte *c;
1419
1420 if (get_string (&t, &c))
1421 {
1422 map_byte_destroy (&m);
1423 return 1;
1424 }
1425
1426 m->data = (byte) c[0];
1427 mem_free ((void **) &c);
1428 }
1429 else
1430 {
1431 /* skip HEX "0x" or "0X" prefix */
1432 t += 2;
1433 m->data = (byte) hex_convert (&t);
1434 }
1435
1436 eat_spaces (&t);
1437
1438 *text = t;
1439 *ma = m;
1440 return 0;
1441 }
1442
1443 /*
1444 gets regbyte declaration, the syntax is: ".regbyte" " " <symbol> " " ("0x" | "0X") <hex_value>
1445 assumes that *text already points to <symbol>,
1446 returns 0 if regbyte is successfully read,
1447 returns 1 otherwise
1448 */
1449 static int get_regbyte (const byte **text, map_byte **ma)
1450 {
1451 return get_emtcode (text, ma);
1452 }
1453
1454 /*
1455 returns 0 on success,
1456 returns 1 otherwise
1457 */
1458 static int get_errtext (const byte **text, map_str **ma)
1459 {
1460 const byte *t = *text;
1461 map_str *m = NULL;
1462
1463 map_str_create (&m);
1464 if (m == NULL)
1465 return 1;
1466
1467 if (get_identifier (&t, &m->key))
1468 {
1469 map_str_destroy (&m);
1470 return 1;
1471 }
1472 eat_spaces (&t);
1473
1474 if (get_string (&t, &m->data))
1475 {
1476 map_str_destroy (&m);
1477 return 1;
1478 }
1479 eat_spaces (&t);
1480
1481 *text = t;
1482 *ma = m;
1483 return 0;
1484 }
1485
1486 /*
1487 returns 0 on success,
1488 returns 1 otherwise,
1489 */
1490 static int get_error (const byte **text, error **er, map_str *maps)
1491 {
1492 const byte *t = *text;
1493 byte *temp = NULL;
1494
1495 if (*t != '.')
1496 return 0;
1497
1498 t++;
1499 if (get_identifier (&t, &temp))
1500 return 1;
1501 eat_spaces (&t);
1502
1503 if (!str_equal ((byte *) "error", temp))
1504 {
1505 mem_free ((void **) &temp);
1506 return 0;
1507 }
1508
1509 mem_free ((void **) &temp);
1510
1511 error_create (er);
1512 if (*er == NULL)
1513 return 1;
1514
1515 if (*t == '\"')
1516 {
1517 if (get_string (&t, &(**er).m_text))
1518 {
1519 error_destroy (er);
1520 return 1;
1521 }
1522 eat_spaces (&t);
1523 }
1524 else
1525 {
1526 if (get_identifier (&t, &temp))
1527 {
1528 error_destroy (er);
1529 return 1;
1530 }
1531 eat_spaces (&t);
1532
1533 if (map_str_find (&maps, temp, &(**er).m_text))
1534 {
1535 mem_free ((void **) &temp);
1536 error_destroy (er);
1537 return 1;
1538 }
1539
1540 mem_free ((void **) &temp);
1541 }
1542
1543 /* try to extract "token" from "...$token$..." */
1544 {
1545 byte *processed = NULL;
1546 unsigned int len = 0, i = 0;
1547
1548 if (string_grow (&processed, &len, '\0'))
1549 {
1550 error_destroy (er);
1551 return 1;
1552 }
1553
1554 while (i < str_length ((**er).m_text))
1555 {
1556 /* check if the dollar sign is repeated - if so skip it */
1557 if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1558 {
1559 if (string_grow (&processed, &len, '$'))
1560 {
1561 mem_free ((void **) &processed);
1562 error_destroy (er);
1563 return 1;
1564 }
1565
1566 i += 2;
1567 }
1568 else if ((**er).m_text[i] != '$')
1569 {
1570 if (string_grow (&processed, &len, (**er).m_text[i]))
1571 {
1572 mem_free ((void **) &processed);
1573 error_destroy (er);
1574 return 1;
1575 }
1576
1577 i++;
1578 }
1579 else
1580 {
1581 if (string_grow (&processed, &len, '$'))
1582 {
1583 mem_free ((void **) &processed);
1584 error_destroy (er);
1585 return 1;
1586 }
1587
1588 {
1589 /* length of token being extracted */
1590 unsigned int tlen = 0;
1591
1592 if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1593 {
1594 mem_free ((void **) &processed);
1595 error_destroy (er);
1596 return 1;
1597 }
1598
1599 /* skip the dollar sign */
1600 i++;
1601
1602 while ((**er).m_text[i] != '$')
1603 {
1604 if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1605 {
1606 mem_free ((void **) &processed);
1607 error_destroy (er);
1608 return 1;
1609 }
1610
1611 i++;
1612 }
1613
1614 /* skip the dollar sign */
1615 i++;
1616 }
1617 }
1618 }
1619
1620 mem_free ((void **) &(**er).m_text);
1621 (**er).m_text = processed;
1622 }
1623
1624 *text = t;
1625 return 0;
1626 }
1627
1628 /*
1629 returns 0 on success,
1630 returns 1 otherwise,
1631 */
1632 static int get_emits (const byte **text, emit **em, map_byte *mapb)
1633 {
1634 const byte *t = *text;
1635 byte *temp = NULL;
1636 emit *e = NULL;
1637 emit_dest dest;
1638
1639 if (*t != '.')
1640 return 0;
1641
1642 t++;
1643 if (get_identifier (&t, &temp))
1644 return 1;
1645 eat_spaces (&t);
1646
1647 /* .emit */
1648 if (str_equal ((byte *) "emit", temp))
1649 dest = ed_output;
1650 /* .load */
1651 else if (str_equal ((byte *) "load", temp))
1652 dest = ed_regbyte;
1653 else
1654 {
1655 mem_free ((void **) &temp);
1656 return 0;
1657 }
1658
1659 mem_free ((void **) &temp);
1660
1661 emit_create (&e);
1662 if (e == NULL)
1663 return 1;
1664
1665 e->m_emit_dest = dest;
1666
1667 if (dest == ed_regbyte)
1668 {
1669 if (get_identifier (&t, &e->m_regname))
1670 {
1671 emit_destroy (&e);
1672 return 1;
1673 }
1674 eat_spaces (&t);
1675 }
1676
1677 /* 0xNN */
1678 if (*t == '0')
1679 {
1680 t += 2;
1681 e->m_byte = (byte) hex_convert (&t);
1682
1683 e->m_emit_type = et_byte;
1684 }
1685 /* * */
1686 else if (*t == '*')
1687 {
1688 t++;
1689
1690 e->m_emit_type = et_stream;
1691 }
1692 /* $ */
1693 else if (*t == '$')
1694 {
1695 t++;
1696
1697 e->m_emit_type = et_position;
1698 }
1699 /* 'c' */
1700 else if (*t == '\'')
1701 {
1702 if (get_string (&t, &temp))
1703 {
1704 emit_destroy (&e);
1705 return 1;
1706 }
1707 e->m_byte = (byte) temp[0];
1708
1709 mem_free ((void **) &temp);
1710
1711 e->m_emit_type = et_byte;
1712 }
1713 else
1714 {
1715 if (get_identifier (&t, &temp))
1716 {
1717 emit_destroy (&e);
1718 return 1;
1719 }
1720
1721 if (map_byte_find (&mapb, temp, &e->m_byte))
1722 {
1723 mem_free ((void **) &temp);
1724 emit_destroy (&e);
1725 return 1;
1726 }
1727
1728 mem_free ((void **) &temp);
1729
1730 e->m_emit_type = et_byte;
1731 }
1732
1733 eat_spaces (&t);
1734
1735 if (get_emits (&t, &e->m_next, mapb))
1736 {
1737 emit_destroy (&e);
1738 return 1;
1739 }
1740
1741 *text = t;
1742 *em = e;
1743 return 0;
1744 }
1745
1746 /*
1747 returns 0 on success,
1748 returns 1 otherwise,
1749 */
1750 static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1751 {
1752 const byte *t = *text;
1753 spec *s = NULL;
1754
1755 spec_create (&s);
1756 if (s == NULL)
1757 return 1;
1758
1759 /* first - read optional .if statement */
1760 if (*t == '.')
1761 {
1762 const byte *u = t;
1763 byte *keyword = NULL;
1764
1765 /* skip the dot */
1766 u++;
1767
1768 if (get_identifier (&u, &keyword))
1769 {
1770 spec_destroy (&s);
1771 return 1;
1772 }
1773
1774 /* .if */
1775 if (str_equal ((byte *) "if", keyword))
1776 {
1777 cond_create (&s->m_cond);
1778 if (s->m_cond == NULL)
1779 {
1780 spec_destroy (&s);
1781 return 1;
1782 }
1783
1784 /* skip the left paren */
1785 eat_spaces (&u);
1786 u++;
1787
1788 /* get the left operand */
1789 eat_spaces (&u);
1790 if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1791 {
1792 spec_destroy (&s);
1793 return 1;
1794 }
1795 s->m_cond->m_operands[0].m_type = cot_regbyte;
1796
1797 /* get the operator (!= or ==) */
1798 eat_spaces (&u);
1799 if (*u == '!')
1800 s->m_cond->m_type = ct_not_equal;
1801 else
1802 s->m_cond->m_type = ct_equal;
1803 u += 2;
1804
1805 /* skip the 0x prefix */
1806 eat_spaces (&u);
1807 u += 2;
1808
1809 /* get the right operand */
1810 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1811 s->m_cond->m_operands[1].m_type = cot_byte;
1812
1813 /* skip the right paren */
1814 eat_spaces (&u);
1815 u++;
1816
1817 eat_spaces (&u);
1818
1819 t = u;
1820 }
1821
1822 mem_free ((void **) &keyword);
1823 }
1824
1825 if (*t == '\'')
1826 {
1827 byte *temp = NULL;
1828
1829 if (get_string (&t, &temp))
1830 {
1831 spec_destroy (&s);
1832 return 1;
1833 }
1834 eat_spaces (&t);
1835
1836 if (*t == '-')
1837 {
1838 byte *temp2 = NULL;
1839
1840 /* skip the '-' character */
1841 t++;
1842 eat_spaces (&t);
1843
1844 if (get_string (&t, &temp2))
1845 {
1846 mem_free ((void **) &temp);
1847 spec_destroy (&s);
1848 return 1;
1849 }
1850 eat_spaces (&t);
1851
1852 s->m_spec_type = st_byte_range;
1853 s->m_byte[0] = *temp;
1854 s->m_byte[1] = *temp2;
1855
1856 mem_free ((void **) &temp2);
1857 }
1858 else
1859 {
1860 s->m_spec_type = st_byte;
1861 *s->m_byte = *temp;
1862 }
1863
1864 mem_free ((void **) &temp);
1865 }
1866 else if (*t == '"')
1867 {
1868 if (get_string (&t, &s->m_string))
1869 {
1870 spec_destroy (&s);
1871 return 1;
1872 }
1873 eat_spaces (&t);
1874
1875 s->m_spec_type = st_string;
1876 }
1877 else if (*t == '.')
1878 {
1879 byte *keyword = NULL;
1880
1881 /* skip the dot */
1882 t++;
1883
1884 if (get_identifier (&t, &keyword))
1885 {
1886 spec_destroy (&s);
1887 return 1;
1888 }
1889 eat_spaces (&t);
1890
1891 /* .true */
1892 if (str_equal ((byte *) "true", keyword))
1893 {
1894 s->m_spec_type = st_true;
1895 }
1896 /* .false */
1897 else if (str_equal ((byte *) "false", keyword))
1898 {
1899 s->m_spec_type = st_false;
1900 }
1901 /* .debug */
1902 else if (str_equal ((byte *) "debug", keyword))
1903 {
1904 s->m_spec_type = st_debug;
1905 }
1906 /* .loop */
1907 else if (str_equal ((byte *) "loop", keyword))
1908 {
1909 if (get_identifier (&t, &s->m_string))
1910 {
1911 mem_free ((void **) &keyword);
1912 spec_destroy (&s);
1913 return 1;
1914 }
1915 eat_spaces (&t);
1916
1917 s->m_spec_type = st_identifier_loop;
1918 }
1919
1920 mem_free ((void **) &keyword);
1921 }
1922 else
1923 {
1924 if (get_identifier (&t, &s->m_string))
1925 {
1926 spec_destroy (&s);
1927 return 1;
1928 }
1929 eat_spaces (&t);
1930
1931 s->m_spec_type = st_identifier;
1932 }
1933
1934 if (get_error (&t, &s->m_errtext, maps))
1935 {
1936 spec_destroy (&s);
1937 return 1;
1938 }
1939
1940 if (get_emits (&t, &s->m_emits, mapb))
1941 {
1942 spec_destroy (&s);
1943 return 1;
1944 }
1945
1946 *text = t;
1947 *sp = s;
1948 return 0;
1949 }
1950
1951 /*
1952 returns 0 on success,
1953 returns 1 otherwise,
1954 */
1955 static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
1956 {
1957 const byte *t = *text;
1958 rule *r = NULL;
1959
1960 rule_create (&r);
1961 if (r == NULL)
1962 return 1;
1963
1964 if (get_spec (&t, &r->m_specs, maps, mapb))
1965 {
1966 rule_destroy (&r);
1967 return 1;
1968 }
1969
1970 while (*t != ';')
1971 {
1972 byte *op = NULL;
1973 spec *sp = NULL;
1974
1975 /* skip the dot that precedes "and" or "or" */
1976 t++;
1977
1978 /* read "and" or "or" keyword */
1979 if (get_identifier (&t, &op))
1980 {
1981 rule_destroy (&r);
1982 return 1;
1983 }
1984 eat_spaces (&t);
1985
1986 if (r->m_oper == op_none)
1987 {
1988 /* .and */
1989 if (str_equal ((byte *) "and", op))
1990 r->m_oper = op_and;
1991 /* .or */
1992 else
1993 r->m_oper = op_or;
1994 }
1995
1996 mem_free ((void **) &op);
1997
1998 if (get_spec (&t, &sp, maps, mapb))
1999 {
2000 rule_destroy (&r);
2001 return 1;
2002 }
2003
2004 spec_append (&r->m_specs, &sp);
2005 }
2006
2007 /* skip the semicolon */
2008 t++;
2009 eat_spaces (&t);
2010
2011 *text = t;
2012 *ru = r;
2013 return 0;
2014 }
2015
2016 /*
2017 returns 0 on success,
2018 returns 1 otherwise,
2019 */
2020 static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2021 {
2022 if (map_rule_find (&mapr, symbol, ru))
2023 return 1;
2024
2025 /* (**ru).m_referenced = 1; */
2026
2027 return 0;
2028 }
2029
2030 /*
2031 returns 0 on success,
2032 returns 1 otherwise,
2033 */
2034 static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2035 byte **string_symbol, map_byte *regbytes)
2036 {
2037 rule *rulez = di->m_rulez;
2038
2039 /* update dependecies for the root and lexer symbols */
2040 if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2041 (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2042 return 1;
2043
2044 mem_free ((void **) syntax_symbol);
2045 mem_free ((void **) string_symbol);
2046
2047 /* update dependecies for the rest of the rules */
2048 while (rulez)
2049 {
2050 spec *sp = rulez->m_specs;
2051
2052 /* iterate through all the specifiers */
2053 while (sp)
2054 {
2055 /* update dependency for identifier */
2056 if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2057 {
2058 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2059 return 1;
2060
2061 mem_free ((void **) &sp->m_string);
2062 }
2063
2064 /* some errtexts reference to a rule */
2065 if (sp->m_errtext && sp->m_errtext->m_token_name)
2066 {
2067 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2068 return 1;
2069
2070 mem_free ((void **) &sp->m_errtext->m_token_name);
2071 }
2072
2073 /* update dependency for condition */
2074 if (sp->m_cond)
2075 {
2076 int i;
2077 for (i = 0; i < 2; i++)
2078 if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2079 {
2080 sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2081 sp->m_cond->m_operands[i].m_regname);
2082
2083 if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2084 return 1;
2085
2086 mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2087 }
2088 }
2089
2090 /* update dependency for all .load instructions */
2091 if (sp->m_emits)
2092 {
2093 emit *em = sp->m_emits;
2094 while (em != NULL)
2095 {
2096 if (em->m_emit_dest == ed_regbyte)
2097 {
2098 em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2099
2100 if (em->m_regbyte == NULL)
2101 return 1;
2102
2103 mem_free ((void **) &em->m_regname);
2104 }
2105
2106 em = em->m_next;
2107 }
2108 }
2109
2110 sp = sp->m_next;
2111 }
2112
2113 rulez = rulez->m_next;
2114 }
2115
2116 /* check for unreferenced symbols */
2117 /* de = di->m_defntns;
2118 while (de)
2119 {
2120 if (!de->m_referenced)
2121 {
2122 map_def *ma = mapd;
2123 while (ma)
2124 {
2125 if (ma->data == de)
2126 {
2127 assert (0);
2128 break;
2129 }
2130 ma = ma->next;
2131 }
2132 }
2133 de = de->m_next;
2134 }
2135 */
2136 return 0;
2137 }
2138
2139 static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2140 {
2141 byte values[2];
2142 int i;
2143
2144 if (co == NULL)
2145 return 1;
2146
2147 for (i = 0; i < 2; i++)
2148 switch (co->m_operands[i].m_type)
2149 {
2150 case cot_byte:
2151 values[i] = co->m_operands[i].m_byte;
2152 break;
2153 case cot_regbyte:
2154 values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2155 break;
2156 }
2157
2158 switch (co->m_type)
2159 {
2160 case ct_equal:
2161 return values[0] == values[1];
2162 case ct_not_equal:
2163 return values[0] != values[1];
2164 }
2165
2166 return 0;
2167 }
2168
2169 static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2170 {
2171 while (top != limit)
2172 {
2173 regbyte_ctx *rbc = top->m_prev;
2174 regbyte_ctx_destroy (&top);
2175 top = rbc;
2176 }
2177 }
2178
/*
    outcome of a single match () call
*/
enum match_result_
{
    /* the examined string does not match */
    mr_not_matched = 0,
    /* the examined string matches */
    mr_matched = 1,
    /* mr_not_matched + error has been raised */
    mr_error_raised = 2,
    /* used by identifier loops only */
    mr_dont_emit = 3,
    /* an internal error has occurred such as out of memory */
    mr_internal_error = 4
};

typedef enum match_result_ match_result;
2187
2188 /*
2189 This function does the main job. It parses the text and generates output data.
2190
2191 XXX optimize it - the barray seems to be the bottleneck
2192 */
2193 static match_result match (dict *di, const byte *text, unsigned int *index, rule *ru, barray **ba,
2194 int filtering_string, regbyte_ctx **rbc)
2195 {
2196 unsigned int ind = *index;
2197 match_result status = mr_not_matched;
2198 spec *sp = ru->m_specs;
2199 regbyte_ctx *ctx = *rbc;
2200
2201 /* for every specifier in the rule */
2202 while (sp)
2203 {
2204 unsigned int i, len, save_ind = ind;
2205 barray *array = NULL;
2206
2207 if (satisfies_condition (sp->m_cond, ctx))
2208 {
2209 switch (sp->m_spec_type)
2210 {
2211 case st_identifier:
2212 barray_create (&array);
2213 if (array == NULL)
2214 {
2215 free_regbyte_ctx_stack (ctx, *rbc);
2216 return mr_internal_error;
2217 }
2218
2219 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2220 if (status == mr_internal_error)
2221 {
2222 free_regbyte_ctx_stack (ctx, *rbc);
2223 barray_destroy (&array);
2224 return mr_internal_error;
2225 }
2226 break;
2227 case st_string:
2228 len = str_length (sp->m_string);
2229
2230 /* prefilter the stream */
2231 if (!filtering_string && di->m_string)
2232 {
2233 barray *ba;
2234 unsigned int filter_index = 0;
2235 match_result result;
2236 regbyte_ctx *null_ctx = NULL;
2237
2238 barray_create (&ba);
2239 if (ba == NULL)
2240 {
2241 free_regbyte_ctx_stack (ctx, *rbc);
2242 return mr_internal_error;
2243 }
2244
2245 result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2246
2247 if (result == mr_internal_error)
2248 {
2249 free_regbyte_ctx_stack (ctx, *rbc);
2250 barray_destroy (&ba);
2251 return mr_internal_error;
2252 }
2253
2254 if (result != mr_matched)
2255 {
2256 barray_destroy (&ba);
2257 status = mr_not_matched;
2258 break;
2259 }
2260
2261 barray_destroy (&ba);
2262
2263 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2264 {
2265 status = mr_not_matched;
2266 break;
2267 }
2268
2269 status = mr_matched;
2270 ind += len;
2271 }
2272 else
2273 {
2274 status = mr_matched;
2275 for (i = 0; status == mr_matched && i < len; i++)
2276 if (text[ind + i] != sp->m_string[i])
2277 status = mr_not_matched;
2278 if (status == mr_matched)
2279 ind += len;
2280 }
2281 break;
2282 case st_byte:
2283 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2284 if (status == mr_matched)
2285 ind++;
2286 break;
2287 case st_byte_range:
2288 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2289 mr_matched : mr_not_matched;
2290 if (status == mr_matched)
2291 ind++;
2292 break;
2293 case st_true:
2294 status = mr_matched;
2295 break;
2296 case st_false:
2297 status = mr_not_matched;
2298 break;
2299 case st_debug:
2300 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2301 break;
2302 case st_identifier_loop:
2303 barray_create (&array);
2304 if (array == NULL)
2305 {
2306 free_regbyte_ctx_stack (ctx, *rbc);
2307 return mr_internal_error;
2308 }
2309
2310 status = mr_dont_emit;
2311 for (;;)
2312 {
2313 match_result result;
2314
2315 save_ind = ind;
2316 result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2317
2318 if (result == mr_error_raised)
2319 {
2320 status = result;
2321 break;
2322 }
2323 else if (result == mr_matched)
2324 {
2325 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2326 barray_append (ba, &array))
2327 {
2328 free_regbyte_ctx_stack (ctx, *rbc);
2329 barray_destroy (&array);
2330 return mr_internal_error;
2331 }
2332 barray_destroy (&array);
2333 barray_create (&array);
2334 if (array == NULL)
2335 {
2336 free_regbyte_ctx_stack (ctx, *rbc);
2337 return mr_internal_error;
2338 }
2339 }
2340 else if (result == mr_internal_error)
2341 {
2342 free_regbyte_ctx_stack (ctx, *rbc);
2343 barray_destroy (&array);
2344 return mr_internal_error;
2345 }
2346 else
2347 break;
2348 }
2349 break;
2350 }
2351 }
2352 else
2353 {
2354 status = mr_not_matched;
2355 }
2356
2357 if (status == mr_error_raised)
2358 {
2359 free_regbyte_ctx_stack (ctx, *rbc);
2360 barray_destroy (&array);
2361
2362 return mr_error_raised;
2363 }
2364
2365 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2366 {
2367 free_regbyte_ctx_stack (ctx, *rbc);
2368 barray_destroy (&array);
2369
2370 if (sp->m_errtext)
2371 {
2372 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2373 ind), ind);
2374
2375 return mr_error_raised;
2376 }
2377
2378 return mr_not_matched;
2379 }
2380
2381 if (status == mr_matched)
2382 {
2383 if (sp->m_emits)
2384 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2385 {
2386 free_regbyte_ctx_stack (ctx, *rbc);
2387 barray_destroy (&array);
2388 return mr_internal_error;
2389 }
2390
2391 if (array)
2392 if (barray_append (ba, &array))
2393 {
2394 free_regbyte_ctx_stack (ctx, *rbc);
2395 barray_destroy (&array);
2396 return mr_internal_error;
2397 }
2398 }
2399
2400 barray_destroy (&array);
2401
2402 /* if the rule operator is a logical or, we pick up the first matching specifier */
2403 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2404 {
2405 *index = ind;
2406 *rbc = ctx;
2407 return mr_matched;
2408 }
2409
2410 sp = sp->m_next;
2411 }
2412
2413 /* everything went fine - all specifiers match up */
2414 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2415 {
2416 *index = ind;
2417 *rbc = ctx;
2418 return mr_matched;
2419 }
2420
2421 free_regbyte_ctx_stack (ctx, *rbc);
2422 return mr_not_matched;
2423 }
2424
2425 static byte *error_get_token (error *er, dict *di, const byte *text, unsigned int ind)
2426 {
2427 byte *str = NULL;
2428
2429 if (er->m_token)
2430 {
2431 barray *ba;
2432 unsigned int filter_index = 0;
2433 regbyte_ctx *ctx = NULL;
2434
2435 barray_create (&ba);
2436 if (ba != NULL)
2437 {
2438 if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2439 filter_index)
2440 {
2441 str = mem_alloc (filter_index + 1);
2442 if (str != NULL)
2443 {
2444 str_copy_n (str, text + ind, filter_index);
2445 str[filter_index] = '\0';
2446 }
2447 }
2448 barray_destroy (&ba);
2449 }
2450 }
2451
2452 return str;
2453 }
2454
2455 typedef struct grammar_load_state_
2456 {
2457 dict *di;
2458 byte *syntax_symbol;
2459 byte *string_symbol;
2460 map_str *maps;
2461 map_byte *mapb;
2462 map_rule *mapr;
2463 } grammar_load_state;
2464
2465 static void grammar_load_state_create (grammar_load_state **gr)
2466 {
2467 *gr = mem_alloc (sizeof (grammar_load_state));
2468 if (*gr)
2469 {
2470 (**gr).di = NULL;
2471 (**gr).syntax_symbol = NULL;
2472 (**gr).string_symbol = NULL;
2473 (**gr).maps = NULL;
2474 (**gr).mapb = NULL;
2475 (**gr).mapr = NULL;
2476 }
2477 }
2478
2479 static void grammar_load_state_destroy (grammar_load_state **gr)
2480 {
2481 if (*gr)
2482 {
2483 dict_destroy (&(**gr).di);
2484 mem_free ((void **) &(**gr).syntax_symbol);
2485 mem_free ((void **) &(**gr).string_symbol);
2486 map_str_destroy (&(**gr).maps);
2487 map_byte_destroy (&(**gr).mapb);
2488 map_rule_destroy (&(**gr).mapr);
2489 mem_free ((void **) gr);
2490 }
2491 }
2492
2493 /*
2494 the API
2495 */
2496
2497 grammar grammar_load_from_text (const byte *text)
2498 {
2499 grammar_load_state *g = NULL;
2500 grammar id = 0;
2501
2502 clear_last_error ();
2503
2504 grammar_load_state_create (&g);
2505 if (g == NULL)
2506 return 0;
2507
2508 dict_create (&g->di);
2509 if (g->di == NULL)
2510 {
2511 grammar_load_state_destroy (&g);
2512 return 0;
2513 }
2514
2515 eat_spaces (&text);
2516
2517 /* skip ".syntax" keyword */
2518 text += 7;
2519 eat_spaces (&text);
2520
2521 /* retrieve root symbol */
2522 if (get_identifier (&text, &g->syntax_symbol))
2523 {
2524 grammar_load_state_destroy (&g);
2525 return 0;
2526 }
2527 eat_spaces (&text);
2528
2529 /* skip semicolon */
2530 text++;
2531 eat_spaces (&text);
2532
2533 while (*text)
2534 {
2535 byte *symbol = NULL;
2536 int is_dot = *text == '.';
2537
2538 if (is_dot)
2539 text++;
2540
2541 if (get_identifier (&text, &symbol))
2542 {
2543 grammar_load_state_destroy (&g);
2544 return 0;
2545 }
2546 eat_spaces (&text);
2547
2548 /* .emtcode */
2549 if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2550 {
2551 map_byte *ma = NULL;
2552
2553 mem_free ((void **) &symbol);
2554
2555 if (get_emtcode (&text, &ma))
2556 {
2557 grammar_load_state_destroy (&g);
2558 return 0;
2559 }
2560
2561 map_byte_append (&g->mapb, &ma);
2562 }
2563 /* .regbyte */
2564 else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2565 {
2566 map_byte *ma = NULL;
2567
2568 mem_free ((void **) &symbol);
2569
2570 if (get_regbyte (&text, &ma))
2571 {
2572 grammar_load_state_destroy (&g);
2573 return 0;
2574 }
2575
2576 map_byte_append (&g->di->m_regbytes, &ma);
2577 }
2578 /* .errtext */
2579 else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2580 {
2581 map_str *ma = NULL;
2582
2583 mem_free ((void **) &symbol);
2584
2585 if (get_errtext (&text, &ma))
2586 {
2587 grammar_load_state_destroy (&g);
2588 return 0;
2589 }
2590
2591 map_str_append (&g->maps, &ma);
2592 }
2593 /* .string */
2594 else if (is_dot && str_equal (symbol, (byte *) "string"))
2595 {
2596 mem_free ((void **) &symbol);
2597
2598 if (g->di->m_string != NULL)
2599 {
2600 grammar_load_state_destroy (&g);
2601 return 0;
2602 }
2603
2604 if (get_identifier (&text, &g->string_symbol))
2605 {
2606 grammar_load_state_destroy (&g);
2607 return 0;
2608 }
2609
2610 /* skip semicolon */
2611 eat_spaces (&text);
2612 text++;
2613 eat_spaces (&text);
2614 }
2615 else
2616 {
2617 rule *ru = NULL;
2618 map_rule *ma = NULL;
2619
2620 if (get_rule (&text, &ru, g->maps, g->mapb))
2621 {
2622 grammar_load_state_destroy (&g);
2623 return 0;
2624 }
2625
2626 rule_append (&g->di->m_rulez, &ru);
2627
2628 /* if a rule consist of only one specifier, give it an ".and" operator */
2629 if (ru->m_oper == op_none)
2630 ru->m_oper = op_and;
2631
2632 map_rule_create (&ma);
2633 if (ma == NULL)
2634 {
2635 grammar_load_state_destroy (&g);
2636 return 0;
2637 }
2638
2639 ma->key = symbol;
2640 ma->data = ru;
2641 map_rule_append (&g->mapr, &ma);
2642 }
2643 }
2644
2645 if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2646 g->di->m_regbytes))
2647 {
2648 grammar_load_state_destroy (&g);
2649 return 0;
2650 }
2651
2652 dict_append (&g_dicts, &g->di);
2653 id = g->di->m_id;
2654 g->di = NULL;
2655
2656 grammar_load_state_destroy (&g);
2657
2658 return id;
2659 }
2660
2661 int grammar_set_reg8 (grammar id, const byte *name, byte value)
2662 {
2663 dict *di = NULL;
2664 map_byte *reg = NULL;
2665
2666 clear_last_error ();
2667
2668 dict_find (&g_dicts, id, &di);
2669 if (di == NULL)
2670 {
2671 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2672 return 0;
2673 }
2674
2675 reg = map_byte_locate (&di->m_regbytes, name);
2676 if (reg == NULL)
2677 {
2678 set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2679 return 0;
2680 }
2681
2682 reg->data = value;
2683 return 1;
2684 }
2685
2686 int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
2687 {
2688 dict *di = NULL;
2689 barray *ba = NULL;
2690 unsigned int index = 0;
2691 regbyte_ctx *rbc = NULL;
2692
2693 clear_last_error ();
2694
2695 dict_find (&g_dicts, id, &di);
2696 if (di == NULL)
2697 {
2698 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2699 return 0;
2700 }
2701
2702 barray_create (&ba);
2703 if (ba == NULL)
2704 return 0;
2705
2706 *prod = NULL;
2707 *size = 0;
2708
2709 if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
2710 {
2711 barray_destroy (&ba);
2712 free_regbyte_ctx_stack (rbc, NULL);
2713 return 0;
2714 }
2715
2716 free_regbyte_ctx_stack (rbc, NULL);
2717
2718 *prod = mem_alloc (ba->len * sizeof (byte));
2719 if (*prod == NULL)
2720 {
2721 barray_destroy (&ba);
2722 return 0;
2723 }
2724
2725 mem_copy (*prod, ba->data, ba->len * sizeof (byte));
2726 *size = ba->len;
2727 barray_destroy (&ba);
2728
2729 return 1;
2730 }
2731
2732 int grammar_destroy (grammar id)
2733 {
2734 dict **di = &g_dicts;
2735
2736 clear_last_error ();
2737
2738 while (*di != NULL)
2739 {
2740 if ((**di).m_id == id)
2741 {
2742 dict *tmp = *di;
2743 *di = (**di).m_next;
2744 dict_destroy (&tmp);
2745 return 1;
2746 }
2747
2748 di = &(**di).m_next;
2749 }
2750
2751 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2752 return 0;
2753 }
2754
2755 void grammar_get_last_error (byte *text, unsigned int size, int *pos)
2756 {
2757 unsigned int len = 0, dots_made = 0;
2758 const byte *p = error_message;
2759
2760 *text = '\0';
2761
2762 #define APPEND_CHARACTER(x) if (dots_made == 0) {\
2763 if (len < size - 1) {\
2764 text[len++] = (x); text[len] = '\0';\
2765 } else {\
2766 int i;\
2767 for (i = 0; i < 3; i++)\
2768 if (--len >= 0)\
2769 text[len] = '.';\
2770 dots_made = 1;\
2771 }\
2772 }
2773
2774 if (p)
2775 while (*p)
2776 if (*p == '$')
2777 {
2778 const byte *r = error_param;
2779
2780 while (*r)
2781 {
2782 APPEND_CHARACTER(*r)
2783 r++;
2784 }
2785
2786 p++;
2787 }
2788 else
2789 {
2790 APPEND_CHARACTER(*p)
2791 p++;
2792 }
2793
2794 *pos = error_position;
2795
2796 #undef APPEND_CHARACTER
2797
2798 }
2799